sched: fix bug in balance_tasks()

author Peter Williams <pwil3058@bigpond.net.au>

Thu, 9 Aug 2007 09:16:46 +0000 (11:16 +0200)

committer Ingo Molnar <mingo@elte.hu>

Thu, 9 Aug 2007 09:16:46 +0000 (11:16 +0200)
author Peter Williams <pwil3058@bigpond.net.au>
Thu, 9 Aug 2007 09:16:46 +0000 (11:16 +0200)
committer Ingo Molnar <mingo@elte.hu>
Thu, 9 Aug 2007 09:16:46 +0000 (11:16 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 24bce423f10d74b84f1306a0de6c1a1412669a45..513b81c60e8737e9e6a1dedc60d7812efb0f7dd8 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -870,7 +870,7 @@ struct sched_class {
                         struct rq *busiest,
                         unsigned long max_nr_move, unsigned long max_load_move,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *all_pinned);
+                       int *all_pinned, int *this_best_prio);
  
         void (*set_curr_task) (struct rq *rq);
         void (*task_tick) (struct rq *rq, struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c

index 85b93118d244106bf1e74bd30064f9a15a9ab750..1fa07c14624ee9c04e195b907f0f3de29afa7973 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -745,8 +745,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                       unsigned long max_nr_move, unsigned long max_load_move,
                       struct sched_domain *sd, enum cpu_idle_type idle,
                       int *all_pinned, unsigned long *load_moved,
-                     int this_best_prio, int best_prio, int best_prio_seen,
-                     struct rq_iterator *iterator);
+                     int *this_best_prio, struct rq_iterator *iterator);
  
  #include "sched_stats.h"
  #include "sched_rt.c"
@@ -2165,8 +2164,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                       unsigned long max_nr_move, unsigned long max_load_move,
                       struct sched_domain *sd, enum cpu_idle_type idle,
                       int *all_pinned, unsigned long *load_moved,
-                     int this_best_prio, int best_prio, int best_prio_seen,
-                     struct rq_iterator *iterator)
+                     int *this_best_prio, struct rq_iterator *iterator)
  {
         int pulled = 0, pinned = 0, skip_for_load;
         struct task_struct *p;
@@ -2191,12 +2189,8 @@ next:
          */
         skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
                                                          SCHED_LOAD_SCALE_FUZZ;
-       if (skip_for_load && p->prio < this_best_prio)
-               skip_for_load = !best_prio_seen && p->prio == best_prio;
-       if (skip_for_load ||
+       if ((skip_for_load && p->prio >= *this_best_prio) ||
             !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
-
-               best_prio_seen |= p->prio == best_prio;
                 p = iterator->next(iterator->arg);
                 goto next;
         }
@@ -2210,8 +2204,8 @@ next:
          * and the prescribed amount of weighted load.
          */
         if (pulled < max_nr_move && rem_load_move > 0) {
-               if (p->prio < this_best_prio)
-                       this_best_prio = p->prio;
+               if (p->prio < *this_best_prio)
+                       *this_best_prio = p->prio;
                 p = iterator->next(iterator->arg);
                 goto next;
         }
@@ -2243,12 +2237,13 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
  {
         struct sched_class *class = sched_class_highest;
         unsigned long total_load_moved = 0;
+       int this_best_prio = this_rq->curr->prio;
  
         do {
                 total_load_moved +=
                         class->load_balance(this_rq, this_cpu, busiest,
                                 ULONG_MAX, max_load_move - total_load_moved,
-                               sd, idle, all_pinned);
+                               sd, idle, all_pinned, &this_best_prio);
                 class = class->next;
         } while (class && max_load_move > total_load_moved);
  
@@ -2266,10 +2261,12 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
                          struct sched_domain *sd, enum cpu_idle_type idle)
  {
         struct sched_class *class;
+       int this_best_prio = MAX_PRIO;
  
         for (class = sched_class_highest; class; class = class->next)
                 if (class->load_balance(this_rq, this_cpu, busiest,
-                                       1, ULONG_MAX, sd, idle, NULL))
+                                       1, ULONG_MAX, sd, idle, NULL,
+                                       &this_best_prio))
                         return 1;
  
         return 0;
@@ -3184,8 +3181,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                       unsigned long max_nr_move, unsigned long max_load_move,
                       struct sched_domain *sd, enum cpu_idle_type idle,
                       int *all_pinned, unsigned long *load_moved,
-                     int this_best_prio, int best_prio, int best_prio_seen,
-                     struct rq_iterator *iterator)
+                     int *this_best_prio, struct rq_iterator *iterator)
  {
         *load_moved = 0;
  
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index 16511e9e552877149e3af64563dbeba73421de9e..923bed0b0c426fe6d71bee892755aca355936a20 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -929,6 +929,7 @@ static struct task_struct *load_balance_next_fair(void *arg)
         return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
  }
  
+#ifdef CONFIG_FAIR_GROUP_SCHED
  static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
  {
         struct sched_entity *curr;
@@ -942,12 +943,13 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
  
         return p->prio;
  }
+#endif
  
  static unsigned long
  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                       unsigned long max_nr_move, unsigned long max_load_move,
-                       struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *all_pinned)
+                 unsigned long max_nr_move, unsigned long max_load_move,
+                 struct sched_domain *sd, enum cpu_idle_type idle,
+                 int *all_pinned, int *this_best_prio)
  {
         struct cfs_rq *busy_cfs_rq;
         unsigned long load_moved, total_nr_moved = 0, nr_moved;
@@ -958,10 +960,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
         cfs_rq_iterator.next = load_balance_next_fair;
  
         for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
                 struct cfs_rq *this_cfs_rq;
                 long imbalance;
                 unsigned long maxload;
-               int this_best_prio, best_prio, best_prio_seen = 0;
  
                 this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
  
@@ -975,27 +977,17 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                 imbalance /= 2;
                 maxload = min(rem_load_move, imbalance);
  
-               this_best_prio = cfs_rq_best_prio(this_cfs_rq);
-               best_prio = cfs_rq_best_prio(busy_cfs_rq);
-
-               /*
-                * Enable handling of the case where there is more than one task
-                * with the best priority. If the current running task is one
-                * of those with prio==best_prio we know it won't be moved
-                * and therefore it's safe to override the skip (based on load)
-                * of any task we find with that prio.
-                */
-               if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se)
-                       best_prio_seen = 1;
-
+               *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+#define maxload rem_load_move
+#endif
                 /* pass busy_cfs_rq argument into
                  * load_balance_[start|next]_fair iterators
                  */
                 cfs_rq_iterator.arg = busy_cfs_rq;
                 nr_moved = balance_tasks(this_rq, this_cpu, busiest,
                                 max_nr_move, maxload, sd, idle, all_pinned,
-                               &load_moved, this_best_prio, best_prio,
-                               best_prio_seen, &cfs_rq_iterator);
+                               &load_moved, this_best_prio, &cfs_rq_iterator);
  
                 total_nr_moved += nr_moved;
                 max_nr_move -= nr_moved;
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c

index 1d8d9e13d95013e46df2bbd02eba7214337e93df..dc9e1068911fd822c10038cd044cf89b8e1f0f80 100644 (file)
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -41,7 +41,7 @@ static unsigned long
  load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
                         unsigned long max_nr_move, unsigned long max_load_move,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *all_pinned)
+                       int *all_pinned, int *this_best_prio)
  {
         return 0;
  }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index 2b0626a43cb8044b43c4425050427f1e681d0fdc..5b559e8c8aa6559a40c276dd268a5dcd35975b15 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -176,26 +176,12 @@ static unsigned long
  load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
                         unsigned long max_nr_move, unsigned long max_load_move,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *all_pinned)
+                       int *all_pinned, int *this_best_prio)
  {
-       int this_best_prio, best_prio, best_prio_seen = 0;
         int nr_moved;
         struct rq_iterator rt_rq_iterator;
         unsigned long load_moved;
  
-       best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
-       this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
-
-       /*
-        * Enable handling of the case where there is more than one task
-        * with the best priority.   If the current running task is one
-        * of those with prio==best_prio we know it won't be moved
-        * and therefore it's safe to override the skip (based on load)
-        * of any task we find with that prio.
-        */
-       if (busiest->curr->prio == best_prio)
-               best_prio_seen = 1;
-
         rt_rq_iterator.start = load_balance_start_rt;
         rt_rq_iterator.next = load_balance_next_rt;
         /* pass 'busiest' rq argument into
@@ -205,8 +191,7 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  
         nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
                         max_load_move, sd, idle, all_pinned, &load_moved,
-                       this_best_prio, best_prio, best_prio_seen,
-                       &rt_rq_iterator);
+                       this_best_prio, &rt_rq_iterator);
  
         return load_moved;
  }
author	Peter Williams <pwil3058@bigpond.net.au>
	Thu, 9 Aug 2007 09:16:46 +0000 (11:16 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Thu, 9 Aug 2007 09:16:46 +0000 (11:16 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history
kernel/sched_idletask.c		patch \| blob \| history
kernel/sched_rt.c		patch \| blob \| history