sched: RT-balance, only adjust overload state when changing
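
The diff below moves the RT-overload bookkeeping from the old global rt_overload_mask/rto_count into the per-root-domain rd->rto_mask/rd->rto_count, and makes update_rt_migration() touch that shared state only when rq->rt.overloaded actually changes, rather than re-arming it on every call. A minimal userspace sketch of that edge-triggered pattern follows; the types are simplified stand-ins for struct rq and struct root_domain (and the rt.* counters are flattened onto rq), not the kernel's definitions:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Simplified stand-ins for the kernel's structures; illustration only. */
    struct root_domain {
    	atomic_int rto_count;	/* number of overloaded runqueues        */
    	unsigned long rto_mask;	/* one bit per overloaded CPU (toy mask) */
    };

    struct rq {
    	int cpu;
    	int rt_nr_running;	/* runnable RT tasks on this rq          */
    	int rt_nr_migratory;	/* of those, how many may migrate        */
    	int overloaded;		/* cached state, set/cleared on change   */
    	struct root_domain *rd;
    };

    static void rt_set_overload(struct rq *rq)
    {
    	rq->rd->rto_mask |= 1UL << rq->cpu;
    	/* the kernel issues a wmb() between these two steps */
    	atomic_fetch_add(&rq->rd->rto_count, 1);
    }

    static void rt_clear_overload(struct rq *rq)
    {
    	atomic_fetch_sub(&rq->rd->rto_count, 1);
    	rq->rd->rto_mask &= ~(1UL << rq->cpu);
    }

    /* Touch the shared root-domain state only on an actual transition. */
    static void update_rt_migration(struct rq *rq)
    {
    	if (rq->rt_nr_migratory && rq->rt_nr_running > 1) {
    		if (!rq->overloaded) {
    			rt_set_overload(rq);
    			rq->overloaded = 1;
    		}
    	} else if (rq->overloaded) {
    		rt_clear_overload(rq);
    		rq->overloaded = 0;
    	}
    }

    int main(void)
    {
    	struct root_domain rd;
    	struct rq rq = { .cpu = 1, .rt_nr_running = 0, .rt_nr_migratory = 0,
    			 .overloaded = 0, .rd = &rd };

    	atomic_init(&rd.rto_count, 0);
    	rd.rto_mask = 0;

    	rq.rt_nr_running = 2;
    	rq.rt_nr_migratory = 1;
    	update_rt_migration(&rq);	/* transition: becomes overloaded */
    	update_rt_migration(&rq);	/* no-op: state did not change    */
    	printf("rto_count=%d\n", atomic_load(&rd.rto_count));	/* 1 */

    	rq.rt_nr_running = 1;
    	update_rt_migration(&rq);	/* transition: overload cleared   */
    	printf("rto_count=%d\n", atomic_load(&rd.rto_count));	/* 0 */
    	return 0;
    }
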
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index deff0c77d7059909f1744629e98980e5662e2768..a386758ffebb14c701a3cca37cdb9962162c71c9 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -5,22 +5,14 @@
 
 #ifdef CONFIG_SMP
 
-/*
- * The "RT overload" flag: it gets set if a CPU has more than
- * one runnable RT task.
- */
-static cpumask_t rt_overload_mask;
-static atomic_t rto_count;
-
-static inline int rt_overloaded(void)
+static inline int rt_overloaded(struct rq *rq)
 {
-       return atomic_read(&rto_count);
+       return atomic_read(&rq->rd->rto_count);
 }
 
 static inline void rt_set_overload(struct rq *rq)
 {
-       rq->rt.overloaded = 1;
-       cpu_set(rq->cpu, rt_overload_mask);
+       cpu_set(rq->cpu, rq->rd->rto_mask);
        /*
         * Make sure the mask is visible before we set
         * the overload count. That is checked to determine
@@ -29,23 +21,27 @@ static inline void rt_set_overload(struct rq *rq)
         * updated yet.
         */
        wmb();
-       atomic_inc(&rto_count);
+       atomic_inc(&rq->rd->rto_count);
 }
 
 static inline void rt_clear_overload(struct rq *rq)
 {
        /* the order here really doesn't matter */
-       atomic_dec(&rto_count);
-       cpu_clear(rq->cpu, rt_overload_mask);
-       rq->rt.overloaded = 0;
+       atomic_dec(&rq->rd->rto_count);
+       cpu_clear(rq->cpu, rq->rd->rto_mask);
 }
 
 static void update_rt_migration(struct rq *rq)
 {
-       if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
-               rt_set_overload(rq);
-       else
+       if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
+               if (!rq->rt.overloaded) {
+                       rt_set_overload(rq);
+                       rq->rt.overloaded = 1;
+               }
+       } else if (rq->rt.overloaded) {
                rt_clear_overload(rq);
+               rq->rt.overloaded = 0;
+       }
 }
 #endif /* CONFIG_SMP */
 
@@ -253,8 +249,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
        struct list_head *queue;
        int idx;
 
-       assert_spin_locked(&rq->lock);
-
        if (likely(rq->rt.rt_nr_running < 2))
                return NULL;
 
@@ -308,7 +302,7 @@ static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
        int       count       = 0;
        int       cpu;
 
-       cpus_and(*lowest_mask, cpu_online_map, task->cpus_allowed);
+       cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
 
        /*
         * Scan each rq for the lowest prio.
@@ -500,8 +494,6 @@ static int push_rt_task(struct rq *rq)
        int ret = 0;
        int paranoid = RT_MAX_TRIES;
 
-       assert_spin_locked(&rq->lock);
-
        if (!rq->rt.overloaded)
                return 0;
 
@@ -546,8 +538,6 @@ static int push_rt_task(struct rq *rq)
                goto out;
        }
 
-       assert_spin_locked(&lowest_rq->lock);
-
        deactivate_task(rq, next_task, 0);
        set_task_cpu(next_task, lowest_rq->cpu);
        activate_task(lowest_rq, next_task, 0);
@@ -582,27 +572,16 @@ static void push_rt_tasks(struct rq *rq)
 
 static int pull_rt_task(struct rq *this_rq)
 {
-       struct task_struct *next;
-       struct task_struct *p;
+       int this_cpu = this_rq->cpu, ret = 0, cpu;
+       struct task_struct *p, *next;
        struct rq *src_rq;
-       int this_cpu = this_rq->cpu;
-       int cpu;
-       int ret = 0;
-
-       assert_spin_locked(&this_rq->lock);
 
-       /*
-        * If cpusets are used, and we have overlapping
-        * run queue cpusets, then this algorithm may not catch all.
-        * This is just the price you pay on trying to keep
-        * dirtying caches down on large SMP machines.
-        */
-       if (likely(!rt_overloaded()))
+       if (likely(!rt_overloaded(this_rq)))
                return 0;
 
        next = pick_next_task_rt(this_rq);
 
-       for_each_cpu_mask(cpu, rt_overload_mask) {
+       for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
                if (this_cpu == cpu)
                        continue;
 
@@ -616,23 +595,25 @@ static int pull_rt_task(struct rq *this_rq)
                        if (double_lock_balance(this_rq, src_rq)) {
                                /* unlocked our runqueue lock */
                                struct task_struct *old_next = next;
+
                                next = pick_next_task_rt(this_rq);
                                if (next != old_next)
                                        ret = 1;
                        }
-                       if (likely(src_rq->rt.rt_nr_running <= 1))
+                       if (likely(src_rq->rt.rt_nr_running <= 1)) {
                                /*
                                 * Small chance that this_rq->curr changed
                                 * but it's really harmless here.
                                 */
                                rt_clear_overload(this_rq);
-                       else
+                       } else {
                                /*
                                 * Heh, the src_rq is now overloaded, since
                                 * we already have the src_rq lock, go straight
                                 * to pulling tasks from it.
                                 */
                                goto try_pulling;
+                       }
                        spin_unlock(&src_rq->lock);
                        continue;
                }
@@ -646,6 +627,7 @@ static int pull_rt_task(struct rq *this_rq)
                 */
                if (double_lock_balance(this_rq, src_rq)) {
                        struct task_struct *old_next = next;
+
                        next = pick_next_task_rt(this_rq);
                        if (next != old_next)
                                ret = 1;
@@ -682,7 +664,7 @@ static int pull_rt_task(struct rq *this_rq)
                         */
                        if (p->prio < src_rq->curr->prio ||
                            (next && next->prio < src_rq->curr->prio))
-                               goto bail;
+                               goto out;
 
                        ret = 1;
 
@@ -694,9 +676,7 @@ static int pull_rt_task(struct rq *this_rq)
                         * case there's an even higher prio task
                         * in another runqueue. (low likelyhood
                         * but possible)
-                        */
-
-                       /*
+                        *
                         * Update next so that we won't pick a task
                         * on another cpu with a priority lower (or equal)
                         * than the one we just picked.
@@ -704,23 +684,21 @@ static int pull_rt_task(struct rq *this_rq)
                        next = p;
 
                }
-bail:
+out:
                spin_unlock(&src_rq->lock);
        }
 
        return ret;
 }
 
-static void schedule_balance_rt(struct rq *rq,
-                               struct task_struct *prev)
+static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 {
        /* Try to pull RT tasks here if we lower this rq's prio */
-       if (unlikely(rt_task(prev)) &&
-           rq->rt.highest_prio > prev->prio)
+       if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
                pull_rt_task(rq);
 }
 
-static void schedule_tail_balance_rt(struct rq *rq)
+static void post_schedule_rt(struct rq *rq)
 {
        /*
         * If we have more than one rt_task queued, then
@@ -737,10 +715,9 @@ static void schedule_tail_balance_rt(struct rq *rq)
 }
 
 
-static void wakeup_balance_rt(struct rq *rq, struct task_struct *p)
+static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
 {
-       if (unlikely(rt_task(p)) &&
-           !task_running(rq, p) &&
+       if (!task_running(rq, p) &&
            (p->prio >= rq->rt.highest_prio) &&
            rq->rt.overloaded)
                push_rt_tasks(rq);
@@ -791,11 +768,105 @@ static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
        p->nr_cpus_allowed = weight;
 }
 
-#else /* CONFIG_SMP */
-# define schedule_tail_balance_rt(rq)  do { } while (0)
-# define schedule_balance_rt(rq, prev) do { } while (0)
-# define wakeup_balance_rt(rq, p)      do { } while (0)
+/* Assumes rq->lock is held */
+static void join_domain_rt(struct rq *rq)
+{
+       if (rq->rt.overloaded)
+               rt_set_overload(rq);
+}
+
+/* Assumes rq->lock is held */
+static void leave_domain_rt(struct rq *rq)
+{
+       if (rq->rt.overloaded)
+               rt_clear_overload(rq);
+}
+
+/*
+ * When switch from the rt queue, we bring ourselves to a position
+ * that we might want to pull RT tasks from other runqueues.
+ */
+static void switched_from_rt(struct rq *rq, struct task_struct *p,
+                          int running)
+{
+       /*
+        * If there are other RT tasks then we will reschedule
+        * and the scheduling of the other RT tasks will handle
+        * the balancing. But if we are the last RT task
+        * we may need to handle the pulling of RT tasks
+        * now.
+        */
+       if (!rq->rt.rt_nr_running)
+               pull_rt_task(rq);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * When switching a task to RT, we may overload the runqueue
+ * with RT tasks. In this case we try to push them off to
+ * other runqueues.
+ */
+static void switched_to_rt(struct rq *rq, struct task_struct *p,
+                          int running)
+{
+       int check_resched = 1;
+
+       /*
+        * If we are already running, then there's nothing
+        * that needs to be done. But if we are not running
+        * we may need to preempt the current running task.
+        * If that current running task is also an RT task
+        * then see if we can move to another run queue.
+        */
+       if (!running) {
+#ifdef CONFIG_SMP
+               if (rq->rt.overloaded && push_rt_task(rq) &&
+                   /* Don't resched if we changed runqueues */
+                   rq != task_rq(p))
+                       check_resched = 0;
+#endif /* CONFIG_SMP */
+               if (check_resched && p->prio < rq->curr->prio)
+                       resched_task(rq->curr);
+       }
+}
+
+/*
+ * Priority of the task has changed. This may cause
+ * us to initiate a push or pull.
+ */
+static void prio_changed_rt(struct rq *rq, struct task_struct *p,
+                           int oldprio, int running)
+{
+       if (running) {
+#ifdef CONFIG_SMP
+               /*
+                * If our priority decreases while running, we
+                * may need to pull tasks to this runqueue.
+                */
+               if (oldprio < p->prio)
+                       pull_rt_task(rq);
+               /*
+                * If there's a higher priority task waiting to run
+                * then reschedule.
+                */
+               if (p->prio > rq->rt.highest_prio)
+                       resched_task(p);
+#else
+               /* For UP simply resched on drop of prio */
+               if (oldprio < p->prio)
+                       resched_task(p);
 #endif /* CONFIG_SMP */
+       } else {
+               /*
+                * This task is not running, but if it is
+                * greater than the current running task
+                * then reschedule.
+                */
+               if (p->prio < rq->curr->prio)
+                       resched_task(rq->curr);
+       }
+}
+
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
 {
@@ -848,8 +919,17 @@ const struct sched_class rt_sched_class = {
        .load_balance           = load_balance_rt,
        .move_one_task          = move_one_task_rt,
        .set_cpus_allowed       = set_cpus_allowed_rt,
+       .join_domain            = join_domain_rt,
+       .leave_domain           = leave_domain_rt,
+       .pre_schedule           = pre_schedule_rt,
+       .post_schedule          = post_schedule_rt,
+       .task_wake_up           = task_wake_up_rt,
+       .switched_from          = switched_from_rt,
 #endif
 
        .set_curr_task          = set_curr_task_rt,
        .task_tick              = task_tick_rt,
+
+       .prio_changed           = prio_changed_rt,
+       .switched_to            = switched_to_rt,
 };
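
For reference, the rq->rd fields used throughout this diff (rd->rto_mask, rd->rto_count, rd->online) come from the root-domain support added in kernel/sched.c by the same patch series. A rough sketch of that structure is below; since it is not part of this file, treat the exact layout as an assumption:

    /*
     * Assumed shape of the root domain (defined in kernel/sched.c, not in
     * this diff): one instance per exclusive cpuset, reached from every
     * runqueue via rq->rd.
     */
    struct root_domain {
    	atomic_t refcount;
    	cpumask_t span;		/* CPUs covered by this root domain      */
    	cpumask_t online;	/* members of span that are online       */
    	cpumask_t rto_mask;	/* CPUs with more than one runnable RT   */
    	atomic_t rto_count;	/* number of bits set in rto_mask        */
    };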