*/
#ifdef CONFIG_SMP
-static cpumask_t rt_overload_mask;
-static atomic_t rto_count;
-static inline int rt_overloaded(void)
-{
- return atomic_read(&rto_count);
-}
-static inline cpumask_t *rt_overload(void)
+
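+/* Is any runqueue in this rq's root domain overloaded with RT tasks? */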
+static inline int rt_overloaded(struct rq *rq)
{
- return &rt_overload_mask;
+ return atomic_read(&rq->rd->rto_count);
}
+
static inline void rt_set_overload(struct rq *rq)
{
- rq->rt.overloaded = 1;
- cpu_set(rq->cpu, rt_overload_mask);
+ cpu_set(rq->cpu, rq->rd->rto_mask);
/*
* Make sure the mask is visible before we set
 * the overload count. That is checked to determine
 * if we should look at the mask. It would be a shame
 * if we looked at the mask, but the mask was not
 * updated yet.
*/
wmb();
- atomic_inc(&rto_count);
+ atomic_inc(&rq->rd->rto_count);
}
+
static inline void rt_clear_overload(struct rq *rq)
{
/* the order here really doesn't matter */
- atomic_dec(&rto_count);
- cpu_clear(rq->cpu, rt_overload_mask);
- rq->rt.overloaded = 0;
+ atomic_dec(&rq->rd->rto_count);
+ cpu_clear(rq->cpu, rq->rd->rto_mask);
}
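+/*
+ * A runqueue is considered RT-overloaded when it has more than one
+ * runnable RT task and at least one of them may migrate to another CPU.
+ */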
static void update_rt_migration(struct rq *rq)
{
- if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
- rt_set_overload(rq);
- else
+ if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
+ if (!rq->rt.overloaded) {
+ rt_set_overload(rq);
+ rq->rt.overloaded = 1;
+ }
+ } else if (rq->rt.overloaded) {
rt_clear_overload(rq);
+ rq->rt.overloaded = 0;
+ }
}
#endif /* CONFIG_SMP */
* that is just being woken and probably will have
* cold cache anyway.
*/
- if (unlikely(rt_task(rq->curr))) {
+ if (unlikely(rt_task(rq->curr)) &&
+ (p->nr_cpus_allowed > 1)) {
int cpu = find_lowest_rq(p);
return (cpu == -1) ? task_cpu(p) : cpu;
}
/* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
- int cpu)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
{
struct rt_prio_array *array = &rq->rt.active;
struct task_struct *next;
struct list_head *queue;
int idx;
- assert_spin_locked(&rq->lock);
-
if (likely(rq->rt.rt_nr_running < 2))
return NULL;
if (queue->next->next != queue) {
/* same prio task */
- next = list_entry(queue->next->next, struct task_struct, run_list);
+ next = list_entry(queue->next->next, struct task_struct,
+ run_list);
if (pick_rt_task(rq, next, cpu))
goto out;
}
}
static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
-static DEFINE_PER_CPU(cpumask_t, valid_cpu_mask);
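+/*
+ * Fill lowest_mask with candidate CPUs for the task. Returns 0 if there
+ * are none, 1 if the first set bit should be used directly, or the
+ * number of CPUs tied at the lowest priority found.
+ */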
static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
{
- int cpu;
- cpumask_t *valid_mask = &__get_cpu_var(valid_cpu_mask);
int lowest_prio = -1;
- int ret = 0;
+ int lowest_cpu = -1;
+ int count = 0;
+ int cpu;
- cpus_clear(*lowest_mask);
- cpus_and(*valid_mask, cpu_online_map, task->cpus_allowed);
+ cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
/*
* Scan each rq for the lowest prio.
*/
- for_each_cpu_mask(cpu, *valid_mask) {
+ for_each_cpu_mask(cpu, *lowest_mask) {
struct rq *rq = cpu_rq(cpu);
/* We look for lowest RT prio or non-rt CPU */
if (rq->rt.highest_prio >= MAX_RT_PRIO) {
- if (ret)
+ /*
+ * If we already found a low-priority RT queue and have now
+ * found this non-RT queue, clear the mask and set only our bit.
+ * Otherwise just return the mask as is; count==1 will cause
+ * the algorithm to use the first bit found.
+ */
+ if (lowest_cpu != -1) {
cpus_clear(*lowest_mask);
- cpu_set(rq->cpu, *lowest_mask);
+ cpu_set(rq->cpu, *lowest_mask);
+ }
return 1;
}
if (rq->rt.highest_prio > lowest_prio) {
/* new low - clear old data */
lowest_prio = rq->rt.highest_prio;
- cpus_clear(*lowest_mask);
+ lowest_cpu = cpu;
+ count = 0;
}
- cpu_set(rq->cpu, *lowest_mask);
- ret = 1;
+ count++;
+ } else
+ cpu_clear(cpu, *lowest_mask);
+ }
+
+ /*
+ * Clear out all the set bits that represent
+ * runqueues that were of higher prio than
+ * the lowest_prio.
+ */
+ if (lowest_cpu > 0) {
+ /*
+ * Perhaps we could add another cpumask op to
+ * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
+ * Then that could be optimized to use memset and such.
+ */
+ for_each_cpu_mask(cpu, *lowest_mask) {
+ if (cpu >= lowest_cpu)
+ break;
+ cpu_clear(cpu, *lowest_mask);
}
}
- return ret;
+ return count;
}
static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ int count = find_lowest_cpus(task, lowest_mask);
+
+ if (!count)
+ return -1; /* No targets found */
- if (!find_lowest_cpus(task, lowest_mask))
- return -1;
+ /*
+ * There is no sense in performing an optimal search if only one
+ * target is found.
+ */
+ if (count == 1)
+ return first_cpu(*lowest_mask);
/*
* At this point we have built a mask of cpus representing the
}
/* Will lock the rq it finds */
-static struct rq *find_lock_lowest_rq(struct task_struct *task,
- struct rq *rq)
+static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
struct rq *lowest_rq = NULL;
- int cpu;
int tries;
+ int cpu;
for (tries = 0; tries < RT_MAX_TRIES; tries++) {
cpu = find_lowest_rq(task);
* Also make sure that it wasn't scheduled on its rq.
*/
if (unlikely(task_rq(task) != rq ||
- !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
+ !cpu_isset(lowest_rq->cpu,
+ task->cpus_allowed) ||
task_running(rq, task) ||
!task->se.on_rq)) {
+
spin_unlock(&lowest_rq->lock);
lowest_rq = NULL;
break;
int ret = 0;
int paranoid = RT_MAX_TRIES;
- assert_spin_locked(&rq->lock);
-
if (!rq->rt.overloaded)
return 0;
goto out;
}
- assert_spin_locked(&lowest_rq->lock);
-
deactivate_task(rq, next_task, 0);
set_task_cpu(next_task, lowest_rq->cpu);
activate_task(lowest_rq, next_task, 0);
static int pull_rt_task(struct rq *this_rq)
{
- struct task_struct *next;
- struct task_struct *p;
+ int this_cpu = this_rq->cpu, ret = 0, cpu;
+ struct task_struct *p, *next;
struct rq *src_rq;
- cpumask_t *rto_cpumask;
- int this_cpu = this_rq->cpu;
- int cpu;
- int ret = 0;
- assert_spin_locked(&this_rq->lock);
-
- /*
- * If cpusets are used, and we have overlapping
- * run queue cpusets, then this algorithm may not catch all.
- * This is just the price you pay on trying to keep
- * dirtying caches down on large SMP machines.
- */
- if (likely(!rt_overloaded()))
+ if (likely(!rt_overloaded(this_rq)))
return 0;
next = pick_next_task_rt(this_rq);
- rto_cpumask = rt_overload();
-
- for_each_cpu_mask(cpu, *rto_cpumask) {
+ for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
if (this_cpu == cpu)
continue;
if (double_lock_balance(this_rq, src_rq)) {
/* unlocked our runqueue lock */
struct task_struct *old_next = next;
+
next = pick_next_task_rt(this_rq);
if (next != old_next)
ret = 1;
}
- if (likely(src_rq->rt.rt_nr_running <= 1))
+ if (likely(src_rq->rt.rt_nr_running <= 1)) {
/*
* Small chance that this_rq->curr changed
* but it's really harmless here.
*/
rt_clear_overload(this_rq);
- else
+ } else {
/*
* Heh, the src_rq is now overloaded, since
* we already have the src_rq lock, go straight
* to pulling tasks from it.
*/
goto try_pulling;
+ }
spin_unlock(&src_rq->lock);
continue;
}
*/
if (double_lock_balance(this_rq, src_rq)) {
struct task_struct *old_next = next;
+
next = pick_next_task_rt(this_rq);
if (next != old_next)
ret = 1;
*/
if (p->prio < src_rq->curr->prio ||
(next && next->prio < src_rq->curr->prio))
- goto bail;
+ goto out;
ret = 1;
* case there's an even higher prio task
 * in another runqueue. (low likelihood
* but possible)
- */
-
- /*
+ *
* Update next so that we won't pick a task
* on another cpu with a priority lower (or equal)
* than the one we just picked.
next = p;
}
- bail:
+ out:
spin_unlock(&src_rq->lock);
}
return ret;
}
-static void schedule_balance_rt(struct rq *rq,
- struct task_struct *prev)
+static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
- if (unlikely(rt_task(prev)) &&
- rq->rt.highest_prio > prev->prio)
+ if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
pull_rt_task(rq);
}
-static void schedule_tail_balance_rt(struct rq *rq)
+static void post_schedule_rt(struct rq *rq)
{
/*
* If we have more than one rt_task queued, then
}
-static void wakeup_balance_rt(struct rq *rq, struct task_struct *p)
+static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
{
- if (unlikely(rt_task(p)) &&
- !task_running(rq, p) &&
+ if (!task_running(rq, p) &&
(p->prio >= rq->rt.highest_prio) &&
rq->rt.overloaded)
push_rt_tasks(rq);
/* don't touch RT tasks */
return 0;
}
+
static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
{
int weight = cpus_weight(*new_mask);
if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
struct rq *rq = task_rq(p);
- if ((p->nr_cpus_allowed <= 1) && (weight > 1))
+ if ((p->nr_cpus_allowed <= 1) && (weight > 1)) {
rq->rt.rt_nr_migratory++;
- else if((p->nr_cpus_allowed > 1) && (weight <= 1)) {
+ } else if ((p->nr_cpus_allowed > 1) && (weight <= 1)) {
BUG_ON(!rq->rt.rt_nr_migratory);
rq->rt.rt_nr_migratory--;
}
p->cpus_allowed = *new_mask;
p->nr_cpus_allowed = weight;
}
-#else /* CONFIG_SMP */
-# define schedule_tail_balance_rt(rq) do { } while (0)
-# define schedule_balance_rt(rq, prev) do { } while (0)
-# define wakeup_balance_rt(rq, p) do { } while (0)
+
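+/*
+ * rq is being attached to a root domain: publish its overload state
+ * via the new domain's rto_mask and rto_count.
+ */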
+/* Assumes rq->lock is held */
+static void join_domain_rt(struct rq *rq)
+{
+ if (rq->rt.overloaded)
+ rt_set_overload(rq);
+}
+
+/* Assumes rq->lock is held */
+static void leave_domain_rt(struct rq *rq)
+{
+ if (rq->rt.overloaded)
+ rt_clear_overload(rq);
+}
+
+/*
+ * When switching away from the rt queue, we may end up in a
+ * position where we want to pull RT tasks from other runqueues.
+ */
+static void switched_from_rt(struct rq *rq, struct task_struct *p,
+ int running)
+{
+ /*
+ * If there are other RT tasks then we will reschedule
+ * and the scheduling of the other RT tasks will handle
+ * the balancing. But if we are the last RT task
+ * we may need to handle the pulling of RT tasks
+ * now.
+ */
+ if (!rq->rt.rt_nr_running)
+ pull_rt_task(rq);
+}
#endif /* CONFIG_SMP */
+/*
+ * When switching a task to RT, we may overload the runqueue
+ * with RT tasks. In this case we try to push them off to
+ * other runqueues.
+ */
+static void switched_to_rt(struct rq *rq, struct task_struct *p,
+ int running)
+{
+ int check_resched = 1;
+
+ /*
+ * If we are already running, then there's nothing
+ * that needs to be done. But if we are not running
+ * we may need to preempt the current running task.
+ * If that current running task is also an RT task
+ * then see if we can move to another run queue.
+ */
+ if (!running) {
+#ifdef CONFIG_SMP
+ if (rq->rt.overloaded && push_rt_task(rq) &&
+ /* Don't resched if we changed runqueues */
+ rq != task_rq(p))
+ check_resched = 0;
+#endif /* CONFIG_SMP */
+ if (check_resched && p->prio < rq->curr->prio)
+ resched_task(rq->curr);
+ }
+}
+
+/*
+ * Priority of the task has changed. This may cause
+ * us to initiate a push or pull.
+ */
+static void prio_changed_rt(struct rq *rq, struct task_struct *p,
+ int oldprio, int running)
+{
+ if (running) {
+#ifdef CONFIG_SMP
+ /*
+ * If our priority decreases while running, we
+ * may need to pull tasks to this runqueue.
+ */
+ if (oldprio < p->prio)
+ pull_rt_task(rq);
+ /*
+ * If there's a higher priority task waiting to run
+ * then reschedule.
+ */
+ if (p->prio > rq->rt.highest_prio)
+ resched_task(p);
+#else
+ /* For UP simply resched on drop of prio */
+ if (oldprio < p->prio)
+ resched_task(p);
+#endif /* CONFIG_SMP */
+ } else {
+ /*
+ * This task is not running, but if its priority is higher
+ * than that of the current running task, then reschedule.
+ */
+ if (p->prio < rq->curr->prio)
+ resched_task(rq->curr);
+ }
+}
+
static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
update_curr_rt(rq);
.load_balance = load_balance_rt,
.move_one_task = move_one_task_rt,
.set_cpus_allowed = set_cpus_allowed_rt,
+ .join_domain = join_domain_rt,
+ .leave_domain = leave_domain_rt,
+ .pre_schedule = pre_schedule_rt,
+ .post_schedule = post_schedule_rt,
+ .task_wake_up = task_wake_up_rt,
+ .switched_from = switched_from_rt,
#endif
.set_curr_task = set_curr_task_rt,
.task_tick = task_tick_rt,
+
+ .prio_changed = prio_changed_rt,
+ .switched_to = switched_to_rt,
};