cpumask: remove cpumask allocation from idle_balance

[linux-2.6-omap-h63xx.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index e72485033c48e0317a22d376e2c290440b40cce9..48862d418bed6c2203af4aa8e0fba1e1a698b2bd 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
  {
         ktime_t now;
  
-       if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
+       if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                 return;
  
         if (hrtimer_active(&rt_b->rt_period_timer))
@@ -2266,16 +2266,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
         if (!sched_feat(SYNC_WAKEUPS))
                 sync = 0;
  
-       if (!sync) {
-               if (current->se.avg_overlap < sysctl_sched_migration_cost &&
-                         p->se.avg_overlap < sysctl_sched_migration_cost)
-                       sync = 1;
-       } else {
-               if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
-                         p->se.avg_overlap >= sysctl_sched_migration_cost)
-                       sync = 0;
-       }
-
  #ifdef CONFIG_SMP
         if (sched_feat(LB_WAKEUP_UPDATE)) {
                 struct sched_domain *sd;
@@ -3458,19 +3448,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
   */
  #define MAX_PINNED_INTERVAL    512
  
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
  /*
   * Check this_cpu to ensure it is balanced within domain. Attempt to move
   * tasks if there is an imbalance.
   */
  static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *balance, struct cpumask *cpus)
+                       int *balance)
  {
         int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
         struct sched_group *group;
         unsigned long imbalance;
         struct rq *busiest;
         unsigned long flags;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
         cpumask_setall(cpus);
  
@@ -3625,8 +3619,7 @@ out:
   * this_rq is locked.
   */
  static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-                       struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
  {
         struct sched_group *group;
         struct rq *busiest = NULL;
@@ -3634,6 +3627,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
         int ld_moved = 0;
         int sd_idle = 0;
         int all_pinned = 0;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
         cpumask_setall(cpus);
  
@@ -3774,10 +3768,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
         struct sched_domain *sd;
         int pulled_task = 0;
         unsigned long next_balance = jiffies + HZ;
-       cpumask_var_t tmpmask;
-
-       if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-               return;
  
         for_each_domain(this_cpu, sd) {
                 unsigned long interval;
@@ -3788,7 +3778,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                 if (sd->flags & SD_BALANCE_NEWIDLE)
                         /* If we've pulled tasks over stop searching: */
                         pulled_task = load_balance_newidle(this_cpu, this_rq,
-                                                          sd, tmpmask);
+                                                          sd);
  
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
@@ -3803,7 +3793,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                  */
                 this_rq->next_balance = next_balance;
         }
-       free_cpumask_var(tmpmask);
  }
  
  /*
@@ -3953,11 +3942,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
         int need_serialize;
-       cpumask_var_t tmp;
-
-       /* Fails alloc?  Rebalancing probably not a priority right now. */
-       if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-               return;
  
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3982,7 +3966,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
                 }
  
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                       if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+                       if (load_balance(cpu, rq, sd, idle, &balance)) {
                                 /*
                                  * We've pulled tasks over so either we're no
                                  * longer idle, or one of our SMT siblings is
@@ -4016,8 +4000,6 @@ out:
          */
         if (likely(update_next_balance))
                 rq->next_balance = next_balance;
-
-       free_cpumask_var(tmp);
  }
  
  /*
@@ -5954,12 +5936,7 @@ void sched_show_task(struct task_struct *p)
                 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
  #endif
  #ifdef CONFIG_DEBUG_STACK_USAGE
-       {
-               unsigned long *n = end_of_stack(p);
-               while (!*n)
-                       n++;
-               free = (unsigned long)n - (unsigned long)end_of_stack(p);
-       }
+       free = stack_not_used(p);
  #endif
         printk(KERN_CONT "%5lu %5d %6d\n", free,
                 task_pid_nr(p), task_pid_nr(p->real_parent));
@@ -6954,20 +6931,26 @@ static void free_rootdomain(struct root_domain *rd)
  
  static void rq_attach_root(struct rq *rq, struct root_domain *rd)
  {
+       struct root_domain *old_rd = NULL;
         unsigned long flags;
  
         spin_lock_irqsave(&rq->lock, flags);
  
         if (rq->rd) {
-               struct root_domain *old_rd = rq->rd;
+               old_rd = rq->rd;
  
                 if (cpumask_test_cpu(rq->cpu, old_rd->online))
                         set_rq_offline(rq);
  
                 cpumask_clear_cpu(rq->cpu, old_rd->span);
  
-               if (atomic_dec_and_test(&old_rd->refcount))
-                       free_rootdomain(old_rd);
+               /*
+                * If we dont want to free the old_rt yet then
+                * set old_rd to NULL to skip the freeing later
+                * in this function:
+                */
+               if (!atomic_dec_and_test(&old_rd->refcount))
+                       old_rd = NULL;
         }
  
         atomic_inc(&rd->refcount);
@@ -6978,6 +6961,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
                 set_rq_online(rq);
  
         spin_unlock_irqrestore(&rq->lock, flags);
+
+       if (old_rd)
+               free_rootdomain(old_rd);
  }
  
  static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
@@ -7255,7 +7241,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
  {
         int group;
  
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
         if (sg)
                 *sg = &per_cpu(sched_group_core, group).sg;
@@ -7284,7 +7270,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
         cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
         group = cpumask_first(mask);
  #elif defined(CONFIG_SCHED_SMT)
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
  #else
         group = cpu;
@@ -7627,7 +7613,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 SD_INIT(sd, SIBLING);
                 set_domain_attribute(sd, attr);
                 cpumask_and(sched_domain_span(sd),
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7638,7 +7624,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
         /* Set up CPU (sibling) groups */
         for_each_cpu(i, cpu_map) {
                 cpumask_and(this_sibling_map,
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                 if (i != cpumask_first(this_sibling_map))
                         continue;
  
@@ -8309,6 +8295,9 @@ void __init sched_init(void)
  #endif
  #ifdef CONFIG_USER_SCHED
         alloc_size *= 2;
+#endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+       alloc_size *= num_possible_cpus() * cpumask_size();
  #endif
         /*
          * As sched_init() is called before page_alloc is setup,
@@ -8347,6 +8336,12 @@ void __init sched_init(void)
                 ptr += nr_cpu_ids * sizeof(void **);
  #endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+               for_each_possible_cpu(i) {
+                       per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+                       ptr += cpumask_size();
+               }
+#endif /* CONFIG_CPUMASK_OFFSTACK */
         }
  
  #ifdef CONFIG_SMP
@@ -9225,6 +9220,16 @@ static int sched_rt_global_constraints(void)
  
         return ret;
  }
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+       /* Don't accept realtime tasks when there is no way for them to run */
+       if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+               return 0;
+
+       return 1;
+}
+
  #else /* !CONFIG_RT_GROUP_SCHED */
  static int sched_rt_global_constraints(void)
  {
@@ -9318,8 +9323,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct task_struct *tsk)
  {
  #ifdef CONFIG_RT_GROUP_SCHED
-       /* Don't accept realtime tasks when there is no way for them to run */
-       if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
+       if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
                 return -EINVAL;
  #else
         /* We don't support RT-tasks being in separate groups */
@@ -9482,7 +9486,7 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
  
  static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
  {
-       u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
         u64 data;
  
  #ifndef CONFIG_64BIT
@@ -9501,7 +9505,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
  
  static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
  {
-       u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
  
  #ifndef CONFIG_64BIT
         /*
@@ -9597,7 +9601,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
         ca = task_ca(tsk);
  
         for (; ca; ca = ca->parent) {
-               u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+               u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
         }
  }