diff --git a/kernel/sched.c b/kernel/sched.c
index 0feeacb9149749a04219eba1f6f36c4aed30065c..3399701c680e392f46e21829cee8da0bf5482303 100644
 #define TASK_PREEMPTS_CURR(p, rq) \
        ((p)->prio < (rq)->curr->prio)
 
-/*
- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
- * to time slice values: [800ms ... 100ms ... 5ms]
- *
- * The higher a thread's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority thread gets MIN_TIMESLICE worth of execution time.
- */
-
 #define SCALE_PRIO(x, prio) \
        max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
 
@@ -180,6 +171,15 @@ static unsigned int static_prio_timeslice(int static_prio)
                return SCALE_PRIO(DEF_TIMESLICE, static_prio);
 }
 
+/*
+ * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
+ * to time slice values: [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+
 static inline unsigned int task_timeslice(struct task_struct *p)
 {
        return static_prio_timeslice(p->static_prio);
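
For reference, the nice-to-timeslice mapping described in the comment can be reproduced outside the kernel. The sketch below is illustrative only and not part of the patch; it assumes the millisecond equivalents of this tree's HZ-based constants (MAX_PRIO == 140, MAX_USER_PRIO == 40, DEF_TIMESLICE == 100 ms, MIN_TIMESLICE == 5 ms) and that static_prio_timeslice() scales DEF_TIMESLICE * 4 for negative nice values in the branch not shown in the hunk above.

#include <stdio.h>

/* Millisecond stand-ins for the HZ-based kernel constants (assumed). */
#define MAX_PRIO        140
#define MAX_USER_PRIO   40
#define MIN_TIMESLICE   5       /* ms */
#define DEF_TIMESLICE   100     /* ms */
#define NICE_TO_PRIO(nice)      (120 + (nice))

/* Same arithmetic as SCALE_PRIO() above, with max() spelled out. */
static unsigned int scale_prio(unsigned int x, int prio)
{
        unsigned int slice = x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2);

        return slice > MIN_TIMESLICE ? slice : MIN_TIMESLICE;
}

/* Mirrors static_prio_timeslice(); the DEF_TIMESLICE * 4 factor for
 * negative nice values is an assumption about the branch not shown. */
static unsigned int timeslice_ms(int nice)
{
        int static_prio = NICE_TO_PRIO(nice);

        if (static_prio < NICE_TO_PRIO(0))
                return scale_prio(DEF_TIMESLICE * 4, static_prio);
        return scale_prio(DEF_TIMESLICE, static_prio);
}

int main(void)
{
        /* Prints 800, 100 and 5, matching [800ms ... 100ms ... 5ms]. */
        printf("nice -20 -> %u ms\n", timeslice_ms(-20));
        printf("nice   0 -> %u ms\n", timeslice_ms(0));
        printf("nice  19 -> %u ms\n", timeslice_ms(19));
        return 0;
}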
@@ -1232,7 +1232,7 @@ nextgroup:
 }
 
 /*
- * find_idlest_queue - find the idlest runqueue among the cpus in group.
+ * find_idlest_cpu - find the idlest cpu among the cpus in group.
  */
 static int
 find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
@@ -1822,14 +1822,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
        struct mm_struct *mm = next->mm;
        struct mm_struct *oldmm = prev->active_mm;
 
-       if (unlikely(!mm)) {
+       if (!mm) {
                next->active_mm = oldmm;
                atomic_inc(&oldmm->mm_count);
                enter_lazy_tlb(oldmm, next);
        } else
                switch_mm(oldmm, mm, next);
 
-       if (unlikely(!prev->mm)) {
+       if (!prev->mm) {
                prev->active_mm = NULL;
                WARN_ON(rq->prev_mm);
                rq->prev_mm = oldmm;
@@ -2541,8 +2541,14 @@ static int load_balance(int this_cpu, struct rq *this_rq,
        struct rq *busiest;
        cpumask_t cpus = CPU_MASK_ALL;
 
+       /*
+        * When the power savings policy is enabled for the parent domain, an
+        * idle sibling can pick up load irrespective of the busy siblings. In
+        * this case, let the state of the idle sibling percolate up as IDLE,
+        * instead of portraying it as NOT_IDLE.
+        */
        if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
-           !sched_smt_power_savings)
+           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
                sd_idle = 1;
 
        schedstat_inc(sd, lb_cnt[idle]);
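
The sched_smt_power_savings check is replaced by test_sd_parent(sd, SD_POWERSAVINGS_BALANCE). That helper is not defined in this file; in this series it is a small sched.h macro that, roughly, checks whether the parent domain carries the given flag. A minimal stand-alone sketch of those semantics (the stub type, helper name and flag value here are illustrative, not the kernel's):

#include <stdio.h>

#define SD_POWERSAVINGS_BALANCE 0x0100  /* illustrative flag value */

/* Cut-down stand-in for struct sched_domain. */
struct sd_stub {
        unsigned long flags;
        struct sd_stub *parent;
};

/* Same idea as test_sd_parent(sd, flag): true only when a parent domain
 * exists and has the flag set; a top-level domain never matches. */
static int parent_has_flag(const struct sd_stub *sd, unsigned long flag)
{
        return (sd->parent && (sd->parent->flags & flag)) ? 1 : 0;
}

int main(void)
{
        struct sd_stub package = { .flags = SD_POWERSAVINGS_BALANCE, .parent = NULL };
        struct sd_stub smt = { .flags = 0, .parent = &package };

        /* With power savings enabled on the parent, load_balance() above
         * skips sd_idle = 1, so the idle SMT sibling reports itself as
         * IDLE and may pull load even though its sibling thread is busy. */
        printf("parent has POWERSAVINGS_BALANCE: %d\n",
               parent_has_flag(&smt, SD_POWERSAVINGS_BALANCE));
        return 0;
}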
@@ -2638,7 +2644,7 @@ redo:
        }
 
        if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-           !sched_smt_power_savings)
+           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
                return -1;
        return nr_moved;
 
@@ -2654,7 +2660,7 @@ out_one_pinned:
                sd->balance_interval *= 2;
 
        if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-                       !sched_smt_power_savings)
+           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
                return -1;
        return 0;
 }
@@ -2676,7 +2682,14 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
        int sd_idle = 0;
        cpumask_t cpus = CPU_MASK_ALL;
 
-       if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+       /*
+        * When the power savings policy is enabled for the parent domain, an
+        * idle sibling can pick up load irrespective of the busy siblings. In
+        * this case, let the state of the idle sibling percolate up as IDLE,
+        * instead of portraying it as NOT_IDLE.
+        */
+       if (sd->flags & SD_SHARE_CPUPOWER &&
+           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
                sd_idle = 1;
 
        schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
@@ -2717,7 +2730,8 @@ redo:
 
        if (!nr_moved) {
                schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
-               if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+               if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+                   !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
                        return -1;
        } else
                sd->nr_balance_failed = 0;
@@ -2727,7 +2741,7 @@ redo:
 out_balanced:
        schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
        if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-                                       !sched_smt_power_savings)
+           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
                return -1;
        sd->nr_balance_failed = 0;
 
@@ -3477,7 +3491,7 @@ asmlinkage void __sched preempt_schedule(void)
         * If there is a non-zero preempt_count or interrupts are disabled,
         * we do not want to preempt the current task.  Just return..
         */
-       if (unlikely(ti->preempt_count || irqs_disabled()))
+       if (likely(ti->preempt_count || irqs_disabled()))
                return;
 
 need_resched:
@@ -5400,7 +5414,9 @@ static int sd_degenerate(struct sched_domain *sd)
        if (sd->flags & (SD_LOAD_BALANCE |
                         SD_BALANCE_NEWIDLE |
                         SD_BALANCE_FORK |
-                        SD_BALANCE_EXEC)) {
+                        SD_BALANCE_EXEC |
+                        SD_SHARE_CPUPOWER |
+                        SD_SHARE_PKG_RESOURCES)) {
                if (sd->groups != sd->groups->next)
                        return 0;
        }
@@ -5434,7 +5450,9 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
                pflags &= ~(SD_LOAD_BALANCE |
                                SD_BALANCE_NEWIDLE |
                                SD_BALANCE_FORK |
-                               SD_BALANCE_EXEC);
+                               SD_BALANCE_EXEC |
+                               SD_SHARE_CPUPOWER |
+                               SD_SHARE_PKG_RESOURCES);
        }
        if (~cflags & pflags)
                return 0;
@@ -6240,6 +6258,58 @@ static void free_sched_groups(const cpumask_t *cpu_map)
 }
 #endif
 
+/*
+ * Initialize sched groups cpu_power.
+ *
+ * cpu_power indicates the capacity of a sched group, which is used while
+ * distributing the load between different sched groups in a sched domain.
+ * Typically cpu_power for all the groups in a sched domain will be the same
+ * unless there are asymmetries in the topology. If there are asymmetries,
+ * a group with more cpu_power will pick up more load than a group with
+ * less cpu_power.
+ *
+ * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents
+ * the maximum number of tasks a group can handle in the presence of other idle
+ * or lightly loaded groups in the same sched domain.
+ */
+static void init_sched_groups_power(int cpu, struct sched_domain *sd)
+{
+       struct sched_domain *child;
+       struct sched_group *group;
+
+       WARN_ON(!sd || !sd->groups);
+
+       if (cpu != first_cpu(sd->groups->cpumask))
+               return;
+
+       child = sd->child;
+
+       /*
+        * For the perf policy, if the groups in the child domain share
+        * resources (for example cores sharing some portions of the cache
+        * hierarchy, or SMT), then set this domain's groups' cpu_power such
+        * that each group can handle only one task when there are other
+        * idle groups in the same sched domain.
+        */
+       if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
+                      (child->flags &
+                       (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
+               sd->groups->cpu_power = SCHED_LOAD_SCALE;
+               return;
+       }
+
+       sd->groups->cpu_power = 0;
+
+       /*
+        * Add the cpu_power of each child group to this group's cpu_power.
+        */
+       group = child->groups;
+       do {
+               sd->groups->cpu_power += group->cpu_power;
+               group = group->next;
+       } while (group != child->groups);
+}
+
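
Concretely: under the performance policy, a group whose child groups share CPU power or package resources gets a single SCHED_LOAD_SCALE of capacity (one task while other groups in the domain sit idle), whereas under the power-savings policy the group's capacity is the sum of its child groups', as computed by the loop at the end of the function. A stand-alone sketch of that aggregation over the circular group list (stub type; SCHED_LOAD_SCALE assumed to be 1024 for illustration; not part of the patch):

#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL         /* assumed value for illustration */

/* Cut-down stand-in for struct sched_group: a circular singly linked list. */
struct group_stub {
        unsigned long cpu_power;
        struct group_stub *next;
};

/* Mirrors the do/while loop above: a parent-level group's capacity is the
 * sum of the capacities of the groups in its child domain. */
static unsigned long sum_child_power(const struct group_stub *child_groups)
{
        const struct group_stub *group = child_groups;
        unsigned long power = 0;

        do {
                power += group->cpu_power;
                group = group->next;
        } while (group != child_groups);

        return power;
}

int main(void)
{
        /* A package with two cores, each core group at SCHED_LOAD_SCALE:
         * under the power-savings policy the package-level group can take
         * two tasks before it looks busier than an idle package. */
        struct group_stub core0, core1;

        core0.cpu_power = SCHED_LOAD_SCALE;
        core1.cpu_power = SCHED_LOAD_SCALE;
        core0.next = &core1;
        core1.next = &core0;

        printf("package cpu_power = %lu (%lu x SCHED_LOAD_SCALE)\n",
               sum_child_power(&core0),
               sum_child_power(&core0) / SCHED_LOAD_SCALE);
        return 0;
}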
 /*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
@@ -6247,6 +6317,7 @@ static void free_sched_groups(const cpumask_t *cpu_map)
 static int build_sched_domains(const cpumask_t *cpu_map)
 {
        int i;
+       struct sched_domain *sd;
 #ifdef CONFIG_NUMA
        struct sched_group **sched_group_nodes = NULL;
        struct sched_group *sched_group_allnodes = NULL;
@@ -6278,9 +6349,10 @@ static int build_sched_domains(const cpumask_t *cpu_map)
                                > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
                        if (!sched_group_allnodes) {
                                sched_group_allnodes
-                                       = kmalloc(sizeof(struct sched_group)
-                                                       * MAX_NUMNODES,
-                                                 GFP_KERNEL);
+                                       = kmalloc_node(sizeof(struct sched_group)
+                                                       * MAX_NUMNODES,
+                                                 GFP_KERNEL,
+                                                 cpu_to_node(i));
                                if (!sched_group_allnodes) {
                                        printk(KERN_WARNING
                                        "Can not alloc allnodes sched group\n");
@@ -6456,72 +6528,20 @@ static int build_sched_domains(const cpumask_t *cpu_map)
        /* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
        for_each_cpu_mask(i, *cpu_map) {
-               struct sched_domain *sd;
                sd = &per_cpu(cpu_domains, i);
-               sd->groups->cpu_power = SCHED_LOAD_SCALE;
+               init_sched_groups_power(i, sd);
        }
 #endif
 #ifdef CONFIG_SCHED_MC
        for_each_cpu_mask(i, *cpu_map) {
-               int power;
-               struct sched_domain *sd;
                sd = &per_cpu(core_domains, i);
-               if (sched_smt_power_savings)
-                       power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-               else
-                       power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
-                                           * SCHED_LOAD_SCALE / 10;
-               sd->groups->cpu_power = power;
+               init_sched_groups_power(i, sd);
        }
 #endif
 
        for_each_cpu_mask(i, *cpu_map) {
-               struct sched_domain *sd;
-#ifdef CONFIG_SCHED_MC
                sd = &per_cpu(phys_domains, i);
-               if (i != first_cpu(sd->groups->cpumask))
-                       continue;
-
-               sd->groups->cpu_power = 0;
-               if (sched_mc_power_savings || sched_smt_power_savings) {
-                       int j;
-
-                       for_each_cpu_mask(j, sd->groups->cpumask) {
-                               struct sched_domain *sd1;
-                               sd1 = &per_cpu(core_domains, j);
-                               /*
-                                * for each core we will add once
-                                * to the group in physical domain
-                                */
-                               if (j != first_cpu(sd1->groups->cpumask))
-                                       continue;
-
-                               if (sched_smt_power_savings)
-                                       sd->groups->cpu_power += sd1->groups->cpu_power;
-                               else
-                                       sd->groups->cpu_power += SCHED_LOAD_SCALE;
-                       }
-               } else
-                       /*
-                        * This has to be < 2 * SCHED_LOAD_SCALE
-                        * Lets keep it SCHED_LOAD_SCALE, so that
-                        * while calculating NUMA group's cpu_power
-                        * we can simply do
-                        *  numa_group->cpu_power += phys_group->cpu_power;
-                        *
-                        * See "only add power once for each physical pkg"
-                        * comment below
-                        */
-                       sd->groups->cpu_power = SCHED_LOAD_SCALE;
-#else
-               int power;
-               sd = &per_cpu(phys_domains, i);
-               if (sched_smt_power_savings)
-                       power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-               else
-                       power = SCHED_LOAD_SCALE;
-               sd->groups->cpu_power = power;
-#endif
+               init_sched_groups_power(i, sd);
        }
 
 #ifdef CONFIG_NUMA