#include <asm/unistd.h>
+/*
+ * Scheduler clock - returns current time in nanosecond units.
+ * This is the default implementation; architectures and
+ * sub-architectures can override it.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+ return (unsigned long long)jiffies * (1000000000 / HZ);
+}
+
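With HZ=1000 this default advances in (1000000000 / HZ) = 1,000,000 ns steps, i.e. millisecond resolution. Because the symbol is weak, an architecture overrides it simply by providing a non-weak definition of sched_clock(). A minimal sketch of such an override, assuming a hypothetical cycle counter read_cycles() and a boot-calibrated cycles-to-ns factor cyc2ns_scale (both invented here for illustration):

unsigned long long sched_clock(void)
{
	/*
	 * read_cycles() and cyc2ns_scale are placeholders: a real
	 * port would read its TSC/timebase and apply a multiplier
	 * calibrated during boot.
	 */
	unsigned long long cycles = read_cycles();

	/* cyc2ns_scale is ns-per-cycle scaled up by 2^10 */
	return (cycles * cyc2ns_scale) >> 10;
}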
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
seq_printf(seq, "domain%d %s", dcnt++, mask_str);
for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
itype++) {
- seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
+ seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
+ "%lu",
sd->lb_cnt[itype],
sd->lb_balanced[itype],
sd->lb_failed[itype],
sd->lb_imbalance[itype],
sd->lb_gained[itype],
sd->lb_hot_gained[itype],
sd->lb_nobusyq[itype],
sd->lb_nobusyg[itype]);
}
- seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
+ seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
+ " %lu %lu %lu\n",
sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
- sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance);
+ sd->ttwu_wake_remote, sd->ttwu_move_affine,
+ sd->ttwu_move_balance);
}
preempt_enable();
#endif
if (this_sd->flags & SD_WAKE_AFFINE) {
unsigned long tl = this_load;
- unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu);
+ unsigned long tl_per_task;
+
+ tl_per_task = cpu_avg_load_per_task(this_cpu);
/*
* If sync wakeup then subtract the (maximum possible)
return try_to_wake_up(p, state, 0);
}
+static void task_running_tick(struct rq *rq, struct task_struct *p);
/*
* Perform scheduler related setup for a newly forked process p.
* p is forked by current.
* runqueue lock is not a problem.
*/
current->time_slice = 1;
- scheduler_tick();
+ task_running_tick(cpu_rq(cpu), current);
}
local_irq_enable();
put_cpu();
struct mm_struct *mm = next->mm;
struct mm_struct *oldmm = prev->active_mm;
+ /*
+ * For paravirt, this is coupled with an exit in switch_to to
+ * combine the page table reload and the switch backend into
+ * one hypercall.
+ */
+ arch_enter_lazy_cpu_mode();
+
if (!mm) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
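On non-paravirt configurations these hooks compile away entirely, so this hunk adds no overhead to the common context-switch path. The generic fallback is a pair of empty macros along these lines (exact header placement is an assumption of this sketch):

#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
#define arch_enter_lazy_cpu_mode()	do { } while (0)
#define arch_leave_lazy_cpu_mode()	do { } while (0)
#endif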
pwr_now /= SCHED_LOAD_SCALE;
/* Amount of load we'd subtract */
- tmp = busiest_load_per_task*SCHED_LOAD_SCALE/busiest->cpu_power;
+ tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
+ busiest->cpu_power;
if (max_load > tmp)
pwr_move += busiest->cpu_power *
min(busiest_load_per_task, max_load - tmp);
/* Amount of load we'd add */
- if (max_load*busiest->cpu_power <
- busiest_load_per_task*SCHED_LOAD_SCALE)
- tmp = max_load*busiest->cpu_power/this->cpu_power;
+ if (max_load * busiest->cpu_power <
+ busiest_load_per_task * SCHED_LOAD_SCALE)
+ tmp = max_load * busiest->cpu_power / this->cpu_power;
else
- tmp = busiest_load_per_task*SCHED_LOAD_SCALE/this->cpu_power;
- pwr_move += this->cpu_power*min(this_load_per_task, this_load + tmp);
+ tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
+ this->cpu_power;
+ pwr_move += this->cpu_power *
+ min(this_load_per_task, this_load + tmp);
pwr_move /= SCHED_LOAD_SCALE;
/* Move if we gain throughput */
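To make the arithmetic concrete, here is a worked example with invented numbers, taking SCHED_LOAD_SCALE = 128 (as in kernels of this vintage) and equal cpu_power = 128 on both CPUs:

/*
 * Let busiest_load_per_task = 100, max_load = 150,
 *     this_load = 30, this_load_per_task = 100.
 *
 * Load we'd subtract:  tmp = 100 * 128 / 128 = 100
 *   max_load > tmp, so pwr_move += 128 * min(100, 150 - 100)   (= 50)
 *
 * Load we'd add: 150 * 128 >= 100 * 128, so the else branch runs:
 *   tmp = 100 * 128 / 128 = 100
 *   pwr_move += 128 * min(100, 30 + 100)                       (= 100)
 *
 * pwr_move /= 128  ->  150.  The task is moved only if this beats
 * pwr_now, i.e. if migrating one task would raise total throughput.
 */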
static void update_load(struct rq *this_rq)
{
unsigned long this_load;
- int i, scale;
+ unsigned int i, scale;
this_load = this_rq->raw_weighted_load;
/* Update our load: */
- for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
+ for (i = 0, scale = 1; i < 3; i++, scale += scale) {
unsigned long old_load, new_load;
+ /* scale is effectively 1 << i now, and >> i divides by scale */
+
old_load = this_rq->cpu_load[i];
new_load = this_load;
/*
*/
if (new_load > old_load)
new_load += scale-1;
- this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
+ this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
}
}
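Written out, the three cpu_load[] entries are exponential moving averages with progressively longer memory; for i = 0, 1, 2 (scale = 1, 2, 4) the update is:

/*
 * cpu_load[0] =  new_load                         (no history)
 * cpu_load[1] = (old_load * 1 + new_load) >> 1    (half new)
 * cpu_load[2] = (old_load * 3 + new_load) >> 2    (quarter new)
 *
 * The earlier "new_load += scale - 1" rounds the result up when the
 * load is rising, so a load spike is never hidden by truncation.
 */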
/*
* Spinlock count overflowing soon?
*/
- DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
+ PREEMPT_MASK - 10);
}
EXPORT_SYMBOL(add_preempt_count);
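For context, PREEMPT_MASK covers the low byte of preempt_count() in kernels of this vintage, so the warning trips at a nesting depth of about 245, a few increments before the counter would overflow into the softirq bits:

/*
 * preempt_count() layout (include/linux/hardirq.h, this era):
 *
 *   bits  0-7   PREEMPT_MASK  0x000000ff   preempt_disable() depth
 *   bits  8-15  SOFTIRQ_MASK  0x0000ff00   softirq nesting
 *   bits 16-27  HARDIRQ_MASK  0x0fff0000   hardirq nesting
 *
 * Every spin_lock() implies a preempt_disable(), so a leaked lock
 * eventually trips this check before corrupting bit 8.
 */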
"%s/0x%08x/%d\n",
current->comm, preempt_count(), current->pid);
debug_show_held_locks(current);
+ if (irqs_disabled())
+ print_irqtrace_events(current);
dump_stack();
}
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
}
/**
- * sched_setscheduler - change the scheduling policy and/or RT priority of
- * a thread.
+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
* @p: the task in question.
* @policy: new policy.
* @param: structure containing the new RT priority.
*
- * NOTE: the task may be already dead
+ * NOTE that the task may already be dead.
*/
int sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param)
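A typical in-kernel use is promoting a kthread to a real-time policy; a minimal sketch (the priority value 50 is arbitrary):

struct sched_param param = { .sched_priority = 50 };

/* Returns 0 on success or a negative errno (e.g. -EINVAL). */
if (sched_setscheduler(p, SCHED_FIFO, &param) < 0)
	printk(KERN_WARNING "failed to set SCHED_FIFO for %s\n",
	       p->comm);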
/**
* sys_sched_yield - yield the current processor to other threads.
*
- * this function yields the current CPU by moving the calling thread
+ * This function yields the current CPU by moving the calling thread
* to the expired array. If there are no other threads running on this
* CPU then this function will return.
*/
return 0;
}
-static inline int __resched_legal(int expected_preempt_count)
-{
- if (unlikely(preempt_count() != expected_preempt_count))
- return 0;
- if (unlikely(system_state != SYSTEM_RUNNING))
- return 0;
- return 1;
-}
-
static void __cond_resched(void)
{
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
int __sched cond_resched(void)
{
- if (need_resched() && __resched_legal(0)) {
+ if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
+ system_state == SYSTEM_RUNNING) {
__cond_resched();
return 1;
}
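The guarded call sites look like this: a long-running loop in process context drops a reschedule point each iteration (process_one() is hypothetical):

for (i = 0; i < nr_items; i++) {
	process_one(i);
	cond_resched();	/* schedules if needed; returns 1 if it did */
}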
ret = 1;
spin_lock(lock);
}
- if (need_resched() && __resched_legal(1)) {
+ if (need_resched() && system_state == SYSTEM_RUNNING) {
spin_release(&lock->dep_map, 1, _THIS_IP_);
_raw_spin_unlock(lock);
preempt_enable_no_resched();
{
BUG_ON(!in_softirq());
- if (need_resched() && __resched_legal(0)) {
+ if (need_resched() && system_state == SYSTEM_RUNNING) {
raw_local_irq_disable();
_local_bh_enable();
raw_local_irq_enable();
/**
* yield - yield the current processor to other threads.
*
- * this is a shortcut for kernel-space yielding - it marks the
+ * This is a shortcut for kernel-space yielding - it marks the
* thread runnable and calls sys_sched_yield().
*/
void __sched yield(void)
if (!(sd->flags & SD_LOAD_BALANCE)) {
printk("does not load-balance\n");
if (sd->parent)
- printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
+ printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
+ " has parent");
break;
}
printk("span %s\n", str);
if (!cpu_isset(cpu, sd->span))
- printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
+ printk(KERN_ERR "ERROR: domain->span does not contain "
+ "CPU%d\n", cpu);
if (!cpu_isset(cpu, group->cpumask))
- printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
+ printk(KERN_ERR "ERROR: domain->groups does not contain"
+ " CPU%d\n", cpu);
printk(KERN_DEBUG);
for (i = 0; i < level + 2; i++)
if (!group->cpu_power) {
printk("\n");
- printk(KERN_ERR "ERROR: domain->cpu_power not set\n");
+ printk(KERN_ERR "ERROR: domain->cpu_power not "
+ "set\n");
}
if (!cpus_weight(group->cpumask)) {
printk("\n");
if (!cpus_equal(sd->span, groupmask))
- printk(KERN_ERR "ERROR: groups don't span domain->span\n");
+ printk(KERN_ERR "ERROR: groups don't span "
+ "domain->span\n");
level++;
sd = sd->parent;
+ if (!sd)
+ continue;
- if (sd) {
- if (!cpus_subset(groupmask, sd->span))
- printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
- }
+ if (!cpus_subset(groupmask, sd->span))
+ printk(KERN_ERR "ERROR: parent span is not a superset "
+ "of domain->span\n");
} while (sd);
}
}
/* cpus with isolated domains */
-static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
/* Setup the mask of cpus configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
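The parser is registered for the isolcpus= boot parameter, so e.g. booting with isolcpus=2,3 removes CPUs 2 and 3 from the balancing domains; tasks only land there via explicit affinity calls:

__setup("isolcpus=", isolated_cpu_setup);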
*/
static void touch_cache(void *__cache, unsigned long __size)
{
- unsigned long size = __size/sizeof(long), chunk1 = size/3,
- chunk2 = 2*size/3;
+ unsigned long size = __size / sizeof(long);
+ unsigned long chunk1 = size / 3;
+ unsigned long chunk2 = 2 * size / 3;
unsigned long *cache = __cache;
int i;
*/
measure_one(cache, size, cpu1, cpu2);
for (i = 0; i < ITERATIONS; i++)
- cost1 += measure_one(cache, size - i*1024, cpu1, cpu2);
+ cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2);
measure_one(cache, size, cpu2, cpu1);
for (i = 0; i < ITERATIONS; i++)
- cost1 += measure_one(cache, size - i*1024, cpu2, cpu1);
+ cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1);
/*
* (We measure the non-migrating [cached] cost on both
measure_one(cache, size, cpu1, cpu1);
for (i = 0; i < ITERATIONS; i++)
- cost2 += measure_one(cache, size - i*1024, cpu1, cpu1);
+ cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1);
measure_one(cache, size, cpu2, cpu2);
for (i = 0; i < ITERATIONS; i++)
- cost2 += measure_one(cache, size - i*1024, cpu2, cpu2);
+ cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2);
/*
* Get the per-iteration migration cost:
*/
- do_div(cost1, 2*ITERATIONS);
- do_div(cost2, 2*ITERATIONS);
+ do_div(cost1, 2 * ITERATIONS);
+ do_div(cost2, 2 * ITERATIONS);
return cost1 - cost2;
}
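Restating the arithmetic: both totals cover 2*ITERATIONS runs, so after the two do_div() calls the difference is the per-iteration penalty (numbers below invented):

/*
 * cost1: cross-CPU (cache-cold) total over 2*ITERATIONS runs
 * cost2: same-CPU  (cache-hot)  total over 2*ITERATIONS runs
 *
 *   migration cost per iteration = cost1 / (2*ITERATIONS)
 *                                - cost2 / (2*ITERATIONS)
 *
 * e.g. totals of 120 ms and 40 ms over 50 runs give
 * (120 - 40) / 50 = 1.6 ms of extra cache-refill cost per
 * migration at this working-set size.
 */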
*/
cache = vmalloc(max_size);
if (!cache) {
- printk("could not vmalloc %d bytes for cache!\n", 2*max_size);
+ printk("could not vmalloc %d bytes for cache!\n", 2 * max_size);
return 1000000; /* return 1 msec on very small boxen */
}
avg_fluct = (avg_fluct + fluct)/2;
if (migration_debug)
- printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): (%8Ld %8Ld)\n",
+ printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): "
+ "(%8Ld %8Ld)\n",
cpu1, cpu2, size,
(long)cost / 1000000,
((long)cost / 100000) % 10,
-1
#endif
);
- if (system_state == SYSTEM_BOOTING) {
- if (num_online_cpus() > 1) {
- printk("migration_cost=");
- for (distance = 0; distance <= max_distance; distance++) {
- if (distance)
- printk(",");
- printk("%ld", (long)migration_cost[distance] / 1000);
- }
- printk("\n");
+ if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) {
+ printk("migration_cost=");
+ for (distance = 0; distance <= max_distance; distance++) {
+ if (distance)
+ printk(",");
+ printk("%ld", (long)migration_cost[distance] / 1000);
}
+ printk("\n");
}
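The line is printed in the same comma-separated form that the migration_cost= boot option accepts, so (if I read the setup handler right) the measured values can be pasted back on later boots to skip the measurement. An invented example for a two-level topology:

migration_cost=4000,8000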
j1 = jiffies;
if (migration_debug)
- printk("migration: %ld seconds\n", (j1-j0)/HZ);
+ printk("migration: %ld seconds\n", (j1-j0) / HZ);
/*
* Move back to the original CPU. NUMA-Q gets confused
lock_cpu_hotplug();
arch_init_sched_domains(&cpu_online_map);
- cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map);
+ cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
if (cpus_empty(non_isolated_cpus))
cpu_set(smp_processor_id(), non_isolated_cpus);
unlock_cpu_hotplug();
printk("in_atomic():%d, irqs_disabled():%d\n",
in_atomic(), irqs_disabled());
debug_show_held_locks(current);
+ if (irqs_disabled())
+ print_irqtrace_events(current);
dump_stack();
}
#endif