timers: fix itimer/many thread hang, fix

[linux-2.6-omap-h63xx.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 95e6ad3c231deb8ed43ca27476400b85ecd1eb24..ebb03def564bfe93c17953a7e68fbed55de7abd1 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -808,9 +808,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  
  /*
   * ratelimit for updating the group shares.
- * default: 0.5ms
+ * default: 0.25ms, multiplied by 'factor' in sched_init_granularity()
   */
-const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
+unsigned int sysctl_sched_shares_ratelimit = 250000;
  
  /*
   * period over which we measure -rt task cpu usage in us.
@@ -4037,23 +4037,26 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
  EXPORT_PER_CPU_SYMBOL(kstat);
  
  /*
- * Return p->sum_exec_runtime plus any more ns on the sched_clock
- * that have not yet been banked in case the task is currently running.
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running; otherwise return 0.
   */
-unsigned long long task_sched_runtime(struct task_struct *p)
+unsigned long long task_delta_exec(struct task_struct *p)
  {
         unsigned long flags;
-       u64 ns, delta_exec;
         struct rq *rq;
+       u64 ns = 0;
  
         rq = task_rq_lock(p, &flags);
-       ns = p->se.sum_exec_runtime;
+
         if (task_current(rq, p)) {
+               u64 delta_exec;
+
                 update_rq_clock(rq);
                 delta_exec = rq->clock - p->se.exec_start;
                 if ((s64)delta_exec > 0)
-                       ns += delta_exec;
+                       ns = delta_exec;
         }
+
         task_rq_unlock(rq, &flags);
  
         return ns;
@@ -4070,6 +4073,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
         cputime64_t tmp;
  
         p->utime = cputime_add(p->utime, cputime);
+       account_group_user_time(p, cputime);
  
         /* Add user time to cpustat. */
         tmp = cputime_to_cputime64(cputime);
@@ -4094,6 +4098,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
         tmp = cputime_to_cputime64(cputime);
  
         p->utime = cputime_add(p->utime, cputime);
+       account_group_user_time(p, cputime);
         p->gtime = cputime_add(p->gtime, cputime);
  
         cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4129,6 +4134,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
         }
  
         p->stime = cputime_add(p->stime, cputime);
+       account_group_system_time(p, cputime);
  
         /* Add system time to cpustat. */
         tmp = cputime_to_cputime64(cputime);
@@ -4170,6 +4176,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
  
         if (p == rq->idle) {
                 p->stime = cputime_add(p->stime, steal);
+               account_group_system_time(p, steal);
                 if (atomic_read(&rq->nr_iowait) > 0)
                         cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                 else
@@ -4178,6 +4185,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
                 cpustat->steal = cputime64_add(cpustat->steal, tmp);
  }
  
+/*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+cputime_t task_utime(struct task_struct *p)
+{
+       return p->utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+       return p->stime;
+}
+#else
+cputime_t task_utime(struct task_struct *p)
+{
+       clock_t utime = cputime_to_clock_t(p->utime),
+               total = utime + cputime_to_clock_t(p->stime);
+       u64 temp;
+
+       /*
+        * Use CFS's precise accounting:
+        */
+       temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+       if (total) {
+               temp *= utime;
+               do_div(temp, total);
+       }
+       utime = (clock_t)temp;
+
+       p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
+       return p->prev_utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+       clock_t stime;
+
+       /*
+        * Use CFS's precise accounting. (we subtract utime from
+        * the total, to make sure the total observed by userspace
+        * grows monotonically - apps rely on that):
+        */
+       stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+                       cputime_to_clock_t(task_utime(p));
+
+       if (stime >= 0)
+               p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
+       return p->prev_stime;
+}
+#endif
+
+inline cputime_t task_gtime(struct task_struct *p)
+{
+       return p->gtime;
+}
+
  /*
   * This function gets called by the timer code, with HZ frequency.
   * We call it with interrupts disabled.
@@ -5786,6 +5852,8 @@ static inline void sched_init_granularity(void)
                 sysctl_sched_latency = limit;
  
         sysctl_sched_wakeup_granularity *= factor;
+
+       sysctl_sched_shares_ratelimit *= factor;
  }
  
  #ifdef CONFIG_SMP
@@ -7635,24 +7703,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
   * and partition_sched_domains() will fallback to the single partition
   * 'fallback_doms', it also forces the domains to be rebuilt.
   *
+ * If doms_new==NULL it will be replaced with the online, non-isolated cpus.
+ * ndoms_new==0 is a special case for destroying existing domains.
+ * It will not create the default domain.
+ *
   * Call with hotplug lock held
   */
  void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
                              struct sched_domain_attr *dattr_new)
  {
-       int i, j;
+       int i, j, n;
  
         mutex_lock(&sched_domains_mutex);
  
         /* always unregister in case we don't destroy any domains */
         unregister_sched_domain_sysctl();
  
-       if (doms_new == NULL)
-               ndoms_new = 0;
+       n = doms_new ? ndoms_new : 0;
  
         /* Destroy deleted domains */
         for (i = 0; i < ndoms_cur; i++) {
-               for (j = 0; j < ndoms_new; j++) {
+               for (j = 0; j < n; j++) {
                         if (cpus_equal(doms_cur[i], doms_new[j])
                             && dattrs_equal(dattr_cur, i, dattr_new, j))
                                 goto match1;
@@ -7665,7 +7736,6 @@ match1:
  
         if (doms_new == NULL) {
                 ndoms_cur = 0;
-               ndoms_new = 1;
                 doms_new = &fallback_doms;
                 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
                 dattr_new = NULL;
@@ -7702,8 +7772,13 @@ match2:
  int arch_reinit_sched_domains(void)
  {
         get_online_cpus();
+
+       /* Destroy domains first to force the rebuild */
+       partition_sched_domains(0, NULL, NULL);
+
         rebuild_sched_domains();
         put_online_cpus();
+
         return 0;
  }
  
@@ -7787,7 +7862,7 @@ static int update_sched_domains(struct notifier_block *nfb,
         case CPU_ONLINE_FROZEN:
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
-               partition_sched_domains(0, NULL, NULL);
+               partition_sched_domains(1, NULL, NULL);
                 return NOTIFY_OK;
  
         default:
@@ -8508,8 +8583,8 @@ struct task_group *sched_create_group(struct task_group *parent)
         WARN_ON(!parent); /* root should already exist */
  
         tg->parent = parent;
-       list_add_rcu(&tg->siblings, &parent->children);
         INIT_LIST_HEAD(&tg->children);
+       list_add_rcu(&tg->siblings, &parent->children);
         spin_unlock_irqrestore(&task_group_lock, flags);
  
         return tg;