block: fix partial read() of /proc/{partitions,diskstats}

[linux-2.6-omap-h63xx.git] / kernel / cpuset.c
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 531b235e546f2af14dc79231124f22b446906039..d5ab79cf516d7edf77f8e45afe15c04cdae2cda4 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -54,7 +54,6 @@
  #include <asm/uaccess.h>
  #include <asm/atomic.h>
  #include <linux/mutex.h>
-#include <linux/kfifo.h>
  #include <linux/workqueue.h>
  #include <linux/cgroup.h>
  
@@ -365,7 +364,7 @@ void cpuset_update_task_memory_state(void)
                 my_cpusets_mem_gen = top_cpuset.mems_generation;
         } else {
                 rcu_read_lock();
-               my_cpusets_mem_gen = task_cs(current)->mems_generation;
+               my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
                 rcu_read_unlock();
         }
  
@@ -486,13 +485,38 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
  static void
  update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  {
-       if (!dattr)
-               return;
         if (dattr->relax_domain_level < c->relax_domain_level)
                 dattr->relax_domain_level = c->relax_domain_level;
         return;
  }
  
+static void
+update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
+{
+       LIST_HEAD(q);           /* queue of cpusets still to be scanned */
+
+       list_add(&c->stack_list, &q);   /* the scan starts at c itself */
+       while (!list_empty(&q)) {
+               struct cpuset *cp;      /* cpuset taken from the head of q */
+               struct cgroup *cont;    /* scans child cgroups of cp */
+               struct cpuset *child;   /* cpuset of the child cgroup cont */
+
+               cp = list_first_entry(&q, struct cpuset, stack_list);
+               list_del(q.next);       /* dequeue cp from the head of q */
+
+               if (cpus_empty(cp->cpus_allowed))
+                       continue;       /* no cpus: skip cp and its children */
+
+               if (is_sched_load_balance(cp))
+                       update_domain_attr(dattr, cp);
+
+               list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+                       child = cgroup_cs(cont);
+                       list_add_tail(&child->stack_list, &q);
+               }
+       }
+}
+
  /*
   * rebuild_sched_domains()
   *
@@ -532,7 +556,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
   * So the reverse nesting would risk an ABBA deadlock.
   *
   * The three key local variables below are:
- *    q  - a kfifo queue of cpuset pointers, used to implement a
+ *    q  - a linked-list queue of cpuset pointers, used to implement a
   *        top-down scan of all cpusets.  This scan loads a pointer
   *        to each cpuset marked is_sched_load_balance into the
   *        array 'csa'.  For our purposes, rebuilding the schedulers
@@ -567,7 +591,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  
  void rebuild_sched_domains(void)
  {
-       struct kfifo *q;        /* queue of cpusets to be scanned */
+       LIST_HEAD(q);           /* queue of cpusets to be scanned*/
         struct cpuset *cp;      /* scans q */
         struct cpuset **csa;    /* array of all cpuset ptrs */
         int csn;                /* how many cpuset ptrs in csa so far */
@@ -577,7 +601,6 @@ void rebuild_sched_domains(void)
         int ndoms;              /* number of sched domains in result */
         int nslot;              /* next empty doms[] cpumask_t slot */
  
-       q = NULL;
         csa = NULL;
         doms = NULL;
         dattr = NULL;
@@ -591,35 +614,42 @@ void rebuild_sched_domains(void)
                 dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
                 if (dattr) {
                         *dattr = SD_ATTR_INIT;
-                       update_domain_attr(dattr, &top_cpuset);
+                       update_domain_attr_tree(dattr, &top_cpuset);
                 }
                 *doms = top_cpuset.cpus_allowed;
                 goto rebuild;
         }
  
-       q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
-       if (IS_ERR(q))
-               goto done;
         csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
         if (!csa)
                 goto done;
         csn = 0;
  
-       cp = &top_cpuset;
-       __kfifo_put(q, (void *)&cp, sizeof(cp));
-       while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
+       list_add(&top_cpuset.stack_list, &q);
+       while (!list_empty(&q)) {
                 struct cgroup *cont;
                 struct cpuset *child;   /* scans child cpusets of cp */
  
+               cp = list_first_entry(&q, struct cpuset, stack_list);
+               list_del(q.next);
+
                 if (cpus_empty(cp->cpus_allowed))
                         continue;
  
-               if (is_sched_load_balance(cp))
+               /*
+                * Every child cpuset holds a subset of its parent's cpus, so
+                * once cp is recorded its children need not be scanned here.
+                * update_domain_attr_tree() is called later to calculate the
+                * relax_domain_level of the corresponding sched domain.
+                */
+               if (is_sched_load_balance(cp)) {
                         csa[csn++] = cp;
+                       continue;
+               }
  
                 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
                         child = cgroup_cs(cont);
-                       __kfifo_put(q, (void *)&child, sizeof(cp));
+                       list_add_tail(&child->stack_list, &q);
                 }
         }
  
@@ -686,7 +716,7 @@ restart:
                                         cpus_or(*dp, *dp, b->cpus_allowed);
                                         b->pn = -1;
                                         if (dattr)
-                                               update_domain_attr(dattr
+                                               update_domain_attr_tree(dattr
                                                                    + nslot, b);
                                 }
                         }
@@ -702,43 +732,11 @@ rebuild:
         put_online_cpus();
  
  done:
-       if (q && !IS_ERR(q))
-               kfifo_free(q);
         kfree(csa);
         /* Don't kfree(doms) -- partition_sched_domains() does that. */
         /* Don't kfree(dattr) -- partition_sched_domains() does that. */
  }
  
-static inline int started_after_time(struct task_struct *t1,
-                                    struct timespec *time,
-                                    struct task_struct *t2)
-{
-       int start_diff = timespec_compare(&t1->start_time, time);
-       if (start_diff > 0) {
-               return 1;
-       } else if (start_diff < 0) {
-               return 0;
-       } else {
-               /*
-                * Arbitrarily, if two processes started at the same
-                * time, we'll say that the lower pointer value
-                * started first. Note that t2 may have exited by now
-                * so this may not be a valid pointer any longer, but
-                * that's fine - it still serves to distinguish
-                * between two tasks started (effectively)
-                * simultaneously.
-                */
-               return t1 > t2;
-       }
-}
-
-static inline int started_after(void *p1, void *p2)
-{
-       struct task_struct *t1 = p1;
-       struct task_struct *t2 = p2;
-       return started_after_time(t1, &t2->start_time, t2);
-}
-
  /**
   * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
   * @tsk: task to test
@@ -790,7 +788,12 @@ static int update_tasks_cpumask(struct cpuset *cs)
         struct ptr_heap heap;
         int retval;
  
-       retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
+       /*
+        * cgroup_scan_tasks() will initialize heap->gt for us.
+        * heap_init() is still called here because cs->cpus_allowed
+        * must not be changed if heap_init() fails.
+        */
+       retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
         if (retval)
                 return retval;
  
@@ -1802,7 +1805,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
         scan.scan.heap = NULL;
         scan.to = to->css.cgroup;
  
-       if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+       if (cgroup_scan_tasks(&scan.scan))
                 printk(KERN_ERR "move_member_tasks_to_cpuset: "
                                 "cgroup_scan_tasks failed\n");
  }
@@ -1858,24 +1861,21 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
   */
  static void scan_for_empty_cpusets(const struct cpuset *root)
  {
+       LIST_HEAD(queue);
         struct cpuset *cp;      /* scans cpusets being updated */
         struct cpuset *child;   /* scans child cpusets of cp */
-       struct list_head queue;
         struct cgroup *cont;
         nodemask_t oldmems;
  
-       INIT_LIST_HEAD(&queue);
-
         list_add_tail((struct list_head *)&root->stack_list, &queue);
  
         while (!list_empty(&queue)) {
-               cp = container_of(queue.next, struct cpuset, stack_list);
+               cp = list_first_entry(&queue, struct cpuset, stack_list);
                 list_del(queue.next);
                 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
                         child = cgroup_cs(cont);
                         list_add_tail(&child->stack_list, &queue);
                 }
-               cont = cp->css.cgroup;
  
                 /* Continue past cpusets with all cpus, mems online */
                 if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&