pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge commit 'v2.6.27-rc6' into core/rcu
author Ingo Molnar <mingo@elte.hu>
Wed, 10 Sep 2008 06:35:40 +0000 (08:35 +0200)
committer Ingo Molnar <mingo@elte.hu>
Wed, 10 Sep 2008 06:35:40 +0000 (08:35 +0200)
Documentation/RCU/checklist.txt
Documentation/RCU/whatisRCU.txt
include/linux/compiler.h
include/linux/rcuclassic.h
include/linux/rculist.h
include/linux/rcupreempt.h
kernel/rcuclassic.c
kernel/rcupreempt.c
kernel/rcupreempt_trace.c
lib/Kconfig.debug

diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index cf5562cbe35642834f28d6fca3409dffebb5c0c4..6e253407b3dc1f83d85076dae39d405a29ec6716 100644 (file)
@@ -210,7 +210,7 @@ over a rather long period of time, but improvements are always welcome!
                number of updates per grace period.
 
 9.     All RCU list-traversal primitives, which include
-       rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(),
+       rcu_dereference(), list_for_each_entry_rcu(),
        list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
        must be either within an RCU read-side critical section or
        must be protected by appropriate update-side locks.  RCU
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index e04d643a9f57a802e59057165f719f36b5ebf5ab..96170824a717059962b85c2400491f58883f3bde 100644 (file)
@@ -786,8 +786,6 @@ RCU pointer/list traversal:
        list_for_each_entry_rcu
        hlist_for_each_entry_rcu
 
-       list_for_each_rcu               (to be deprecated in favor of
-                                        list_for_each_entry_rcu)
        list_for_each_continue_rcu      (to be deprecated in favor of new
                                         list_for_each_entry_continue_rcu)
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c8bd2daf95ec51e0f42ad05e6b8c13be1300ce16..8322141ee480c802ee6919d0f13a86218e45bfc4 100644 (file)
@@ -190,7 +190,9 @@ extern void __chk_io_ptr(const volatile void __iomem *);
  * ACCESS_ONCE() in different C statements.
  *
  * This macro does absolutely -nothing- to prevent the CPU from reordering,
- * merging, or refetching absolutely anything at any time.
+ * merging, or refetching absolutely anything at any time.  Its main intended
+ * use is to mediate communication between process-level code and irq/NMI
+ * handlers, all running on the same CPU.
  */
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
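
The comment change above narrows ACCESS_ONCE()'s stated purpose: it constrains only the compiler, and its main use is communication between process-level code and irq/NMI handlers on the same CPU. As a hedged sketch of that kind of pairing (the flag, handler, and function names below are hypothetical, not taken from this commit):

        #include <linux/compiler.h>
        #include <linux/interrupt.h>

        static int wakeup_flag;         /* hypothetical flag shared with an irq handler */

        static irqreturn_t my_irq_handler(int irq, void *dev_id)
        {
                ACCESS_ONCE(wakeup_flag) = 1;   /* store performed exactly once */
                return IRQ_HANDLED;
        }

        static void wait_for_irq(void)
        {
                /* Force a fresh load of wakeup_flag on every iteration. */
                while (!ACCESS_ONCE(wakeup_flag))
                        cpu_relax();
        }

Note that, per the comment, ACCESS_ONCE() does nothing about CPU reordering; ordering against other CPUs still requires explicit memory barriers.
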
 
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 4ab8436227276322042c5b7374e6d0c561e80486..29bf528c7dccbd595167399b7d25f5641a1485b8 100644 (file)
 struct rcu_ctrlblk {
        long    cur;            /* Current batch number.                      */
        long    completed;      /* Number of the last completed batch         */
-       int     next_pending;   /* Is the next batch already waiting?         */
+       long    pending;        /* Number of the last pending batch           */
+#ifdef CONFIG_DEBUG_RCU_STALL
+       unsigned long gp_check; /* Time grace period should end, in seconds.  */
+#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
 
        int     signaled;
 
@@ -66,11 +69,7 @@ static inline int rcu_batch_after(long a, long b)
        return (a - b) > 0;
 }
 
-/*
- * Per-CPU data for Read-Copy UPdate.
- * nxtlist - new callbacks are added here
- * curlist - current batch for which quiescent cycle started if any
- */
+/* Per-CPU data for Read-Copy Update. */
 struct rcu_data {
        /* 1) quiescent state handling : */
        long            quiescbatch;     /* Batch # for grace period */
@@ -78,12 +77,24 @@ struct rcu_data {
        int             qs_pending;      /* core waits for quiesc state */
 
        /* 2) batch handling */
-       long            batch;           /* Batch # for current RCU batch */
+       /*
+        * If nxtlist is not NULL, then:
+        * batch:
+        *      The batch # for the last entry of nxtlist
+        * [*nxttail[1], NULL = *nxttail[2]):
+        *      Entries whose batch # <= batch
+        * [*nxttail[0], *nxttail[1]):
+        *      Entries whose batch # <= batch - 1
+        * [nxtlist, *nxttail[0]):
+        *      Entries whose batch # <= batch - 2
+        *      The grace period for these entries has completed, and
+        *      other grace-period-completed entries may be moved
+        *      here temporarily in rcu_process_callbacks().
+        */
+       long            batch;
        struct rcu_head *nxtlist;
-       struct rcu_head **nxttail;
+       struct rcu_head **nxttail[3];
        long            qlen;            /* # of queued callbacks */
-       struct rcu_head *curlist;
-       struct rcu_head **curtail;
        struct rcu_head *donelist;
        struct rcu_head **donetail;
        long            blimit;          /* Upper limit on a processed batch */
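
The new comment in struct rcu_data above describes nxtlist as a single linked list split into three segments by the nxttail[] pointers. As a hedged illustration of those invariants, a hypothetical debug helper (not part of this commit) could walk the segments like this:

        /*
         * Hypothetical helper: count the callbacks in each of the three
         * nxtlist segments described above.  Assumes interrupts are
         * disabled so the segment boundaries cannot move underneath us.
         */
        static void rcu_count_segments(struct rcu_data *rdp, long count[3])
        {
                struct rcu_head *rh = rdp->nxtlist;
                int seg;

                for (seg = 0; seg < 3; seg++) {
                        /* Segment seg holds entries whose batch # <= rdp->batch - (2 - seg). */
                        count[seg] = 0;
                        while (rh != *rdp->nxttail[seg]) {
                                count[seg]++;
                                rh = rh->next;
                        }
                }
        }

Segment 2 always ends at *rdp->nxttail[2] == NULL, so the walk terminates at the end of nxtlist.
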
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index eb4443c7e05be213f49596b81b50f209923ab4da..e649bd3f2c976c3f5bed58c067c351a336403e75 100644 (file)
@@ -198,20 +198,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
        at->prev = last;
 }
 
-/**
- * list_for_each_rcu   -       iterate over an rcu-protected list
- * @pos:       the &struct list_head to use as a loop cursor.
- * @head:      the head for your list.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_rcu(pos, head) \
-       for (pos = rcu_dereference((head)->next); \
-               prefetch(pos->next), pos != (head); \
-               pos = rcu_dereference(pos->next))
-
 #define __list_for_each_rcu(pos, head) \
        for (pos = rcu_dereference((head)->next); \
                pos != (head); \
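
The list_for_each_rcu() macro is removed above, matching the Documentation changes earlier in this diff; callers iterate entries directly with list_for_each_entry_rcu() instead. A hedged before/after sketch (struct foo, foo_head, and foo_sum() are hypothetical, not from this commit):

        #include <linux/rculist.h>
        #include <linux/rcupdate.h>

        struct foo {
                int data;
                struct list_head list;
        };
        static LIST_HEAD(foo_head);

        /*
         * Before (now removed): raw list_head cursor.
         *
         *      struct list_head *pos;
         *      list_for_each_rcu(pos, &foo_head)
         *              sum += list_entry(pos, struct foo, list)->data;
         */

        /* After: iterate the containing entries directly. */
        static int foo_sum(void)
        {
                struct foo *p;
                int sum = 0;

                rcu_read_lock();
                list_for_each_entry_rcu(p, &foo_head, list)
                        sum += p->data;
                rcu_read_unlock();
                return sum;
        }
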
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index 0967f03b07056928c4176826d4bf216f329cbde9..3e05c09b54a22408db83e0f0a87a5a8bf9a40e8f 100644 (file)
@@ -57,7 +57,13 @@ static inline void rcu_qsctr_inc(int cpu)
        rdssp->sched_qs++;
 }
 #define rcu_bh_qsctr_inc(cpu)
-#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
+
+/*
+ * Someone might want to pass call_rcu_bh as a function pointer.
+ * So this needs to just be a rename and not a macro function.
+ *  (no parentheses)
+ */
+#define call_rcu_bh            call_rcu
 
 /**
  * call_rcu_sched - Queue RCU callback for invocation after sched grace period.
@@ -111,7 +117,6 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
 struct softirq_action;
 
 #ifdef CONFIG_NO_HZ
-DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
 
 static inline void rcu_enter_nohz(void)
 {
@@ -126,8 +131,8 @@ static inline void rcu_exit_nohz(void)
 {
        static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
 
-       smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
        __get_cpu_var(rcu_dyntick_sched).dynticks++;
+       smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
        WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
                                &rs);
 }
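
The block comment above explains why call_rcu_bh is now a plain rename of call_rcu rather than a function-like macro: code may take its address. A minimal hedged sketch of that usage (the ops structure is hypothetical, loosely in the style of rcutorture, and not part of this commit):

        /* Hypothetical example; not part of this commit. */
        struct my_rcu_ops {
                void (*call)(struct rcu_head *head,
                             void (*func)(struct rcu_head *rcu));
        };

        static struct my_rcu_ops my_bh_ops = {
                /*
                 * This works only because call_rcu_bh is a simple rename of
                 * call_rcu here; a function-like macro such as
                 * "#define call_rcu_bh(head, rcu) call_rcu(head, rcu)" would
                 * not expand without arguments, so taking its address would
                 * fail to build.
                 */
                .call = call_rcu_bh,
        };
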
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index aad93cdc9f68656b95eb496003c593d5d03beed4..743cf0550ff49d75c16e1d3ba331bcc8fe78c2eb 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/time.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -60,12 +61,14 @@ EXPORT_SYMBOL_GPL(rcu_lock_map);
 static struct rcu_ctrlblk rcu_ctrlblk = {
        .cur = -300,
        .completed = -300,
+       .pending = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
        .cpumask = CPU_MASK_NONE,
 };
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
        .cur = -300,
        .completed = -300,
+       .pending = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
        .cpumask = CPU_MASK_NONE,
 };
@@ -83,7 +86,10 @@ static void force_quiescent_state(struct rcu_data *rdp,
 {
        int cpu;
        cpumask_t cpumask;
+       unsigned long flags;
+
        set_need_resched();
+       spin_lock_irqsave(&rcp->lock, flags);
        if (unlikely(!rcp->signaled)) {
                rcp->signaled = 1;
                /*
@@ -109,6 +115,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
                for_each_cpu_mask_nr(cpu, cpumask)
                        smp_send_reschedule(cpu);
        }
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 #else
 static inline void force_quiescent_state(struct rcu_data *rdp,
@@ -118,6 +125,45 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
 }
 #endif
 
+static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
+               struct rcu_data *rdp)
+{
+       long batch;
+
+       head->next = NULL;
+       smp_mb(); /* Read of rcp->cur must happen after any change by caller. */
+
+       /*
+        * Determine the batch number of this callback.
+        *
+        * We use ACCESS_ONCE() to avoid the following error when gcc
+        * eliminates the local variable "batch" and emits code like this:
+        *      1) rdp->batch = rcp->cur + 1 # gets old value
+        *      ......
+        *      2) rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value
+        * then [*nxttail[0], *nxttail[1]) may contain callbacks whose
+        * batch# == rdp->batch; see the comment in struct rcu_data.
+        */
+       batch = ACCESS_ONCE(rcp->cur) + 1;
+
+       if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) {
+               /* advance the nxttail[] segment pointers */
+               rdp->nxttail[0] = rdp->nxttail[1];
+               rdp->nxttail[1] = rdp->nxttail[2];
+               if (rcu_batch_after(batch - 1, rdp->batch))
+                       rdp->nxttail[0] = rdp->nxttail[2];
+       }
+
+       rdp->batch = batch;
+       *rdp->nxttail[2] = head;
+       rdp->nxttail[2] = &head->next;
+
+       if (unlikely(++rdp->qlen > qhimark)) {
+               rdp->blimit = INT_MAX;
+               force_quiescent_state(rdp, &rcu_ctrlblk);
+       }
+}
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -133,18 +179,10 @@ void call_rcu(struct rcu_head *head,
                                void (*func)(struct rcu_head *rcu))
 {
        unsigned long flags;
-       struct rcu_data *rdp;
 
        head->func = func;
-       head->next = NULL;
        local_irq_save(flags);
-       rdp = &__get_cpu_var(rcu_data);
-       *rdp->nxttail = head;
-       rdp->nxttail = &head->next;
-       if (unlikely(++rdp->qlen > qhimark)) {
-               rdp->blimit = INT_MAX;
-               force_quiescent_state(rdp, &rcu_ctrlblk);
-       }
+       __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
@@ -169,20 +207,10 @@ void call_rcu_bh(struct rcu_head *head,
                                void (*func)(struct rcu_head *rcu))
 {
        unsigned long flags;
-       struct rcu_data *rdp;
 
        head->func = func;
-       head->next = NULL;
        local_irq_save(flags);
-       rdp = &__get_cpu_var(rcu_bh_data);
-       *rdp->nxttail = head;
-       rdp->nxttail = &head->next;
-
-       if (unlikely(++rdp->qlen > qhimark)) {
-               rdp->blimit = INT_MAX;
-               force_quiescent_state(rdp, &rcu_bh_ctrlblk);
-       }
-
+       __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
@@ -211,12 +239,6 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
 static inline void raise_rcu_softirq(void)
 {
        raise_softirq(RCU_SOFTIRQ);
-       /*
-        * The smp_mb() here is required to ensure that this cpu's
-        * __rcu_process_callbacks() reads the most recently updated
-        * value of rcu->cur.
-        */
-       smp_mb();
 }
 
 /*
@@ -225,6 +247,7 @@ static inline void raise_rcu_softirq(void)
  */
 static void rcu_do_batch(struct rcu_data *rdp)
 {
+       unsigned long flags;
        struct rcu_head *next, *list;
        int count = 0;
 
@@ -239,9 +262,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
        }
        rdp->donelist = list;
 
-       local_irq_disable();
+       local_irq_save(flags);
        rdp->qlen -= count;
-       local_irq_enable();
+       local_irq_restore(flags);
        if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
                rdp->blimit = blimit;
 
@@ -269,6 +292,85 @@ static void rcu_do_batch(struct rcu_data *rdp)
  *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  *   period (if necessary).
  */
+
+#ifdef CONFIG_DEBUG_RCU_STALL
+
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+       rcp->gp_check = get_seconds() + 3;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       int cpu;
+       long delta;
+       unsigned long flags;
+
+       /* Only let one CPU complain about others per time interval. */
+
+       spin_lock_irqsave(&rcp->lock, flags);
+       delta = get_seconds() - rcp->gp_check;
+       if (delta < 2L || cpus_empty(rcp->cpumask)) {
+               spin_unlock_irqrestore(&rcp->lock, flags);
+               return;
+       }
+       rcp->gp_check = get_seconds() + 30;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+
+       /* OK, time to rat on our buddy... */
+
+       printk(KERN_ERR "RCU detected CPU stalls:");
+       for_each_cpu_mask(cpu, rcp->cpumask)
+               printk(" %d", cpu);
+       printk(" (detected by %d, t=%lu/%lu)\n",
+              smp_processor_id(), get_seconds(), rcp->gp_check);
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       unsigned long flags;
+
+       printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
+                       smp_processor_id(), get_seconds(), rcp->gp_check);
+       dump_stack();
+       spin_lock_irqsave(&rcp->lock, flags);
+       if ((long)(get_seconds() - rcp->gp_check) >= 0L)
+               rcp->gp_check = get_seconds() + 30;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+       long delta;
+
+       delta = get_seconds() - rcp->gp_check;
+       if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
+
+               /* We haven't checked in, so go dump stack. */
+
+               print_cpu_stall(rcp);
+
+       } else {
+               if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
+                       /* They had two seconds to dump stack, so complain. */
+                       print_other_cpu_stall(rcp);
+               }
+       }
+}
+
+#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
+
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static inline void
+check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
+
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
@@ -276,15 +378,10 @@ static void rcu_do_batch(struct rcu_data *rdp)
  */
 static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 {
-       if (rcp->next_pending &&
+       if (rcp->cur != rcp->pending &&
                        rcp->completed == rcp->cur) {
-               rcp->next_pending = 0;
-               /*
-                * next_pending == 0 must be visible in
-                * __rcu_process_callbacks() before it can see new value of cur.
-                */
-               smp_wmb();
                rcp->cur++;
+               record_gp_check_time(rcp);
 
                /*
                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -322,6 +419,8 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
 {
+       unsigned long flags;
+
        if (rdp->quiescbatch != rcp->cur) {
                /* start new grace period: */
                rdp->qs_pending = 1;
@@ -345,7 +444,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                return;
        rdp->qs_pending = 0;
 
-       spin_lock(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
        /*
         * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
         * during cpu startup. Ignore the quiescent state.
@@ -353,7 +452,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
        if (likely(rdp->quiescbatch == rcp->cur))
                cpu_quiet(rdp->cpu, rcp);
 
-       spin_unlock(&rcp->lock);
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 
 
@@ -364,33 +463,38 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
  * which is dead and hence not processing interrupts.
  */
 static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
-                               struct rcu_head **tail)
+                               struct rcu_head **tail, long batch)
 {
-       local_irq_disable();
-       *this_rdp->nxttail = list;
-       if (list)
-               this_rdp->nxttail = tail;
-       local_irq_enable();
+       unsigned long flags;
+
+       if (list) {
+               local_irq_save(flags);
+               this_rdp->batch = batch;
+               *this_rdp->nxttail[2] = list;
+               this_rdp->nxttail[2] = tail;
+               local_irq_restore(flags);
+       }
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
                                struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
-       /* if the cpu going offline owns the grace period
+       unsigned long flags;
+
+       /*
+        * if the cpu going offline owns the grace period
         * we can block indefinitely waiting for it, so flush
         * it here
         */
-       spin_lock_bh(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
        if (rcp->cur != rcp->completed)
                cpu_quiet(rdp->cpu, rcp);
-       spin_unlock_bh(&rcp->lock);
-       rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
-       rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
-       rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
+       rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
+       rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
+       spin_unlock(&rcp->lock);
 
-       local_irq_disable();
        this_rdp->qlen += rdp->qlen;
-       local_irq_enable();
+       local_irq_restore(flags);
 }
 
 static void rcu_offline_cpu(int cpu)
@@ -420,38 +524,52 @@ static void rcu_offline_cpu(int cpu)
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
 {
-       if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
-               *rdp->donetail = rdp->curlist;
-               rdp->donetail = rdp->curtail;
-               rdp->curlist = NULL;
-               rdp->curtail = &rdp->curlist;
-       }
+       unsigned long flags;
+       long completed_snap;
 
-       if (rdp->nxtlist && !rdp->curlist) {
-               local_irq_disable();
-               rdp->curlist = rdp->nxtlist;
-               rdp->curtail = rdp->nxttail;
-               rdp->nxtlist = NULL;
-               rdp->nxttail = &rdp->nxtlist;
-               local_irq_enable();
+       if (rdp->nxtlist) {
+               local_irq_save(flags);
+               completed_snap = ACCESS_ONCE(rcp->completed);
 
                /*
-                * start the next batch of callbacks
+                * move the other grace-period-completed entries to
+                * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
                 */
+               if (!rcu_batch_before(completed_snap, rdp->batch))
+                       rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
+               else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
+                       rdp->nxttail[0] = rdp->nxttail[1];
 
-               /* determine batch number */
-               rdp->batch = rcp->cur + 1;
-               /* see the comment and corresponding wmb() in
-                * the rcu_start_batch()
+               /*
+                * the grace period for the entries in
+                * [rdp->nxtlist, *rdp->nxttail[0]) has completed, so
+                * move these entries to donelist
                 */
-               smp_rmb();
+               if (rdp->nxttail[0] != &rdp->nxtlist) {
+                       *rdp->donetail = rdp->nxtlist;
+                       rdp->donetail = rdp->nxttail[0];
+                       rdp->nxtlist = *rdp->nxttail[0];
+                       *rdp->donetail = NULL;
+
+                       if (rdp->nxttail[1] == rdp->nxttail[0])
+                               rdp->nxttail[1] = &rdp->nxtlist;
+                       if (rdp->nxttail[2] == rdp->nxttail[0])
+                               rdp->nxttail[2] = &rdp->nxtlist;
+                       rdp->nxttail[0] = &rdp->nxtlist;
+               }
+
+               local_irq_restore(flags);
+
+               if (rcu_batch_after(rdp->batch, rcp->pending)) {
+                       unsigned long flags;
 
-               if (!rcp->next_pending) {
                        /* and start it/schedule start if it's a new batch */
-                       spin_lock(&rcp->lock);
-                       rcp->next_pending = 1;
-                       rcu_start_batch(rcp);
-                       spin_unlock(&rcp->lock);
+                       spin_lock_irqsave(&rcp->lock, flags);
+                       if (rcu_batch_after(rdp->batch, rcp->pending)) {
+                               rcp->pending = rdp->batch;
+                               rcu_start_batch(rcp);
+                       }
+                       spin_unlock_irqrestore(&rcp->lock, flags);
                }
        }
 
@@ -462,21 +580,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
+       /*
+        * Memory references from any prior RCU read-side critical sections
+        * executed by the interrupted code must be seen before any RCU
+        * grace-period manipulations below.
+        */
+
+       smp_mb(); /* See above block comment. */
+
        __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
        __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+
+       /*
+        * Memory references from any later RCU read-side critical sections
+        * executed by the interrupted code must be seen after any RCU
+        * grace-period manipulations above.
+        */
+
+       smp_mb(); /* See above block comment. */
 }
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
-       /* This cpu has pending rcu entries and the grace period
-        * for them has completed.
-        */
-       if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
-               return 1;
+       /* Check for CPU stalls, if enabled. */
+       check_cpu_stall(rcp, rdp);
 
-       /* This cpu has no pending entries, but there are new entries */
-       if (!rdp->curlist && rdp->nxtlist)
-               return 1;
+       if (rdp->nxtlist) {
+               long completed_snap = ACCESS_ONCE(rcp->completed);
+
+               /*
+                * This cpu has pending rcu entries and the grace period
+                * for them has completed.
+                */
+               if (!rcu_batch_before(completed_snap, rdp->batch))
+                       return 1;
+               if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
+                               rdp->nxttail[0] != rdp->nxttail[1])
+                       return 1;
+               if (rdp->nxttail[0] != &rdp->nxtlist)
+                       return 1;
+
+               /*
+                * This cpu has pending rcu entries and the new batch
+                * for them has not yet been started or scheduled to start.
+                */
+               if (rcu_batch_after(rdp->batch, rcp->pending))
+                       return 1;
+       }
 
        /* This cpu has finished callbacks to invoke */
        if (rdp->donelist)
@@ -512,9 +662,15 @@ int rcu_needs_cpu(int cpu)
        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
        struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
 
-       return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
+       return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
 }
 
+/*
+ * Top-level function driving RCU grace-period detection, normally
+ * invoked from the scheduler-clock interrupt.  This function simply
+ * increments counters that are read only from softirq by this same
+ * CPU, so there are no memory barriers required.
+ */
 void rcu_check_callbacks(int cpu, int user)
 {
        if (user ||
@@ -558,14 +714,17 @@ void rcu_check_callbacks(int cpu, int user)
 static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                                struct rcu_data *rdp)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&rcp->lock, flags);
        memset(rdp, 0, sizeof(*rdp));
-       rdp->curtail = &rdp->curlist;
-       rdp->nxttail = &rdp->nxtlist;
+       rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
        rdp->donetail = &rdp->donelist;
        rdp->quiescbatch = rcp->completed;
        rdp->qs_pending = 0;
        rdp->cpu = cpu;
        rdp->blimit = blimit;
+       spin_unlock_irqrestore(&rcp->lock, flags);
 }
 
 static void __cpuinit rcu_online_cpu(int cpu)
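
One subtlety in the kernel/rcuclassic.c changes above is the ACCESS_ONCE() in __call_rcu(): rcp->cur must be sampled once so that the comparison against rdp->batch and the later assignment to rdp->batch use the same value. A hedged sketch of the two code shapes (illustrative only; the second fragment is what gcc may effectively emit, not code from this commit):

        /* Intended shape: one sample of rcp->cur feeds both uses. */
        batch = ACCESS_ONCE(rcp->cur) + 1;
        if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) {
                /* advance the nxttail[] segment pointers */
        }
        rdp->batch = batch;

        /*
         * Without ACCESS_ONCE(), gcc may eliminate "batch" and reload
         * rcp->cur at each use.  The two loads can then observe different
         * values if the grace period advances in between, breaking the
         * segment invariants documented in struct rcu_data:
         *
         *      if (rdp->nxtlist && rcu_batch_after(rcp->cur + 1, rdp->batch))
         *              ...;
         *      rdp->batch = rcp->cur + 1;
         */
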
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 27827931ca0dd6ca905040955616c970b2e7539d..ca4bbbe04aa4db9d150a503c9025dbe83e0d62a3 100644 (file)
 #include <linux/cpumask.h>
 #include <linux/rcupreempt_trace.h>
 
-/*
- * Macro that prevents the compiler from reordering accesses, but does
- * absolutely -nothing- to prevent CPUs from reordering.  This is used
- * only to mediate communication between mainline code and hardware
- * interrupt and NMI handlers.
- */
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
-
 /*
  * PREEMPT_RCU data structures.
  */
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 5edf82c34bbceab891ecbc6e23d576a03c5763e8..35c2d3360ecf750be63922a89101f6cfac249107 100644 (file)
@@ -308,11 +308,16 @@ out:
 
 static int __init rcupreempt_trace_init(void)
 {
+       int ret;
+
        mutex_init(&rcupreempt_trace_mutex);
        rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
        if (!rcupreempt_trace_buf)
                return 1;
-       return rcupreempt_debugfs_init();
+       ret = rcupreempt_debugfs_init();
+       if (ret)
+               kfree(rcupreempt_trace_buf);
+       return ret;
 }
 
 static void __exit rcupreempt_trace_cleanup(void)
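
The rcupreempt_trace_init() change above plugs a leak: the trace buffer is now freed when rcupreempt_debugfs_init() fails. For comparison, a hedged sketch of the same fix written in the kernel's common goto-unwind style (an alternative shape only, not what the commit does):

        static int __init rcupreempt_trace_init(void)
        {
                int ret = 1;

                mutex_init(&rcupreempt_trace_mutex);
                rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
                if (!rcupreempt_trace_buf)
                        goto out;       /* keep the original "return 1" behaviour */
                ret = rcupreempt_debugfs_init();
                if (ret)
                        goto free_buf;
                return 0;

        free_buf:
                kfree(rcupreempt_trace_buf);
        out:
                return ret;
        }
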
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b504814e378067ff120b266e5b26c9fdd6fb90e..ccede1aeab3865d468de95e2e97796ed564ce36d 100644 (file)
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
          Say N here if you want the RCU torture tests to start only
          after being manually enabled via /proc.
 
+config RCU_CPU_STALL
+       bool "Check for stalled CPUs delaying RCU grace periods"
+       depends on CLASSIC_RCU
+       default n
+       help
+         This option causes RCU to printk information on which
+         CPUs are delaying the current grace period, but only when
+         the grace period extends for excessive time periods.
+
+         Say Y if you want RCU to perform such checks.
+
+         Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
        bool "Kprobes sanity tests"
        depends on DEBUG_KERNEL
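
The new RCU_CPU_STALL option depends on CLASSIC_RCU and defaults to n, so nothing changes unless it is selected explicitly. A hedged .config fragment enabling it (assuming the symbol names exactly as added above):

        CONFIG_CLASSIC_RCU=y
        CONFIG_RCU_CPU_STALL=y
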