init_completion(&mm->core_done);
        init_completion(&core_state->startup);
+       core_state->dumper.task = tsk;
+       core_state->dumper.next = NULL;
        core_waiters = zap_threads(tsk, mm, core_state, exit_code);
        up_write(&mm->mmap_sem);
 
 
 #endif
 };
 
+/*
+ * One thread blocked while a coredump is in progress.  Each exiting
+ * thread fills in ->task with itself and splices itself onto the
+ * dumper's singly-linked list with a lockless xchg() of ->next
+ * (see exit_mm() below).
+ */
+struct core_thread {
+       struct task_struct *task;
+       struct core_thread *next;
+};
+
 struct core_state {
        atomic_t nr_threads;
+       /*
+        * ->task is the coredumping task; ->next is the head of the
+        * list of core_threads that exiting threads chain themselves
+        * onto via xchg() in exit_mm().
+        */
+       struct core_thread dumper;
        struct completion startup;
 };
 
 
 static void exit_mm(struct task_struct * tsk)
 {
        struct mm_struct *mm = tsk->mm;
+       struct core_state *core_state;
 
        mm_release(tsk, mm);
        if (!mm)
         * group with ->mm != NULL.
         */
        down_read(&mm->mmap_sem);
-       if (mm->core_state) {
+       core_state = mm->core_state;
+       if (core_state) {
+               struct core_thread self;
                up_read(&mm->mmap_sem);
 
-               if (atomic_dec_and_test(&mm->core_state->nr_threads))
-                       complete(&mm->core_state->startup);
+               self.task = tsk;
+               self.next = xchg(&core_state->dumper.next, &self);
+               /*
+                * Implies mb(), the result of xchg() must be visible
+                * to core_state->dumper.
+                */
+               if (atomic_dec_and_test(&core_state->nr_threads))
+                       complete(&core_state->startup);
 
                wait_for_completion(&mm->core_done);
                down_read(&mm->mmap_sem);