        struct workqueue_struct *wq;
        struct task_struct *thread;
+       struct work_struct *current_work;
 
        int run_depth;          /* Detect run_workqueue() recursion depth */
 
            && work_pending(work)
            && !list_empty(&work->entry)) {
                work_func_t f = work->func;
+               cwq->current_work = work;
                list_del_init(&work->entry);
                spin_unlock_irqrestore(&cwq->lock, flags);
 
                f(work);
 
                spin_lock_irqsave(&cwq->lock, flags);
+               cwq->current_work = NULL;
                ret = 1;
        }
        spin_unlock_irqrestore(&cwq->lock, flags);
 }
 EXPORT_SYMBOL(run_scheduled_work);
 
+static void insert_work(struct cpu_workqueue_struct *cwq,
+                               struct work_struct *work, int tail)
+{
+       set_wq_data(work, cwq);
+       if (tail)
+               list_add_tail(&work->entry, &cwq->worklist);
+       else
+               list_add(&work->entry, &cwq->worklist);
+       wake_up(&cwq->more_work);
+}
+
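The tail argument lets this one helper serve both queueing paths touched by this patch; for clarity, the two call sites that appear below, side by side (illustrative only):

	insert_work(cwq, work, 1);		/* __queue_work(): append, normal FIFO order */
	insert_work(cwq, &barr.work, 0);	/* wait_on_work(): barrier goes to the head, so it
						 * runs right after the currently executing callback */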
 /* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
                         struct work_struct *work)
        unsigned long flags;
 
        spin_lock_irqsave(&cwq->lock, flags);
-       set_wq_data(work, cwq);
-       list_add_tail(&work->entry, &cwq->worklist);
-       wake_up(&cwq->more_work);
+       insert_work(cwq, work, 1);
        spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
                                                struct work_struct, entry);
                work_func_t f = work->func;
 
+               cwq->current_work = work;
                list_del_init(cwq->worklist.next);
                spin_unlock_irqrestore(&cwq->lock, flags);
 
                }
 
                spin_lock_irqsave(&cwq->lock, flags);
+               cwq->current_work = NULL;
        }
        cwq->run_depth--;
        spin_unlock_irqrestore(&cwq->lock, flags);
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
+static void wait_on_work(struct cpu_workqueue_struct *cwq,
+                               struct work_struct *work)
+{
+       struct wq_barrier barr;
+       int running = 0;
+
+       spin_lock_irq(&cwq->lock);
+       if (unlikely(cwq->current_work == work)) {
+               init_wq_barrier(&barr);
+               insert_work(cwq, &barr.work, 0);
+               running = 1;
+       }
+       spin_unlock_irq(&cwq->lock);
+
+       if (unlikely(running)) {
+               mutex_unlock(&workqueue_mutex);
+               wait_for_completion(&barr.done);
+               mutex_lock(&workqueue_mutex);
+       }
+}
+
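wait_on_work() relies on struct wq_barrier and init_wq_barrier(), which are defined earlier in workqueue.c and are not part of this hunk.  A minimal sketch consistent with how they are used here (a work item whose callback signals a completion):

	struct wq_barrier {
		struct work_struct	work;
		struct completion	done;
	};

	static void wq_barrier_func(struct work_struct *work)
	{
		struct wq_barrier *barr = container_of(work, struct wq_barrier, work);

		complete(&barr->done);
	}

	static inline void init_wq_barrier(struct wq_barrier *barr)
	{
		INIT_WORK(&barr->work, wq_barrier_func);
		init_completion(&barr->done);
	}

Because the barrier is inserted at the head of the worklist, wait_for_completion(&barr.done) cannot return before the callback that was sampled from cwq->current_work has finished.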
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @wq: the workqueue on which the work is queued
+ * @work: the work which is to be flushed
+ *
+ * flush_work() will attempt to cancel the work if it is queued.  If the work's
+ * callback appears to be running, flush_work() will block until it has
+ * completed.
+ *
+ * flush_work() is designed to be used when the caller is tearing down data
+ * structures which the callback function operates upon.  It is expected that,
+ * prior to calling flush_work(), the caller has arranged for the work to not
+ * be requeued.
+ */
+void flush_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+       struct cpu_workqueue_struct *cwq;
+
+       mutex_lock(&workqueue_mutex);
+       cwq = get_wq_data(work);
+       /* Was it ever queued ? */
+       if (!cwq)
+               goto out;
+
+       /*
+        * This work can't be re-queued, and the lock above protects us
+        * from take_over_work(), no need to re-check that get_wq_data()
+        * is still the same when we take cwq->lock.
+        */
+       spin_lock_irq(&cwq->lock);
+       list_del_init(&work->entry);
+       work_release(work);
+       spin_unlock_irq(&cwq->lock);
+
+       if (is_single_threaded(wq)) {
+               /* Always use first cpu's area. */
+               wait_on_work(per_cpu_ptr(wq->cpu_wq, singlethread_cpu), work);
+       } else {
+               int cpu;
+
+               for_each_online_cpu(cpu)
+                       wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+       }
+out:
+       mutex_unlock(&workqueue_mutex);
+}
+EXPORT_SYMBOL_GPL(flush_work);
+
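The kerneldoc above describes the intended pattern: stop requeueing first, then flush.  A hypothetical teardown path (the my_* names are invented for illustration):

	static void my_dev_teardown(struct my_dev *dev)
	{
		/* The callback checks this flag and no longer requeues itself. */
		dev->going_away = 1;

		/* Once this returns, the callback is neither queued nor running. */
		flush_work(dev->wq, &dev->work);

		destroy_workqueue(dev->wq);
		kfree(dev);
	}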
 static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
                                                   int cpu, int freezeable)
 {
 }
 EXPORT_SYMBOL(flush_scheduled_work);
 
+void flush_work_keventd(struct work_struct *work)
+{
+       flush_work(keventd_wq, work);
+}
+EXPORT_SYMBOL(flush_work_keventd);
+
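For work items queued via schedule_work(), flush_work_keventd() flushes just that one item against keventd_wq rather than flushing the whole queue with flush_scheduled_work().  A hypothetical module-exit example (my_work and my_stop_requeueing() are invented for illustration):

	static struct work_struct my_work;	/* queued elsewhere with schedule_work(&my_work) */

	static void my_module_exit(void)
	{
		my_stop_requeueing();		/* ensure my_work will not be requeued */
		flush_work_keventd(&my_work);
	}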
 /**
  * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work.
  * @wq:   the controlling workqueue structure