if (blk_queue_stopped(q))
                return;
 
-       if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
+       if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
+               __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
                mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
                blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
        }
 {
        WARN_ON(!irqs_disabled());
 
-       if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+       if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
                return 0;
 
+       queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
        del_timer(&q->unplug_timer);
        return 1;
 }
 {
        WARN_ON(!irqs_disabled());
 
-       clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
+       queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 
        /*
         * one level of recursion is ok and is much faster than kicking
         * the unplug handling
         */
-       if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+       if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+               queue_flag_set(QUEUE_FLAG_REENTER, q);
                q->request_fn(q);
-               clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+               queue_flag_clear(QUEUE_FLAG_REENTER, q);
        } else {
                blk_plug_device(q);
                kblockd_schedule_work(&q->unplug_work);
 void blk_stop_queue(struct request_queue *q)
 {
        blk_remove_plug(q);
-       set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
+       queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
 
- * blk_run_queue - run a single device queue
+ * __blk_run_queue - run a single device queue
  * @q: The queue to run
  */
-void blk_run_queue(struct request_queue *q)
+void __blk_run_queue(struct request_queue *q)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
        blk_remove_plug(q);
 
        /*
         * handling reinvoke the handler shortly if we already got there.
         */
        if (!elv_queue_empty(q)) {
-               if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+               if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+                       queue_flag_set(QUEUE_FLAG_REENTER, q);
                        q->request_fn(q);
-                       clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+                       queue_flag_clear(QUEUE_FLAG_REENTER, q);
                } else {
                        blk_plug_device(q);
                        kblockd_schedule_work(&q->unplug_work);
                }
        }
+}
+EXPORT_SYMBOL(__blk_run_queue);
 
+/**
+ * blk_run_queue - run a single device queue
+ * @q: The queue to run
+ */
+void blk_run_queue(struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       __blk_run_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
 void blk_cleanup_queue(struct request_queue *q)
 {
        mutex_lock(&q->sysfs_lock);
-       set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+       queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
        mutex_unlock(&q->sysfs_lock);
 
        if (q->elevator)
 
        if (!rq->bio)
                return;
 
-       cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
+       cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
        hw_seg_size = seg_size = 0;
        phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
        rq_for_each_segment(bv, rq, iter) {
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
                                   struct bio *nxt)
 {
-       if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
+       if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
                return 0;
 
        if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
        int nsegs, cluster;
 
        nsegs = 0;
-       cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
+       cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 
        /*
         * for each bio in rq
 
        t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
        t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
        if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-               clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
+               queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
 
        __blk_free_tags(bqt);
 
        q->queue_tags = NULL;
-       q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
+       queue_flag_clear(QUEUE_FLAG_QUEUED, q);
 }
 
 /**
  **/
 void blk_queue_free_tags(struct request_queue *q)
 {
-       clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+       queue_flag_clear(QUEUE_FLAG_QUEUED, q);
 }
 EXPORT_SYMBOL(blk_queue_free_tags);
 
                rc = blk_queue_resize_tags(q, depth);
                if (rc)
                        return rc;
-               set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+               queue_flag_set(QUEUE_FLAG_QUEUED, q);
                return 0;
        } else
                atomic_inc(&tags->refcnt);
         * assign it, all done
         */
        q->queue_tags = tags;
-       q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
+       queue_flag_set(QUEUE_FLAG_QUEUED, q);
        INIT_LIST_HEAD(&q->tag_busy_list);
        return 0;
 fail:
 
         */
        spin_lock_irq(q->queue_lock);
 
-       set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+       queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
 
        elv_drain_elevator(q);
 
         * finally exit old elevator and turn off BYPASS.
         */
        elevator_exit(old_elevator);
-       clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+       spin_lock_irq(q->queue_lock);
+       queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+       spin_unlock_irq(q->queue_lock);
+
        return 1;
 
 fail_register:
        elevator_exit(e);
        q->elevator = old_elevator;
        elv_register_queue(q);
-       clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+
+       spin_lock_irq(q->queue_lock);
+       queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+       spin_unlock_irq(q->queue_lock);
+
        return 0;
 }
 
 
 {
        struct loop_device *lo = q->queuedata;
 
-       clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+       queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
        blk_run_address_space(lo->lo_backing_file->f_mapping);
 }
 
 
                del_gendisk(lun->disk);
                /*
                 * I wish I could do:
-                *    set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+                *    queue_flag_set(QUEUE_FLAG_DEAD, q);
                 * As it is, we rely on our internal poisoning and let
                 * the upper levels to spin furiously failing all the I/O.
                 */
 
        q->max_hw_sectors = t->limits.max_hw_sectors;
        q->seg_boundary_mask = t->limits.seg_boundary_mask;
        q->bounce_pfn = t->limits.bounce_pfn;
+       /* XXX: the below will probably go bug. must ensure there can be no
+        * concurrency on queue_flags, and use the unlocked versions...
+        */
        if (t->limits.no_cluster)
-               q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+               queue_flag_clear(QUEUE_FLAG_CLUSTER, q);
        else
-               q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+               queue_flag_set(QUEUE_FLAG_CLUSTER, q);
 
 }
 
 
                kfree(new);
                return NULL;
        }
-       set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+       /* Can be unlocked because the queue is new: no concurrency */
+       queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
 
        blk_queue_make_request(new->queue, md_fail_request);
 
 
        if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
                printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n",
                       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
-       set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags);
+       queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue);
        return 0;
 }
 
 
               !shost->host_blocked && !shost->host_self_blocked &&
                !((shost->can_queue > 0) &&
                  (shost->host_busy >= shost->can_queue))) {
+
+               int flagset;
+
                /*
                 * As long as shost is accepting commands and we have
                 * starved queues, call blk_run_queue. scsi_request_fn
                sdev = list_entry(shost->starved_list.next,
                                          struct scsi_device, starved_entry);
                list_del_init(&sdev->starved_entry);
-               spin_unlock_irqrestore(shost->host_lock, flags);
-
+               spin_unlock(shost->host_lock);
+
+               spin_lock(sdev->request_queue->queue_lock);
+               flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
+                               !test_bit(QUEUE_FLAG_REENTER,
+                                       &sdev->request_queue->queue_flags);
+               if (flagset)
+                       queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
+               __blk_run_queue(sdev->request_queue);
+               if (flagset)
+                       queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
+               spin_unlock(sdev->request_queue->queue_lock);
 
-               if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-                   !test_and_set_bit(QUEUE_FLAG_REENTER,
-                                     &sdev->request_queue->queue_flags)) {
-                       blk_run_queue(sdev->request_queue);
-                       clear_bit(QUEUE_FLAG_REENTER,
-                                 &sdev->request_queue->queue_flags);
-               } else
-                       blk_run_queue(sdev->request_queue);
-
-               spin_lock_irqsave(shost->host_lock, flags);
+               spin_lock(shost->host_lock);
                if (unlikely(!list_empty(&sdev->starved_entry)))
                        /*
                         * sdev lost a race, and was put back on the
 
        blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
+       /* New queue, no concurrency on queue_flags */
        if (!shost->use_clustering)
-               clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+               queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 
        /*
         * set a reasonable default alignment on word boundaries: the
 
        else
                q->queuedata = shost;
 
-       set_bit(QUEUE_FLAG_BIDI, &q->queue_flags);
-
+       queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
        return 0;
 }
 
 
 #define QUEUE_FLAG_ELVSWITCH   8       /* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI                9       /* queue supports bidi requests */
 
+static inline void queue_flag_set_unlocked(unsigned int flag,
+                                          struct request_queue *q)
+{
+       __set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+       WARN_ON_ONCE(!spin_is_locked(q->queue_lock));
+       __set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear_unlocked(unsigned int flag,
+                                            struct request_queue *q)
+{
+       __clear_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+       WARN_ON_ONCE(!spin_is_locked(q->queue_lock));
+       __clear_bit(flag, &q->queue_flags);
+}
+
 enum {
        /*
         * Hardbarrier is supported with one of the following methods.
 static inline void blk_set_queue_full(struct request_queue *q, int rw)
 {
        if (rw == READ)
-               set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+               queue_flag_set(QUEUE_FLAG_READFULL, q);
        else
-               set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+               queue_flag_set(QUEUE_FLAG_WRITEFULL, q);
 }
 
 static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 {
        if (rw == READ)
-               clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+               queue_flag_clear(QUEUE_FLAG_READFULL, q);
        else
-               clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+               queue_flag_clear(QUEUE_FLAG_WRITEFULL, q);
 }
 
 
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
+extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_start_queueing(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long);
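
For reference, the locking contract the new helpers encode can be summarized outside the patch. The following is a minimal, self-contained userspace sketch, not kernel code: pthread_mutex_t stands in for q->queue_lock, plain bit arithmetic stands in for __set_bit()/__clear_bit(), and the struct and helper names only mirror the kernel's. It shows the intent of the change: queue_flags updates become non-atomic, so runtime callers must hold queue_lock and use queue_flag_set()/queue_flag_clear(), while the *_unlocked variants are reserved for points where no concurrency on the queue is possible (e.g. queue setup).

/*
 * Userspace sketch of the queue_flags locking contract (illustrative only).
 * All names mirror the kernel's; the lock and bit ops are stand-ins.
 */
#include <pthread.h>
#include <stdio.h>

#define QUEUE_FLAG_PLUGGED  0
#define QUEUE_FLAG_STOPPED  1
#define QUEUE_FLAG_CLUSTER  2

struct request_queue {
	unsigned long queue_flags;      /* non-atomic: protected by queue_lock */
	pthread_mutex_t queue_lock;     /* stand-in for the real spinlock */
};

/* Locked variants: caller must already hold queue_lock. */
static void queue_flag_set(unsigned int flag, struct request_queue *q)
{
	q->queue_flags |= 1UL << flag;          /* models __set_bit() */
}

static void queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	q->queue_flags &= ~(1UL << flag);       /* models __clear_bit() */
}

/* Unlocked variant: only safe while nobody else can see the queue yet. */
static void queue_flag_set_unlocked(unsigned int flag, struct request_queue *q)
{
	q->queue_flags |= 1UL << flag;
}

int main(void)
{
	struct request_queue q = {
		.queue_flags = 0,
		.queue_lock = PTHREAD_MUTEX_INITIALIZER,
	};

	/* Queue setup: not yet visible to anyone else, unlocked variant is fine. */
	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, &q);

	/* Runtime: take queue_lock around every flag update. */
	pthread_mutex_lock(&q.queue_lock);
	queue_flag_set(QUEUE_FLAG_STOPPED, &q);
	queue_flag_clear(QUEUE_FLAG_PLUGGED, &q);
	pthread_mutex_unlock(&q.queue_lock);

	printf("queue_flags = %#lx\n", q.queue_flags);
	return 0;
}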