pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/md/dm.c
[XFS] remove some easy bhv_vnode_t instances
[linux-2.6-omap-h63xx.git] / drivers / md / dm.c
index 5f0f559d3b92a94ada9c0ddd29c67ceeccd273ad..bca448e118786a78a5e07b54f3cb3a7e3aca85d5 100644 (file)
@@ -37,8 +37,8 @@ static DEFINE_SPINLOCK(_minor_lock);
 struct dm_io {
        struct mapped_device *md;
        int error;
-       struct bio *bio;
        atomic_t io_count;
+       struct bio *bio;
        unsigned long start_time;
 };
 
@@ -71,9 +71,22 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
 
+/*
+ * Work processed by per-device workqueue.
+ */
+struct dm_wq_req {
+       enum {
+               DM_WQ_FLUSH_ALL,        /* pushback list, then deferred */
+               DM_WQ_FLUSH_DEFERRED,   /* deferred list only */
+       } type;
+       struct work_struct work;        /* handler is dm_wq_work() */
+       struct mapped_device *md;       /* device whose lists get flushed */
+       void *context;                  /* set by dm_wq_queue(); not read by dm_wq_work() */
+};
+
 struct mapped_device {
        struct rw_semaphore io_lock;
-       struct semaphore suspend_lock;
+       struct mutex suspend_lock;
        spinlock_t pushback_lock;
        rwlock_t map_lock;
        atomic_t holders;
@@ -95,6 +108,11 @@ struct mapped_device {
        struct bio_list deferred;
        struct bio_list pushback;
 
+       /*
+        * Processing queue (flush/barriers)
+        */
+       struct workqueue_struct *wq;
+
        /*
         * The current mapping.
         */
@@ -186,6 +204,7 @@ static int (*_inits[])(void) __initdata = {
        dm_target_init,
        dm_linear_init,
        dm_stripe_init,
+       dm_kcopyd_init,
        dm_interface_init,
 };
 
@@ -194,6 +213,7 @@ static void (*_exits[])(void) = {
        dm_target_exit,
        dm_linear_exit,
        dm_stripe_exit,
+       dm_kcopyd_exit,
        dm_interface_exit,
 };
 
@@ -809,6 +829,49 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
  * CRUD END
  *---------------------------------------------------------------*/
 
+static int dm_merge_bvec(struct request_queue *q,
+                        struct bvec_merge_data *bvm,
+                        struct bio_vec *biovec)
+{
+       struct mapped_device *md = q->queuedata;
+       struct dm_table *map = dm_get_table(md);
+       struct dm_target *ti;
+       sector_t max_sectors;
+       int max_size = 0;       /* bytes of biovec accepted at this offset */
+
+       if (unlikely(!map))
+               goto out;       /* nothing to put; first-page rule still applies */
+
+       ti = dm_table_find_target(map, bvm->bi_sector);
+
+       /*
+        * Find maximum amount of I/O that won't need splitting
+        */
+       max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
+                         (sector_t) BIO_MAX_SECTORS);
+       max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
+       if (max_size < 0)
+               max_size = 0;
+
+       /*
+        * The target's merge() returns the number of bytes
+        * it can accept at this offset;
+        * max_size is the precomputed maximal io size
+        */
+       if (max_size && ti->type->merge)
+               max_size = ti->type->merge(ti, bvm, biovec, max_size);
+
+       dm_table_put(map);
+out:
+       /*
+        * Always allow an entire first page (also when no table is bound)
+        */
+       if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
+               max_size = biovec->bv_len;
+
+       return max_size;
+}
+
 /*
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
@@ -904,7 +967,7 @@ static void free_minor(int minor)
 /*
  * See if the device with a specific minor # is free.
  */
-static int specific_minor(struct mapped_device *md, int minor)
+static int specific_minor(int minor)
 {
        int r, m;
 
@@ -937,7 +1000,7 @@ out:
        return r;
 }
 
-static int next_free_minor(struct mapped_device *md, int *minor)
+static int next_free_minor(int *minor)
 {
        int r, m;
 
@@ -948,9 +1011,8 @@ static int next_free_minor(struct mapped_device *md, int *minor)
        spin_lock(&_minor_lock);
 
        r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
-       if (r) {
+       if (r)
                goto out;
-       }
 
        if (m >= (1 << MINORBITS)) {
                idr_remove(&_minor_idr, m);
@@ -973,7 +1035,7 @@ static struct block_device_operations dm_blk_dops;
 static struct mapped_device *alloc_dev(int minor)
 {
        int r;
-       struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+       struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
        void *old_md;
 
        if (!md) {
@@ -982,19 +1044,18 @@ static struct mapped_device *alloc_dev(int minor)
        }
 
        if (!try_module_get(THIS_MODULE))
-               goto bad0;
+               goto bad_module_get;
 
        /* get a minor number for the dev */
        if (minor == DM_ANY_MINOR)
-               r = next_free_minor(md, &minor);
+               r = next_free_minor(&minor);
        else
-               r = specific_minor(md, minor);
+               r = specific_minor(minor);
        if (r < 0)
-               goto bad1;
+               goto bad_minor;
 
-       memset(md, 0, sizeof(*md));
        init_rwsem(&md->io_lock);
-       init_MUTEX(&md->suspend_lock);
+       mutex_init(&md->suspend_lock);
        spin_lock_init(&md->pushback_lock);
        rwlock_init(&md->map_lock);
        atomic_set(&md->holders, 1);
@@ -1006,7 +1067,7 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->queue = blk_alloc_queue(GFP_KERNEL);
        if (!md->queue)
-               goto bad1_free_minor;
+               goto bad_queue;
 
        md->queue->queuedata = md;
        md->queue->backing_dev_info.congested_fn = dm_any_congested;
@@ -1014,14 +1075,15 @@ static struct mapped_device *alloc_dev(int minor)
        blk_queue_make_request(md->queue, dm_request);
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
        md->queue->unplug_fn = dm_unplug_all;
+       blk_queue_merge_bvec(md->queue, dm_merge_bvec);
 
        md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
        if (!md->io_pool)
-               goto bad2;
+               goto bad_io_pool;
 
        md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
        if (!md->tio_pool)
-               goto bad3;
+               goto bad_tio_pool;
 
        md->bs = bioset_create(16, 16);
        if (!md->bs)
@@ -1029,7 +1091,7 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->disk = alloc_disk(1);
        if (!md->disk)
-               goto bad4;
+               goto bad_disk;
 
        atomic_set(&md->pending, 0);
        init_waitqueue_head(&md->wait);
@@ -1044,6 +1106,10 @@ static struct mapped_device *alloc_dev(int minor)
        add_disk(md->disk);
        format_dev_t(md->name, MKDEV(_major, minor));
 
+       md->wq = create_singlethread_workqueue("kdmflush");
+       if (!md->wq)
+               goto bad_thread;
+
        /* Populate the mapping, nobody knows we exist yet */
        spin_lock(&_minor_lock);
        old_md = idr_replace(&_minor_idr, md, minor);
@@ -1053,19 +1119,21 @@ static struct mapped_device *alloc_dev(int minor)
 
        return md;
 
- bad4:
+bad_thread:
+       put_disk(md->disk);
+bad_disk:
        bioset_free(md->bs);
- bad_no_bioset:
+bad_no_bioset:
        mempool_destroy(md->tio_pool);
- bad3:
+bad_tio_pool:
        mempool_destroy(md->io_pool);
- bad2:
+bad_io_pool:
        blk_cleanup_queue(md->queue);
- bad1_free_minor:
+bad_queue:
        free_minor(minor);
- bad1:
+bad_minor:
        module_put(THIS_MODULE);
- bad0:
+bad_module_get:
        kfree(md);
        return NULL;
 }
@@ -1080,6 +1148,7 @@ static void free_dev(struct mapped_device *md)
                unlock_fs(md);
                bdput(md->suspended_bdev);
        }
+       destroy_workqueue(md->wq);
        mempool_destroy(md->tio_pool);
        mempool_destroy(md->io_pool);
        bioset_free(md->bs);
@@ -1259,20 +1328,91 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
+static int dm_wait_for_completion(struct mapped_device *md)
+{
+       int r = 0;      /* 0: md->pending reached zero, -EINTR: signalled */
+
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);  /* before the test */
+
+               smp_mb();
+               if (!atomic_read(&md->pending))         /* nothing pending */
+                       break;
+
+               if (signal_pending(current)) {          /* give up early */
+                       r = -EINTR;
+                       break;
+               }
+
+               io_schedule();
+       }
+       set_current_state(TASK_RUNNING);        /* undo INTERRUPTIBLE */
+
+       return r;
+}
+
 /*
  * Process the deferred bios
  */
-static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
+static void __flush_deferred_io(struct mapped_device *md)
 {
-       struct bio *n;
+       struct bio *c;
 
-       while (c) {
-               n = c->bi_next;
-               c->bi_next = NULL;
+       while ((c = bio_list_pop(&md->deferred))) {     /* stop on NULL */
                if (__split_bio(md, c))
                        bio_io_error(c);
-               c = n;
        }
+
+       clear_bit(DMF_BLOCK_IO, &md->flags);    /* after the loop has ended */
+}
+
+static void __merge_pushback_list(struct mapped_device *md)
+{
+       unsigned long flags;    /* saved irq state for pushback_lock */
+
+       spin_lock_irqsave(&md->pushback_lock, flags);
+       clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);  /* under the lock */
+       bio_list_merge_head(&md->deferred, &md->pushback); /* into deferred */
+       bio_list_init(&md->pushback);   /* reinitialise the pushback list */
+       spin_unlock_irqrestore(&md->pushback_lock, flags);
+}
+
+static void dm_wq_work(struct work_struct *work)
+{
+       struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
+       struct mapped_device *md = req->md;
+
+       down_write(&md->io_lock);       /* held across the whole request */
+       switch (req->type) {
+       case DM_WQ_FLUSH_ALL:
+               __merge_pushback_list(md);
+               /* fall through */
+       case DM_WQ_FLUSH_DEFERRED:
+               __flush_deferred_io(md);
+               break;
+       default:
+               DMERR("dm_wq_work: unrecognised work type %d", req->type);
+               BUG();
+       }
+       up_write(&md->io_lock);
+}
+
+static void dm_wq_queue(struct mapped_device *md, int type, void *context,
+                       struct dm_wq_req *req)
+{
+       req->type = type;               /* a DM_WQ_* value */
+       req->md = md;
+       req->context = context;
+       INIT_WORK(&req->work, dm_wq_work);      /* handler: dm_wq_work() */
+       queue_work(md->wq, &req->work);         /* req must outlive the work */
+}
+
+static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+{
+       struct dm_wq_req req;   /* on-stack: only valid until we return */
+
+       dm_wq_queue(md, type, context, &req);
+       flush_workqueue(md->wq);        /* must finish before req goes away */
 }
 
 /*
@@ -1282,7 +1422,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
        int r = -EINVAL;
 
-       down(&md->suspend_lock);
+       mutex_lock(&md->suspend_lock);
 
        /* device must be suspended */
        if (!dm_suspended(md))
@@ -1297,7 +1437,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
        r = __bind(md, table);
 
 out:
-       up(&md->suspend_lock);
+       mutex_unlock(&md->suspend_lock);
        return r;
 }
 
@@ -1346,17 +1486,17 @@ static void unlock_fs(struct mapped_device *md)
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
        struct dm_table *map = NULL;
-       unsigned long flags;
        DECLARE_WAITQUEUE(wait, current);
-       struct bio *def;
-       int r = -EINVAL;
+       int r = 0;
        int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
        int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
 
-       down(&md->suspend_lock);
+       mutex_lock(&md->suspend_lock);
 
-       if (dm_suspended(md))
+       if (dm_suspended(md)) {
+               r = -EINVAL;
                goto out_unlock;
+       }
 
        map = dm_get_table(md);
 
@@ -1378,16 +1518,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                        r = -ENOMEM;
                        goto flush_and_out;
                }
-       }
 
-       /*
-        * Flush I/O to the device.
-        * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os.
-        */
-       if (do_lockfs && !noflush) {
-               r = lock_fs(md);
-               if (r)
-                       goto out;
+               /*
+                * Flush I/O to the device. noflush supersedes do_lockfs,
+                * because lock_fs() needs to flush I/Os.
+                */
+               if (do_lockfs) {
+                       r = lock_fs(md);
+                       if (r)
+                               goto out;
+               }
        }
 
        /*
@@ -1404,67 +1544,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                dm_table_unplug_all(map);
 
        /*
-        * Then we wait for the already mapped ios to
-        * complete.
+        * Wait for the already-mapped ios to complete.
         */
-       while (1) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
-               smp_mb();
-               if (!atomic_read(&md->pending) || signal_pending(current))
-                       break;
-
-               io_schedule();
-       }
-       set_current_state(TASK_RUNNING);
+       r = dm_wait_for_completion(md);
 
        down_write(&md->io_lock);
        remove_wait_queue(&md->wait, &wait);
 
-       if (noflush) {
-               spin_lock_irqsave(&md->pushback_lock, flags);
-               clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-               bio_list_merge_head(&md->deferred, &md->pushback);
-               bio_list_init(&md->pushback);
-               spin_unlock_irqrestore(&md->pushback_lock, flags);
-       }
+       if (noflush)
+               __merge_pushback_list(md);
+       up_write(&md->io_lock);
 
        /* were we interrupted ? */
-       r = -EINTR;
-       if (atomic_read(&md->pending)) {
-               clear_bit(DMF_BLOCK_IO, &md->flags);
-               def = bio_list_get(&md->deferred);
-               __flush_deferred_io(md, def);
-               up_write(&md->io_lock);
+       if (r < 0) {
+               dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+
                unlock_fs(md);
                goto out; /* pushback list is already flushed, so skip flush */
        }
-       up_write(&md->io_lock);
 
        dm_table_postsuspend_targets(map);
 
        set_bit(DMF_SUSPENDED, &md->flags);
 
-       r = 0;
-
 flush_and_out:
-       if (r && noflush) {
+       if (r && noflush)
                /*
                 * Because there may be already I/Os in the pushback list,
                 * flush them before return.
                 */
-               down_write(&md->io_lock);
-
-               spin_lock_irqsave(&md->pushback_lock, flags);
-               clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-               bio_list_merge_head(&md->deferred, &md->pushback);
-               bio_list_init(&md->pushback);
-               spin_unlock_irqrestore(&md->pushback_lock, flags);
-
-               def = bio_list_get(&md->deferred);
-               __flush_deferred_io(md, def);
-               up_write(&md->io_lock);
-       }
+               dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
 
 out:
        if (r && md->suspended_bdev) {
@@ -1475,17 +1584,16 @@ out:
        dm_table_put(map);
 
 out_unlock:
-       up(&md->suspend_lock);
+       mutex_unlock(&md->suspend_lock);
        return r;
 }
 
 int dm_resume(struct mapped_device *md)
 {
        int r = -EINVAL;
-       struct bio *def;
        struct dm_table *map = NULL;
 
-       down(&md->suspend_lock);
+       mutex_lock(&md->suspend_lock);
        if (!dm_suspended(md))
                goto out;
 
@@ -1497,12 +1605,7 @@ int dm_resume(struct mapped_device *md)
        if (r)
                goto out;
 
-       down_write(&md->io_lock);
-       clear_bit(DMF_BLOCK_IO, &md->flags);
-
-       def = bio_list_get(&md->deferred);
-       __flush_deferred_io(md, def);
-       up_write(&md->io_lock);
+       dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
 
        unlock_fs(md);
 
@@ -1521,7 +1624,7 @@ int dm_resume(struct mapped_device *md)
 
 out:
        dm_table_put(map);
-       up(&md->suspend_lock);
+       mutex_unlock(&md->suspend_lock);
 
        return r;
 }