pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/md/md.c
[PATCH] md: Allow stripes to be expanded in preparation for expanding an array
[linux-2.6-omap-h63xx.git] / drivers / md / md.c
index 7145cd150f7b2cac4f00c40f9efef5a36b5fad15..c7b7656f9aa5cd4e98ad9999b32d8ddb44199838 100644 (file)
@@ -213,8 +213,11 @@ static void mddev_put(mddev_t *mddev)
                return;
        if (!mddev->raid_disks && list_empty(&mddev->disks)) {
                list_del(&mddev->all_mddevs);
-               blk_put_queue(mddev->queue);
+               /* that blocks */
+               blk_cleanup_queue(mddev->queue);
+               /* that also blocks */
                kobject_unregister(&mddev->kobj);
+               /* result blows... */
        }
        spin_unlock(&all_mddevs_lock);
 }
@@ -263,6 +266,7 @@ static mddev_t * mddev_find(dev_t unit)
                kfree(new);
                return NULL;
        }
+       set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
 
        blk_queue_make_request(new->queue, md_fail_request);
 
@@ -761,7 +765,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
                if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
                    mddev->bitmap_file == NULL) {
-                       if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6
+                       if (mddev->level != 1 && mddev->level != 4
+                           && mddev->level != 5 && mddev->level != 6
                            && mddev->level != 10) {
                                /* FIXME use a better test */
                                printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
@@ -890,10 +895,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                        d->raid_disk = rdev2->raid_disk;
                else
                        d->raid_disk = rdev2->desc_nr; /* compatibility */
-               if (test_bit(Faulty, &rdev2->flags)) {
+               if (test_bit(Faulty, &rdev2->flags))
                        d->state = (1<<MD_DISK_FAULTY);
-                       failed++;
-               } else if (test_bit(In_sync, &rdev2->flags)) {
+               else if (test_bit(In_sync, &rdev2->flags)) {
                        d->state = (1<<MD_DISK_ACTIVE);
                        d->state |= (1<<MD_DISK_SYNC);
                        active++;
@@ -1024,7 +1028,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
                rdev-> sb_size = (rdev->sb_size | bmask)+1;
 
        if (refdev == 0)
-               return 1;
+               ret = 1;
        else {
                __u64 ev1, ev2;
                struct mdp_superblock_1 *refsb = 
@@ -1044,7 +1048,9 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
                ev2 = le64_to_cpu(refsb->events);
 
                if (ev1 > ev2)
-                       return 1;
+                       ret = 1;
+               else
+                       ret = 0;
        }
        if (minor_version) 
                rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
@@ -1058,7 +1064,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 
        if (le32_to_cpu(sb->size) > rdev->size*2)
                return -EINVAL;
-       return 0;
+       return ret;
 }
 
 static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
@@ -1081,7 +1087,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                mddev->size = le64_to_cpu(sb->size)/2;
                mddev->events = le64_to_cpu(sb->events);
                mddev->bitmap_offset = 0;
-               mddev->default_bitmap_offset = 1024;
+               mddev->default_bitmap_offset = 1024 >> 9;
                
                mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
                memcpy(mddev->uuid, sb->set_uuid, 16);
@@ -1161,6 +1167,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
        sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);
 
+       sb->raid_disks = cpu_to_le32(mddev->raid_disks);
+       sb->size = cpu_to_le64(mddev->size<<1);
+
        if (mddev->bitmap && mddev->bitmap_file == NULL) {
                sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
                sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -1293,6 +1302,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
        else
                ko = &rdev->bdev->bd_disk->kobj;
        sysfs_create_link(&rdev->kobj, ko, "block");
+       bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
        return 0;
 }
 
@@ -1303,6 +1313,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
                MD_BUG();
                return;
        }
+       bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
        list_del_init(&rdev->same_set);
        printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
        rdev->mddev = NULL;
@@ -2686,14 +2697,6 @@ static int do_md_stop(mddev_t * mddev, int ro)
                        set_disk_ro(disk, 1);
        }
 
-       bitmap_destroy(mddev);
-       if (mddev->bitmap_file) {
-               atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
-               fput(mddev->bitmap_file);
-               mddev->bitmap_file = NULL;
-       }
-       mddev->bitmap_offset = 0;
-
        /*
         * Free resources if final stop
         */
@@ -2703,6 +2706,14 @@ static int do_md_stop(mddev_t * mddev, int ro)
                struct gendisk *disk;
                printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
 
+               bitmap_destroy(mddev);
+               if (mddev->bitmap_file) {
+                       atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
+                       fput(mddev->bitmap_file);
+                       mddev->bitmap_file = NULL;
+               }
+               mddev->bitmap_offset = 0;
+
                ITERATE_RDEV(mddev,rdev,tmp)
                        if (rdev->raid_disk >= 0) {
                                char nm[20];
@@ -2764,7 +2775,6 @@ static void autorun_array(mddev_t *mddev)
  */
 static void autorun_devices(int part)
 {
-       struct list_head candidates;
        struct list_head *tmp;
        mdk_rdev_t *rdev0, *rdev;
        mddev_t *mddev;
@@ -2773,6 +2783,7 @@ static void autorun_devices(int part)
        printk(KERN_INFO "md: autorun ...\n");
        while (!list_empty(&pending_raid_disks)) {
                dev_t dev;
+               LIST_HEAD(candidates);
                rdev0 = list_entry(pending_raid_disks.next,
                                         mdk_rdev_t, same_set);
 
@@ -2939,6 +2950,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        info.ctime         = mddev->ctime;
        info.level         = mddev->level;
        info.size          = mddev->size;
+       if (info.size != mddev->size) /* overflow */
+               info.size = -1;
        info.nr_disks      = nr;
        info.raid_disks    = mddev->raid_disks;
        info.md_minor      = mddev->md_minor;
@@ -3465,7 +3478,7 @@ static int update_size(mddev_t *mddev, unsigned long size)
                bdev = bdget_disk(mddev->gendisk, 0);
                if (bdev) {
                        mutex_lock(&bdev->bd_inode->i_mutex);
-                       i_size_write(bdev->bd_inode, mddev->array_size << 10);
+                       i_size_write(bdev->bd_inode, (loff_t)mddev->array_size << 10);
                        mutex_unlock(&bdev->bd_inode->i_mutex);
                        bdput(bdev);
                }
@@ -3485,17 +3498,6 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
        if (mddev->sync_thread)
                return -EBUSY;
        rv = mddev->pers->reshape(mddev, raid_disks);
-       if (!rv) {
-               struct block_device *bdev;
-
-               bdev = bdget_disk(mddev->gendisk, 0);
-               if (bdev) {
-                       mutex_lock(&bdev->bd_inode->i_mutex);
-                       i_size_write(bdev->bd_inode, mddev->array_size << 10);
-                       mutex_unlock(&bdev->bd_inode->i_mutex);
-                       bdput(bdev);
-               }
-       }
        return rv;
 }
 
@@ -3531,7 +3533,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
                )
                return -EINVAL;
        /* Check there is only one change */
-       if (mddev->size != info->size) cnt++;
+       if (info->size >= 0 && mddev->size != info->size) cnt++;
        if (mddev->raid_disks != info->raid_disks) cnt++;
        if (mddev->layout != info->layout) cnt++;
        if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
@@ -3548,7 +3550,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
                else
                        return mddev->pers->reconfig(mddev, info->layout, -1);
        }
-       if (mddev->size != info->size)
+       if (info->size >= 0 && mddev->size != info->size)
                rv = update_size(mddev, info->size);
 
        if (mddev->raid_disks    != info->raid_disks)
@@ -4042,7 +4044,10 @@ static void status_unused(struct seq_file *seq)
 
 static void status_resync(struct seq_file *seq, mddev_t * mddev)
 {
-       unsigned long max_blocks, resync, res, dt, db, rt;
+       sector_t max_blocks, resync, res;
+       unsigned long dt, db, rt;
+       int scale;
+       unsigned int per_milli;
 
        resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
 
@@ -4058,9 +4063,22 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
                MD_BUG();
                return;
        }
-       res = (resync/1024)*1000/(max_blocks/1024 + 1);
+       /* Pick 'scale' such that (resync>>scale)*1000 will fit
+        * in a sector_t, and (max_blocks>>scale) will fit in a
+        * u32, as those are the requirements for sector_div.
+        * Thus 'scale' must be at least 10
+        */
+       scale = 10;
+       if (sizeof(sector_t) > sizeof(unsigned long)) {
+               while ( max_blocks/2 > (1ULL<<(scale+32)))
+                       scale++;
+       }
+       res = (resync>>scale)*1000;
+       sector_div(res, (u32)((max_blocks>>scale)+1));
+
+       per_milli = res;
        {
-               int i, x = res/50, y = 20-x;
+               int i, x = per_milli/50, y = 20-x;
                seq_printf(seq, "[");
                for (i = 0; i < x; i++)
                        seq_printf(seq, "=");
@@ -4069,10 +4087,12 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
                        seq_printf(seq, ".");
                seq_printf(seq, "] ");
        }
-       seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)",
+       seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
                      (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
                       "resync" : "recovery"),
-                     res/10, res % 10, resync, max_blocks);
+                     per_milli/10, per_milli % 10,
+                  (unsigned long long) resync,
+                  (unsigned long long) max_blocks);
 
        /*
         * We do not want to overflow, so the order of operands and
@@ -4086,7 +4106,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
        dt = ((jiffies - mddev->resync_mark) / HZ);
        if (!dt) dt++;
        db = resync - (mddev->resync_mark_cnt/2);
-       rt = (dt * ((max_blocks-resync) / (db/100+1)))/100;
+       rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100;
 
        seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);