X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fmd%2Fmd.c;h=5d6fac1fd39ebbaa05c522d78bcd997d08758018;hb=6c2fce2ef6b4821c21b5c42c7207cb9cf8c87eda;hp=83eb78b00137e98f6624bf72247d8a20bed1f870;hpb=c2a3b233450d5bc426c063ea2d8a74351db29ea4;p=linux-2.6-omap-h63xx.git

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 83eb78b0013..5d6fac1fd39 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
 
 static void md_print_devices(void);
 
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 /*
@@ -274,7 +276,9 @@ static mddev_t * mddev_find(dev_t unit)
 	atomic_set(&new->active, 1);
 	spin_lock_init(&new->write_lock);
 	init_waitqueue_head(&new->sb_wait);
+	init_waitqueue_head(&new->recovery_wait);
 	new->reshape_position = MaxSector;
+	new->resync_min = 0;
 	new->resync_max = MaxSector;
 	new->level = LEVEL_NONE;
 
@@ -1928,7 +1932,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		slot = -1;
 	else if (e==buf || (*e && *e!= '\n'))
 		return -EINVAL;
-	if (rdev->mddev->pers) {
+	if (rdev->mddev->pers && slot == -1) {
 		/* Setting 'slot' on an active array requires also
 		 * updating the 'rd%d' link, and communicating
 		 * with the personality with ->hot_*_disk.
@@ -1936,8 +1940,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		 * failed/spare devices. This normally happens automatically,
 		 * but not when the metadata is externally managed.
 		 */
-		if (slot != -1)
-			return -EBUSY;
 		if (rdev->raid_disk == -1)
 			return -EEXIST;
 		/* personality does all needed checks */
@@ -1951,6 +1953,44 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		sysfs_remove_link(&rdev->mddev->kobj, nm);
 		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 		md_wakeup_thread(rdev->mddev->thread);
+	} else if (rdev->mddev->pers) {
+		mdk_rdev_t *rdev2;
+		struct list_head *tmp;
+		/* Activating a spare .. or possibly reactivating
+		 * if we every get bitmaps working here.
+		 */
+
+		if (rdev->raid_disk != -1)
+			return -EBUSY;
+
+		if (rdev->mddev->pers->hot_add_disk == NULL)
+			return -EINVAL;
+
+		rdev_for_each(rdev2, tmp, rdev->mddev)
+			if (rdev2->raid_disk == slot)
+				return -EEXIST;
+
+		rdev->raid_disk = slot;
+		if (test_bit(In_sync, &rdev->flags))
+			rdev->saved_raid_disk = slot;
+		else
+			rdev->saved_raid_disk = -1;
+		err = rdev->mddev->pers->
+			hot_add_disk(rdev->mddev, rdev);
+		if (err != 1) {
+			rdev->raid_disk = -1;
+			if (err == 0)
+				return -EEXIST;
+			return err;
+		}
+		sprintf(nm, "rd%d", rdev->raid_disk);
+		if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
+			printk(KERN_WARNING
+				"md: cannot register "
+				"%s for %s\n",
+				nm, mdname(rdev->mddev));
+
+		/* don't wakeup anyone, leave that to userspace. */
 	} else {
 		if (slot >= rdev->mddev->raid_disks)
 			return -ENOSPC;
@@ -1980,7 +2020,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 	unsigned long long offset = simple_strtoull(buf, &e, 10);
 	if (e==buf || (*e && *e != '\n'))
 		return -EINVAL;
-	if (rdev->mddev->pers)
+	if (rdev->mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
 	if (rdev->size && rdev->mddev->external)
 		/* Must set offset before size, so overlap checks
@@ -2019,7 +2059,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 
 	if (e==buf || (*e && *e != '\n'))
 		return -EINVAL;
-	if (my_mddev->pers)
+	if (my_mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
 	rdev->size = size;
 	if (size > oldsize && rdev->mddev->external) {
@@ -3012,6 +3052,36 @@ degraded_show(mddev_t *mddev, char *page)
 }
 static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
 
+static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	long n;
+
+	if (strict_strtol(buf, 10, &n))
+		return -EINVAL;
+
+	if (n != 0 && n != 1)
+		return -EINVAL;
+
+	mddev->parallel_resync = n;
+
+	if (mddev->sync_thread)
+		wake_up(&resync_wait);
+
+	return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+       sync_force_parallel_show, sync_force_parallel_store);
+
 static ssize_t
 sync_speed_show(mddev_t *mddev, char *page)
 {
@@ -3041,6 +3111,36 @@ sync_completed_show(mddev_t *mddev, char *page)
 
 static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
 
+static ssize_t
+min_sync_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)mddev->resync_min);
+}
+static ssize_t
+min_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	unsigned long long min;
+	if (strict_strtoull(buf, 10, &min))
+		return -EINVAL;
+	if (min > mddev->resync_max)
+		return -EINVAL;
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+		return -EBUSY;
+
+	/* Must be a multiple of chunk_size */
+	if (mddev->chunk_size) {
+		if (min & (sector_t)((mddev->chunk_size>>9)-1))
+			return -EINVAL;
+	}
+	mddev->resync_min = min;
+
+	return len;
+}
+
+static struct md_sysfs_entry md_min_sync =
+__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
+
 static ssize_t
 max_sync_show(mddev_t *mddev, char *page)
 {
@@ -3056,9 +3156,10 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
 	if (strncmp(buf, "max", 3) == 0)
 		mddev->resync_max = MaxSector;
 	else {
-		char *ep;
-		unsigned long long max = simple_strtoull(buf, &ep, 10);
-		if (ep == buf || (*ep != 0 && *ep != '\n'))
+		unsigned long long max;
+		if (strict_strtoull(buf, 10, &max))
+			return -EINVAL;
+		if (max < mddev->resync_min)
 			return -EINVAL;
 		if (max < mddev->resync_max &&
 		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
@@ -3187,7 +3288,9 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_sync_min.attr,
 	&md_sync_max.attr,
 	&md_sync_speed.attr,
+	&md_sync_force_parallel.attr,
 	&md_sync_completed.attr,
+	&md_min_sync.attr,
 	&md_max_sync.attr,
 	&md_suspend_lo.attr,
 	&md_suspend_hi.attr,
@@ -3292,9 +3395,9 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	disk->queue = mddev->queue;
 	add_disk(disk);
 	mddev->gendisk = disk;
-	mutex_unlock(&disks_mutex);
 	error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj,
 				     "%s", "md");
+	mutex_unlock(&disks_mutex);
 	if (error)
 		printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
 		       disk->disk_name);
@@ -3691,6 +3794,8 @@ static int do_md_stop(mddev_t * mddev, int mode)
 
 		module_put(mddev->pers->owner);
 		mddev->pers = NULL;
+		/* tell userspace to handle 'inactive' */
+		sysfs_notify(&mddev->kobj, NULL, "array_state");
 
 		set_capacity(disk, 0);
 		mddev->changed = 1;
@@ -3741,6 +3846,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
 		mddev->size = 0;
 		mddev->raid_disks = 0;
 		mddev->recovery_cp = 0;
+		mddev->resync_min = 0;
 		mddev->resync_max = MaxSector;
 		mddev->reshape_position = MaxSector;
 		mddev->external = 0;
@@ -3861,8 +3967,10 @@ static void autorun_devices(int part)
 
 		md_probe(dev, NULL, NULL);
 		mddev = mddev_find(dev);
-		if (!mddev) {
-			printk(KERN_ERR
+		if (!mddev || !mddev->gendisk) {
+			if (mddev)
+				mddev_put(mddev);
+			printk(KERN_ERR
 				"md: cannot allocate memory for md drive.\n");
 			break;
 		}
@@ -3987,8 +4095,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg)
 	if (!buf)
 		goto out;
 
-	ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
-	if (!ptr)
+	ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
+	if (IS_ERR(ptr))
 		goto out;
 
 	strcpy(file->pathname, ptr);
@@ -5399,7 +5507,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
 	atomic_sub(blocks, &mddev->recovery_active);
 	wake_up(&mddev->recovery_wait);
 	if (!ok) {
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 		// stop recovery, signal do_sync ....
 	}
@@ -5435,8 +5543,11 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 			md_wakeup_thread(mddev->thread);
 		}
 		spin_unlock_irq(&mddev->write_lock);
+		sysfs_notify(&mddev->kobj, NULL, "array_state");
 	}
-	wait_event(mddev->sb_wait, mddev->flags==0);
+	wait_event(mddev->sb_wait,
+		   !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+		   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 }
 
 void md_write_end(mddev_t *mddev)
@@ -5461,6 +5572,8 @@ void md_allow_write(mddev_t *mddev)
 		return;
 	if (mddev->ro)
 		return;
+	if (!mddev->pers->sync_request)
+		return;
 
 	spin_lock_irq(&mddev->write_lock);
 	if (mddev->in_sync) {
@@ -5471,13 +5584,17 @@ void md_allow_write(mddev_t *mddev)
 			mddev->safemode = 1;
 		spin_unlock_irq(&mddev->write_lock);
 		md_update_sb(mddev, 0);
+
+		sysfs_notify(&mddev->kobj, NULL, "array_state");
+		/* wait for the dirty state to be recorded in the metadata */
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 	} else
 		spin_unlock_irq(&mddev->write_lock);
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
 
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
 #define SYNC_MARKS 10
 #define SYNC_MARK_STEP (3*HZ)
 void md_do_sync(mddev_t *mddev)
@@ -5541,8 +5658,9 @@ void md_do_sync(mddev_t *mddev)
 	for_each_mddev(mddev2, tmp) {
 		if (mddev2 == mddev)
 			continue;
-		if (mddev2->curr_resync &&
-		    match_mddev_units(mddev,mddev2)) {
+		if (!mddev->parallel_resync
+		&&  mddev2->curr_resync
+		&&  match_mddev_units(mddev, mddev2)) {
 			DEFINE_WAIT(wq);
 			if (mddev < mddev2 && mddev->curr_resync == 2) {
 				/* arbitrarily yield */
@@ -5579,9 +5697,11 @@ void md_do_sync(mddev_t *mddev)
 		max_sectors = mddev->resync_max_sectors;
 		mddev->resync_mismatches = 0;
 		/* we don't use the checkpoint if there's a bitmap */
-		if (!mddev->bitmap &&
-		    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+			j = mddev->resync_min;
+		else if (!mddev->bitmap)
 			j = mddev->recovery_cp;
+
 	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		max_sectors = mddev->size << 1;
 	else {
@@ -5622,7 +5742,6 @@ void md_do_sync(mddev_t *mddev)
 		window/2,(unsigned long long) max_sectors/2);
 
 	atomic_set(&mddev->recovery_active, 0);
-	init_waitqueue_head(&mddev->recovery_wait);
 	last_check = 0;
 
 	if (j>2) {
@@ -5647,7 +5766,7 @@ void md_do_sync(mddev_t *mddev)
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
 						  currspeed < speed_min(mddev));
 		if (sectors == 0) {
-			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			goto out;
 		}
 
@@ -5670,8 +5789,7 @@ void md_do_sync(mddev_t *mddev)
 
 		last_check = io_sectors;
 
-		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
-		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 			break;
 
 	repeat:
@@ -5725,8 +5843,7 @@ void md_do_sync(mddev_t *mddev)
 	/* tell personality that we are finished */
 	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
-	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
-	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
+	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
 	    mddev->curr_resync > 2) {
 		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5753,6 +5870,7 @@ void md_do_sync(mddev_t *mddev)
 
  skip:
 	mddev->curr_resync = 0;
+	mddev->resync_min = 0;
 	mddev->resync_max = MaxSector;
 	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	wake_up(&resync_wait);
@@ -5795,7 +5913,10 @@ static int remove_and_add_spares(mddev_t *mddev)
 	}
 
 	if (mddev->degraded) {
-		rdev_for_each(rdev, rtmp, mddev)
+		rdev_for_each(rdev, rtmp, mddev) {
+			if (rdev->raid_disk >= 0 &&
+			    !test_bit(In_sync, &rdev->flags))
+				spares++;
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
 				rdev->recovery_offset = 0;
@@ -5813,6 +5934,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 				} else
 					break;
 			}
+		}
 	}
 	return spares;
 }
@@ -5826,7 +5948,7 @@ static int remove_and_add_spares(mddev_t *mddev)
  * to do that as needed.
  * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
  * "->recovery" and create a thread at ->sync_thread.
- * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR)
+ * When the thread finishes it sets MD_RECOVERY_DONE
  * and wakeups up this thread which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
@@ -5901,8 +6023,7 @@ void md_check_recovery(mddev_t *mddev)
 			/* resync has finished, collect result */
 			md_unregister_thread(mddev->sync_thread);
 			mddev->sync_thread = NULL;
-			if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
-			    !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 				/* success...*/
 				/* activate any spares */
 				mddev->pers->spare_active(mddev);
@@ -5926,7 +6047,6 @@ void md_check_recovery(mddev_t *mddev)
 		 * might be left set
 		 */
 		clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-		clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
 		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
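
Note: the hunks above add two per-array sysfs attributes, sync_min and sync_force_parallel, alongside the existing sync_max. As a rough userspace sketch only (not part of the patch), the program below shows how they might be poked from a tool or script; the array name "md0", the sector value "2048", and the helper md_attr_write() are assumptions made for illustration. sync_min must be a multiple of the chunk size in sectors and only takes effect for a requested check/repair, and sync_force_parallel simply lets this array resync in parallel with another array that shares its block devices.

/* md_sysfs_example.c - illustrative only; assumes an array named md0 */
#include <stdio.h>

/* Write a value to one of the md sysfs attributes touched by this patch. */
static int md_attr_write(const char *attr, const char *val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/md0/md/%s", attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	if (fputs(val, f) == EOF) {
		fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(void)
{
	/* allow this array to resync while another array on the same
	 * disks is also resyncing (new sync_force_parallel attribute) */
	if (md_attr_write("sync_force_parallel", "1"))
		perror("sync_force_parallel");

	/* start the next requested check/repair at sector 2048 instead
	 * of 0 (new sync_min attribute; must be chunk-aligned) */
	if (md_attr_write("sync_min", "2048"))
		perror("sync_min");

	return 0;
}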