INIT_LIST_HEAD(&new->all_mddevs);
init_timer(&new->safemode_timer);
atomic_set(&new->active, 1);
- bio_list_init(&new->write_list);
spin_lock_init(&new->write_lock);
+ init_waitqueue_head(&new->sb_wait);
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
mdp_disk_t *desc;
mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+ rdev->raid_disk = -1;
+ rdev->in_sync = 0;
if (mddev->raid_disks == 0) {
mddev->major_version = 0;
mddev->minor_version = sb->minor_version;
memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
mddev->max_disks = MD_SB_DISKS;
- } else {
- __u64 ev1;
- ev1 = md_event(sb);
+ } else if (mddev->pers == NULL) {
+ /* Insist on good event counter while assembling */
+ __u64 ev1 = md_event(sb);
++ev1;
if (ev1 < mddev->events)
return -EINVAL;
- }
+ } else if (mddev->bitmap) {
+ /* if adding to array with a bitmap, then we can accept an
+ * older device ... but not too old.
+ */
+ __u64 ev1 = md_event(sb);
+ if (ev1 < mddev->bitmap->events_cleared)
+ return 0;
+ } else /* just a hot-add of a new device, leave raid_disk at -1 */
+ return 0;
+
if (mddev->level != LEVEL_MULTIPATH) {
- rdev->raid_disk = -1;
- rdev->in_sync = rdev->faulty = 0;
+ rdev->faulty = 0;
desc = sb->disks + rdev->desc_nr;
if (desc->state & (1<<MD_DISK_FAULTY))
rdev->in_sync = 1;
rdev->raid_disk = desc->raid_disk;
}
- }
+ } else /* MULTIPATH are always insync */
+ rdev->in_sync = 1;
return 0;
}
{
struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+ rdev->raid_disk = -1;
+ rdev->in_sync = 0;
if (mddev->raid_disks == 0) {
mddev->major_version = 1;
mddev->patch_version = 0;
memcpy(mddev->uuid, sb->set_uuid, 16);
mddev->max_disks = (4096-256)/2;
- } else {
- __u64 ev1;
- ev1 = le64_to_cpu(sb->events);
+ } else if (mddev->pers == NULL) {
+ /* Insist on good event counter while assembling */
+ __u64 ev1 = le64_to_cpu(sb->events);
++ev1;
if (ev1 < mddev->events)
return -EINVAL;
- }
+ } else if (mddev->bitmap) {
+ /* If adding to array with a bitmap, then we can accept an
+ * older device, but not too old.
+ */
+ __u64 ev1 = le64_to_cpu(sb->events);
+ if (ev1 < mddev->bitmap->events_cleared)
+ return 0;
+ } else /* just a hot-add of a new device, leave raid_disk at -1 */
+ return 0;
if (mddev->level != LEVEL_MULTIPATH) {
int role;
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
switch(role) {
case 0xffff: /* spare */
- rdev->in_sync = 0;
rdev->faulty = 0;
- rdev->raid_disk = -1;
break;
case 0xfffe: /* faulty */
- rdev->in_sync = 0;
rdev->faulty = 1;
- rdev->raid_disk = -1;
break;
default:
rdev->in_sync = 1;
rdev->raid_disk = role;
break;
}
- }
+ } else /* MULTIPATH are always insync */
+ rdev->in_sync = 1;
+
return 0;
}
if (!mddev->persistent) {
mddev->sb_dirty = 0;
spin_unlock(&mddev->write_lock);
+ wake_up(&mddev->sb_wait);
return;
}
spin_unlock(&mddev->write_lock);
}
mddev->sb_dirty = 0;
spin_unlock(&mddev->write_lock);
+ wake_up(&mddev->sb_wait);
}
PTR_ERR(rdev));
return PTR_ERR(rdev);
}
+ /* set saved_raid_disk if appropriate */
+ if (!mddev->persistent) {
+ if (info->state & (1<<MD_DISK_SYNC) &&
+ info->raid_disk < mddev->raid_disks)
+ rdev->raid_disk = info->raid_disk;
+ else
+ rdev->raid_disk = -1;
+ } else
+ super_types[mddev->major_version].
+ validate_super(mddev, rdev);
+ rdev->saved_raid_disk = rdev->raid_disk;
+
rdev->in_sync = 0; /* just to be sure */
rdev->raid_disk = -1;
err = bind_rdev_to_array(rdev, mddev);
/* md_write_start(mddev, bi)
* If we need to update some array metadata (e.g. 'active' flag
- * in superblock) before writing, queue bi for later writing
- * and return 0, else return 1 and it will be written now
+ * in superblock) before writing, schedule a superblock update
+ * and wait for it to complete.
+ *
+ * Bios that are not writes return immediately without touching
+ * any state. For a write, writes_pending is incremented; if the
+ * array is currently marked in_sync, that flag is re-checked and
+ * cleared under write_lock, sb_dirty is set and the md thread is
+ * woken. The caller then sleeps on sb_wait until sb_dirty is 0,
+ * so this function may block and no longer returns a value.
+ *
+ * NOTE(review): the wait entry 'w' declared with DEFINE_WAIT is
+ * not referenced anywhere in this function body - confirm whether
+ * it is needed.
*/
-int md_write_start(mddev_t *mddev, struct bio *bi)
+void md_write_start(mddev_t *mddev, struct bio *bi)
{
+ DEFINE_WAIT(w);
if (bio_data_dir(bi) != WRITE)
- return 1;
+ return;
atomic_inc(&mddev->writes_pending);
- spin_lock(&mddev->write_lock);
- if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
- spin_unlock(&mddev->write_lock);
- return 1;
- }
- bio_list_add(&mddev->write_list, bi);
-
if (mddev->in_sync) {
- mddev->in_sync = 0;
- mddev->sb_dirty = 1;
+ spin_lock(&mddev->write_lock);
+ if (mddev->in_sync) {
+ mddev->in_sync = 0;
+ mddev->sb_dirty = 1;
+ md_wakeup_thread(mddev->thread);
+ }
+ spin_unlock(&mddev->write_lock);
}
- spin_unlock(&mddev->write_lock);
- md_wakeup_thread(mddev->thread);
- return 0;
+ wait_event(mddev->sb_wait, mddev->sb_dirty==0);
}
void md_write_end(mddev_t *mddev)
mddev->sb_dirty ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
- mddev->write_list.head ||
(mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector)
if (mddev_trylock(mddev)==0) {
int spares =0;
- struct bio *blist;
spin_lock(&mddev->write_lock);
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
}
if (mddev->safemode == 1)
mddev->safemode = 0;
- blist = bio_list_get(&mddev->write_list);
spin_unlock(&mddev->write_lock);
if (mddev->sb_dirty)
md_update_sb(mddev);
- while (blist) {
- struct bio *b = blist;
- blist = blist->bi_next;
- b->bi_next = NULL;
- generic_make_request(b);
- /* we already counted this, so need to un-count */
- md_write_end(mddev);
- }
-
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
mddev->pers->spare_active(mddev);
}
md_update_sb(mddev);
+
+ /* if array is no longer degraded, then any saved_raid_disk
+ * information must be scrapped
+ */
+ if (!mddev->degraded)
+ ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev->saved_raid_disk = -1;
+
mddev->recovery = 0;
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);