*/
update_head_pos(mirror, r1_bio);
- if (uptodate || (conf->raid_disks - conf->mddev->degraded) <= 1) {
- /*
- * Set R1BIO_Uptodate in our master bio, so that
- * we will return a good error code for to the higher
- * levels even if IO on some other mirrored buffer fails.
- *
- * The 'master' represents the composite IO operation to
- * user-side. So if something waits for IO, then it will
- * wait for the 'master' bio.
+ if (uptodate)
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
+ else {
+ /* If all other devices have failed, we want to return
+ * the error upwards rather than fail the last device.
+ * Here we redefine "uptodate" to mean "Don't want to retry"
*/
- if (uptodate)
- set_bit(R1BIO_Uptodate, &r1_bio->state);
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
+ if (r1_bio->mddev->degraded == conf->raid_disks ||
+ (r1_bio->mddev->degraded == conf->raid_disks-1 &&
+ !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
+ uptodate = 1;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ }
+ if (uptodate)
raid_end_bio_io(r1_bio);
- } else {
+ else {
/*
* oops, read error:
*/
struct bio_list bl;
struct page **behind_pages = NULL;
const int rw = bio_data_dir(bio);
+ const int do_sync = bio_sync(bio);
int do_barriers;
/*
read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid1_end_read_request;
- read_bio->bi_rw = READ;
+ read_bio->bi_rw = READ | do_sync;
read_bio->bi_private = r1_bio;
generic_make_request(read_bio);
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
mbio->bi_end_io = raid1_end_write_request;
- mbio->bi_rw = WRITE | do_barriers;
+ mbio->bi_rw = WRITE | do_barriers | do_sync;
mbio->bi_private = r1_bio;
if (behind_pages) {
blk_plug_device(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (do_sync)
+ md_wakeup_thread(mddev->thread);
#if 0
while ((bio = bio_list_pop(&bl)) != NULL)
generic_make_request(bio);
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
+ set_bit(Faulty, &rdev->flags);
spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
- }
- set_bit(Faulty, &rdev->flags);
+ } else
+ set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
" Operation continuing on %d devices\n",
}
r1_bio->read_disk = primary;
for (i=0; i<mddev->raid_disks; i++)
- if (r1_bio->bios[i]->bi_end_io == end_sync_read &&
- test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) {
+ if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
int j;
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
- for (j = vcnt; j-- ; )
- if (memcmp(page_address(pbio->bi_io_vec[j].bv_page),
- page_address(sbio->bi_io_vec[j].bv_page),
- PAGE_SIZE))
- break;
+
+ if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
+ for (j = vcnt; j-- ; ) {
+ struct page *p, *s;
+ p = pbio->bi_io_vec[j].bv_page;
+ s = sbio->bi_io_vec[j].bv_page;
+ if (memcmp(page_address(p),
+ page_address(s),
+ PAGE_SIZE))
+ break;
+ }
+ } else
+ j = 0;
if (j >= 0)
mddev->resync_mismatches += r1_bio->sectors;
if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ for (j = 0; j < vcnt ; j++)
+ memcpy(page_address(sbio->bi_io_vec[j].bv_page),
+ page_address(pbio->bi_io_vec[j].bv_page),
+ PAGE_SIZE);
+
}
}
}
* We already have a nr_pending reference on these rdevs.
*/
int i;
+ const int do_sync = bio_sync(r1_bio->master_bio);
clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
clear_bit(R1BIO_Barrier, &r1_bio->state);
for (i=0; i < conf->raid_disks; i++)
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
bio->bi_end_io = raid1_end_write_request;
- bio->bi_rw = WRITE;
+ bio->bi_rw = WRITE | do_sync;
bio->bi_private = r1_bio;
r1_bio->bios[i] = bio;
generic_make_request(bio);
(unsigned long long)r1_bio->sector);
raid_end_bio_io(r1_bio);
} else {
+ const int do_sync = bio_sync(r1_bio->master_bio);
r1_bio->bios[r1_bio->read_disk] =
mddev->ro ? IO_BLOCKED : NULL;
r1_bio->read_disk = disk;
bio->bi_sector = r1_bio->sector + rdev->data_offset;
bio->bi_bdev = rdev->bdev;
bio->bi_end_io = raid1_end_read_request;
- bio->bi_rw = READ;
+ bio->bi_rw = READ | do_sync;
bio->bi_private = r1_bio;
unplug = 1;
generic_make_request(bio);
return -EINVAL;
}
+ md_allow_write(mddev);
+
raid_disks = mddev->raid_disks + mddev->delta_disks;
if (raid_disks < conf->raid_disks) {