pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/md/raid5.c
md: replace STRIPE_OP_CHECK with 'check_states'
[linux-2.6-omap-h63xx.git] / drivers / md / raid5.c
index 8c4e6149daea1b0c14fed6ddeb841481866fd7bc..544e1600f20828ed24532787ef06b14b69456803 100644 (file)
@@ -373,8 +373,6 @@ static unsigned long get_stripe_work(struct stripe_head *sh)
        test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
        test_and_ack_op(STRIPE_OP_POSTXOR, pending);
        test_and_ack_op(STRIPE_OP_CHECK, pending);
-       if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending))
-               ack++;
 
        sh->ops.count -= ack;
        if (unlikely(sh->ops.count < 0)) {
@@ -392,14 +390,13 @@ raid5_end_read_request(struct bio *bi, int error);
 static void
 raid5_end_write_request(struct bio *bi, int error);
 
-static void ops_run_io(struct stripe_head *sh)
+static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 {
        raid5_conf_t *conf = sh->raid_conf;
        int i, disks = sh->disks;
 
        might_sleep();
 
-       set_bit(STRIPE_IO_STARTED, &sh->state);
        for (i = disks; i--; ) {
                int rw;
                struct bio *bi;
@@ -428,11 +425,11 @@ static void ops_run_io(struct stripe_head *sh)
                rcu_read_unlock();
 
                if (rdev) {
-                       if (test_bit(STRIPE_SYNCING, &sh->state) ||
-                               test_bit(STRIPE_EXPAND_SOURCE, &sh->state) ||
-                               test_bit(STRIPE_EXPAND_READY, &sh->state))
+                       if (s->syncing || s->expanding || s->expanded)
                                md_sync_acct(rdev->bdev, STRIPE_SECTORS);
 
+                       set_bit(STRIPE_IO_STARTED, &sh->state);
+
                        bi->bi_bdev = rdev->bdev;
                        pr_debug("%s: for %llu schedule op %ld on disc %d\n",
                                __func__, (unsigned long long)sh->sector,
@@ -608,7 +605,11 @@ static void ops_complete_compute5(void *stripe_head_ref)
        set_bit(R5_UPTODATE, &tgt->flags);
        BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
        clear_bit(R5_Wantcompute, &tgt->flags);
-       set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
+       clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
+       if (sh->check_state == check_state_compute_run)
+               sh->check_state = check_state_compute_result;
+       else
+               set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
 }
@@ -837,16 +838,11 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
 static void ops_complete_check(void *stripe_head_ref)
 {
        struct stripe_head *sh = stripe_head_ref;
-       int pd_idx = sh->pd_idx;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
-       if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) &&
-               sh->ops.zero_sum_result == 0)
-               set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
-
-       set_bit(STRIPE_OP_CHECK, &sh->ops.complete);
+       sh->check_state = check_state_check_result;
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
 }
@@ -873,17 +869,13 @@ static void ops_run_check(struct stripe_head *sh)
        tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
                &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
 
-       if (tx)
-               set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
-       else
-               clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
-
        atomic_inc(&sh->count);
        tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
                ops_complete_check, sh);
 }
 
-static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
+static void raid5_run_ops(struct stripe_head *sh, unsigned long pending,
+                         unsigned long ops_request)
 {
        int overlap_clear = 0, i, disks = sh->disks;
        struct dma_async_tx_descriptor *tx = NULL;
@@ -893,7 +885,8 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
                overlap_clear++;
        }
 
-       if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending))
+       if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending) ||
+           test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
                tx = ops_run_compute5(sh, pending);
 
        if (test_bit(STRIPE_OP_PREXOR, &pending))
@@ -907,12 +900,9 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
        if (test_bit(STRIPE_OP_POSTXOR, &pending))
                ops_run_postxor(sh, tx, pending);
 
-       if (test_bit(STRIPE_OP_CHECK, &pending))
+       if (test_bit(STRIPE_OP_CHECK, &ops_request))
                ops_run_check(sh);
 
-       if (test_bit(STRIPE_OP_IO, &pending))
-               ops_run_io(sh);
-
        if (overlap_clear)
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -1977,8 +1967,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
        /* don't schedule compute operations or reads on the parity block while
         * a check is in flight
         */
-       if ((disk_idx == sh->pd_idx) &&
-            test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
+       if (disk_idx == sh->pd_idx && sh->check_state)
                return ~0;
 
        /* is the data in this block needed, and can we get it? */
@@ -1999,9 +1988,8 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
                 * 3/ We hold off parity block re-reads until check operations
                 * have quiesced.
                 */
-               if ((s->uptodate == disks - 1) &&
-                   (s->failed && disk_idx == s->failed_num) &&
-                   !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+               if ((s->uptodate == disks - 1) && !sh->check_state &&
+                   (s->failed && disk_idx == s->failed_num)) {
                        set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
                        set_bit(R5_Wantcompute, &dev->flags);
                        sh->ops.target = disk_idx;
@@ -2023,8 +2011,6 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
                         */
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantread, &dev->flags);
-                       if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
-                               sh->ops.count++;
                        s->locked++;
                        pr_debug("Reading block %d (sync=%d)\n", disk_idx,
                                s->syncing);
@@ -2039,12 +2025,8 @@ static void handle_issuing_new_read_requests5(struct stripe_head *sh,
 {
        int i;
 
-       /* Clear completed compute operations.  Parity recovery
-        * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
-        * later on in this routine
-        */
-       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
-               !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+       /* Clear completed compute operations */
+       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete)) {
                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
@@ -2218,9 +2200,6 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
                                                "%d for r-m-w\n", i);
                                        set_bit(R5_LOCKED, &dev->flags);
                                        set_bit(R5_Wantread, &dev->flags);
-                                       if (!test_and_set_bit(
-                                               STRIPE_OP_IO, &sh->ops.pending))
-                                               sh->ops.count++;
                                        s->locked++;
                                } else {
                                        set_bit(STRIPE_DELAYED, &sh->state);
@@ -2244,9 +2223,6 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
                                                "%d for Reconstruct\n", i);
                                        set_bit(R5_LOCKED, &dev->flags);
                                        set_bit(R5_Wantread, &dev->flags);
-                                       if (!test_and_set_bit(
-                                               STRIPE_OP_IO, &sh->ops.pending))
-                                               sh->ops.count++;
                                        s->locked++;
                                } else {
                                        set_bit(STRIPE_DELAYED, &sh->state);
@@ -2374,92 +2350,85 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
 static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                                struct stripe_head_state *s, int disks)
 {
-       int canceled_check = 0;
+       struct r5dev *dev = NULL;
 
        set_bit(STRIPE_HANDLE, &sh->state);
 
-       /* complete a check operation */
-       if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
-               clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
-               clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+       switch (sh->check_state) {
+       case check_state_idle:
+               /* start a new check operation if there are no failures */
                if (s->failed == 0) {
-                       if (sh->ops.zero_sum_result == 0)
-                               /* parity is correct (on disc,
-                                * not in buffer any more)
-                                */
-                               set_bit(STRIPE_INSYNC, &sh->state);
-                       else {
-                               conf->mddev->resync_mismatches +=
-                                       STRIPE_SECTORS;
-                               if (test_bit(
-                                    MD_RECOVERY_CHECK, &conf->mddev->recovery))
-                                       /* don't try to repair!! */
-                                       set_bit(STRIPE_INSYNC, &sh->state);
-                               else {
-                                       set_bit(STRIPE_OP_COMPUTE_BLK,
-                                               &sh->ops.pending);
-                                       set_bit(STRIPE_OP_MOD_REPAIR_PD,
-                                               &sh->ops.pending);
-                                       set_bit(R5_Wantcompute,
-                                               &sh->dev[sh->pd_idx].flags);
-                                       sh->ops.target = sh->pd_idx;
-                                       sh->ops.count++;
-                                       s->uptodate++;
-                               }
-                       }
-               } else
-                       canceled_check = 1; /* STRIPE_INSYNC is not set */
-       }
-
-       /* start a new check operation if there are no failures, the stripe is
-        * not insync, and a repair is not in flight
-        */
-       if (s->failed == 0 &&
-           !test_bit(STRIPE_INSYNC, &sh->state) &&
-           !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-               if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
                        BUG_ON(s->uptodate != disks);
+                       sh->check_state = check_state_run;
+                       set_bit(STRIPE_OP_CHECK, &s->ops_request);
                        clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
-                       sh->ops.count++;
                        s->uptodate--;
+                       break;
                }
-       }
-
-       /* check if we can clear a parity disk reconstruct */
-       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
-           test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-
-               clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
-       }
-
+               dev = &sh->dev[s->failed_num];
+               /* fall through */
+       case check_state_compute_result:
+               sh->check_state = check_state_idle;
+               if (!dev)
+                       dev = &sh->dev[sh->pd_idx];
+
+               /* check that a write has not made the stripe insync */
+               if (test_bit(STRIPE_INSYNC, &sh->state))
+                       break;
 
-       /* Wait for check parity and compute block operations to complete
-        * before write-back.  If a failure occurred while the check operation
-        * was in flight we need to cycle this stripe through handle_stripe
-        * since the parity block may not be uptodate
-        */
-       if (!canceled_check && !test_bit(STRIPE_INSYNC, &sh->state) &&
-           !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
-           !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
-               struct r5dev *dev;
                /* either failed parity check, or recovery is happening */
-               if (s->failed == 0)
-                       s->failed_num = sh->pd_idx;
-               dev = &sh->dev[s->failed_num];
                BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
                BUG_ON(s->uptodate != disks);
 
                set_bit(R5_LOCKED, &dev->flags);
+               s->locked++;
                set_bit(R5_Wantwrite, &dev->flags);
-               if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
-                       sh->ops.count++;
 
                clear_bit(STRIPE_DEGRADED, &sh->state);
-               s->locked++;
                set_bit(STRIPE_INSYNC, &sh->state);
+               break;
+       case check_state_run:
+               break; /* we will be called again upon completion */
+       case check_state_check_result:
+               sh->check_state = check_state_idle;
+
+               /* if a failure occurred during the check operation, leave
+                * STRIPE_INSYNC not set and let the stripe be handled again
+                */
+               if (s->failed)
+                       break;
+
+               /* handle a successful check operation, if parity is correct
+                * we are done.  Otherwise update the mismatch count and repair
+                * parity if !MD_RECOVERY_CHECK
+                */
+               if (sh->ops.zero_sum_result == 0)
+                       /* parity is correct (on disc,
+                        * not in buffer any more)
+                        */
+                       set_bit(STRIPE_INSYNC, &sh->state);
+               else {
+                       conf->mddev->resync_mismatches += STRIPE_SECTORS;
+                       if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+                               /* don't try to repair!! */
+                               set_bit(STRIPE_INSYNC, &sh->state);
+                       else {
+                               sh->check_state = check_state_compute_run;
+                               set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+                               set_bit(R5_Wantcompute,
+                                       &sh->dev[sh->pd_idx].flags);
+                               sh->ops.target = sh->pd_idx;
+                               s->uptodate++;
+                       }
+               }
+               break;
+       case check_state_compute_run:
+               break;
+       default:
+               printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+                      __func__, sh->check_state,
+                      (unsigned long long) sh->sector);
+               BUG();
        }
 }
 
@@ -2811,9 +2780,6 @@ static void handle_stripe5(struct stripe_head *sh)
                                (i == sh->pd_idx || dev->written)) {
                                pr_debug("Writing block %d\n", i);
                                set_bit(R5_Wantwrite, &dev->flags);
-                               if (!test_and_set_bit(
-                                   STRIPE_OP_IO, &sh->ops.pending))
-                                       sh->ops.count++;
                                if (prexor)
                                        continue;
                                if (!test_bit(R5_Insync, &dev->flags) ||
@@ -2836,7 +2802,7 @@ static void handle_stripe5(struct stripe_head *sh)
         *    block.
         */
        if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
-                         !test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
+           !sh->check_state)
                handle_issuing_new_write_requests5(conf, sh, &s, disks);
 
        /* maybe we need to check and possibly fix the parity for this stripe
@@ -2844,11 +2810,10 @@ static void handle_stripe5(struct stripe_head *sh)
         * data is available.  The parity check is held off while parity
         * dependent operations are in flight.
         */
-       if ((s.syncing && s.locked == 0 &&
+       if (sh->check_state ||
+           (s.syncing && s.locked == 0 &&
             !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
-            !test_bit(STRIPE_INSYNC, &sh->state)) ||
-             test_bit(STRIPE_OP_CHECK, &sh->ops.pending) ||
-             test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending))
+            !test_bit(STRIPE_INSYNC, &sh->state)))
                handle_parity_checks5(conf, sh, &s, disks);
 
        if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -2867,16 +2832,12 @@ static void handle_stripe5(struct stripe_head *sh)
                dev = &sh->dev[s.failed_num];
                if (!test_bit(R5_ReWrite, &dev->flags)) {
                        set_bit(R5_Wantwrite, &dev->flags);
-                       if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
-                               sh->ops.count++;
                        set_bit(R5_ReWrite, &dev->flags);
                        set_bit(R5_LOCKED, &dev->flags);
                        s.locked++;
                } else {
                        /* let's read it back */
                        set_bit(R5_Wantread, &dev->flags);
-                       if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
-                               sh->ops.count++;
                        set_bit(R5_LOCKED, &dev->flags);
                        s.locked++;
                }
@@ -2894,13 +2855,10 @@ static void handle_stripe5(struct stripe_head *sh)
                clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
                clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
 
-               for (i = conf->raid_disks; i--; ) {
+               for (i = conf->raid_disks; i--; )
                        set_bit(R5_Wantwrite, &sh->dev[i].flags);
                        set_bit(R5_LOCKED, &dev->flags);
                        s.locked++;
-                       if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
-                               sh->ops.count++;
-               }
        }
 
        if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
@@ -2933,11 +2891,12 @@ static void handle_stripe5(struct stripe_head *sh)
        if (unlikely(blocked_rdev))
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
-       if (pending)
-               raid5_run_ops(sh, pending);
+       if (pending || s.ops_request)
+               raid5_run_ops(sh, pending, s.ops_request);
 
-       return_io(return_bi);
+       ops_run_io(sh, &s);
 
+       return_io(return_bi);
 }
 
 static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
@@ -3149,68 +3108,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        if (unlikely(blocked_rdev))
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
-       return_io(return_bi);
-
-       for (i=disks; i-- ;) {
-               int rw;
-               struct bio *bi;
-               mdk_rdev_t *rdev;
-               if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
-                       rw = WRITE;
-               else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
-                       rw = READ;
-               else
-                       continue;
-
-               set_bit(STRIPE_IO_STARTED, &sh->state);
-
-               bi = &sh->dev[i].req;
-
-               bi->bi_rw = rw;
-               if (rw == WRITE)
-                       bi->bi_end_io = raid5_end_write_request;
-               else
-                       bi->bi_end_io = raid5_end_read_request;
-
-               rcu_read_lock();
-               rdev = rcu_dereference(conf->disks[i].rdev);
-               if (rdev && test_bit(Faulty, &rdev->flags))
-                       rdev = NULL;
-               if (rdev)
-                       atomic_inc(&rdev->nr_pending);
-               rcu_read_unlock();
+       ops_run_io(sh, &s);
 
-               if (rdev) {
-                       if (s.syncing || s.expanding || s.expanded)
-                               md_sync_acct(rdev->bdev, STRIPE_SECTORS);
-
-                       bi->bi_bdev = rdev->bdev;
-                       pr_debug("for %llu schedule op %ld on disc %d\n",
-                               (unsigned long long)sh->sector, bi->bi_rw, i);
-                       atomic_inc(&sh->count);
-                       bi->bi_sector = sh->sector + rdev->data_offset;
-                       bi->bi_flags = 1 << BIO_UPTODATE;
-                       bi->bi_vcnt = 1;
-                       bi->bi_max_vecs = 1;
-                       bi->bi_idx = 0;
-                       bi->bi_io_vec = &sh->dev[i].vec;
-                       bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
-                       bi->bi_io_vec[0].bv_offset = 0;
-                       bi->bi_size = STRIPE_SIZE;
-                       bi->bi_next = NULL;
-                       if (rw == WRITE &&
-                           test_bit(R5_ReWrite, &sh->dev[i].flags))
-                               atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
-                       generic_make_request(bi);
-               } else {
-                       if (rw == WRITE)
-                               set_bit(STRIPE_DEGRADED, &sh->state);
-                       pr_debug("skip op %ld on disc %d for sector %llu\n",
-                               bi->bi_rw, i, (unsigned long long)sh->sector);
-                       clear_bit(R5_LOCKED, &sh->dev[i].flags);
-                       set_bit(STRIPE_HANDLE, &sh->state);
-               }
-       }
+       return_io(return_bi);
 }
 
 static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)