Merge tag 'md-3.4-fixes' of git://neil.brown.name/md
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Apr 2012 15:31:06 +0000 (08:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Apr 2012 15:31:06 +0000 (08:31 -0700)
Pull assorted md fixes from Neil Brown:
 - some RAID levels didn't clear up properly if md_integrity_register
  failed
 - a 'check' of RAID5/RAID6 doesn't actually read any data since a
   recent patch - so fix that (and mark for -stable)
 - a couple of other minor bugs.

* tag 'md-3.4-fixes' of git://neil.brown.name/md:
  md/raid1,raid10: don't compare excess byte during consistency check.
  md/raid5: Fix a bug about judging if the operation is syncing or replacing
  md/raid1:Remove unnecessary rcu_dereference(conf->mirrors[i].rdev).
  md: Avoid OOPS when reshaping raid1 to raid0
  md/raid5: fix handling of bad blocks during recovery.
  md/raid1: If md_integrity_register() failed,run() must free the mem
  md/raid0: If md_integrity_register() fails, raid0_run() must free the mem.
  md/linear: If md_integrity_register() fails, linear_run() must free the mem.

drivers/md/linear.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c

index b0fcc7d02adb49b37275ccf9caa2c48465e886ad..fa211d80fc0a1e4e37603b1455184cbee7da6e56 100644 (file)
@@ -198,6 +198,7 @@ out:
 static int linear_run (struct mddev *mddev)
 {
        struct linear_conf *conf;
+       int ret;
 
        if (md_check_no_bitmap(mddev))
                return -EINVAL;
@@ -211,7 +212,13 @@ static int linear_run (struct mddev *mddev)
        blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
        mddev->queue->backing_dev_info.congested_fn = linear_congested;
        mddev->queue->backing_dev_info.congested_data = mddev;
-       return md_integrity_register(mddev);
+
+       ret =  md_integrity_register(mddev);
+       if (ret) {
+               kfree(conf);
+               mddev->private = NULL;
+       }
+       return ret;
 }
 
 static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
index 6f31f5596e01e0d032f50db71077e07d3af5c3e4..de63a1fc3737b7ac2af3c0f7cbf60cd99b495a31 100644 (file)
@@ -407,6 +407,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
        return array_sectors;
 }
 
+static int raid0_stop(struct mddev *mddev);
+
 static int raid0_run(struct mddev *mddev)
 {
        struct r0conf *conf;
@@ -454,7 +456,12 @@ static int raid0_run(struct mddev *mddev)
 
        blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
        dump_zones(mddev);
-       return md_integrity_register(mddev);
+
+       ret = md_integrity_register(mddev);
+       if (ret)
+               raid0_stop(mddev);
+
+       return ret;
 }
 
 static int raid0_stop(struct mddev *mddev)
@@ -625,6 +632,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
 static void *raid0_takeover_raid1(struct mddev *mddev)
 {
        struct r0conf *priv_conf;
+       int chunksect;
 
        /* Check layout:
         *  - (N - 1) mirror drives must be already faulty
@@ -635,10 +643,25 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
                return ERR_PTR(-EINVAL);
        }
 
+       /*
+        * a raid1 doesn't have the notion of chunk size, so
+        * figure out the largest suitable size we can use.
+        */
+       chunksect = 64 * 2; /* 64K by default */
+
+       /* The array must be an exact multiple of chunksize */
+       while (chunksect && (mddev->array_sectors & (chunksect - 1)))
+               chunksect >>= 1;
+
+       if ((chunksect << 9) < PAGE_SIZE)
+               /* array size does not allow a suitable chunk size */
+               return ERR_PTR(-EINVAL);
+
        /* Set new parameters */
        mddev->new_level = 0;
        mddev->new_layout = 0;
-       mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
+       mddev->new_chunk_sectors = chunksect;
+       mddev->chunk_sectors = chunksect;
        mddev->delta_disks = 1 - mddev->raid_disks;
        mddev->raid_disks = 1;
        /* make sure it will be not marked as dirty */
index 4a40a200d7696319c59cafacfffbf9044d12743c..d35e4c991e38262425cf4495ed6fe98676210ef8 100644 (file)
@@ -1738,7 +1738,7 @@ static int process_checks(struct r1bio *r1_bio)
                                s = sbio->bi_io_vec[j].bv_page;
                                if (memcmp(page_address(p),
                                           page_address(s),
-                                          PAGE_SIZE))
+                                          sbio->bi_io_vec[j].bv_len))
                                        break;
                        }
                } else
@@ -2386,8 +2386,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                int ok = 1;
                for (i = 0 ; i < conf->raid_disks * 2 ; i++)
                        if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
-                               struct md_rdev *rdev =
-                                       rcu_dereference(conf->mirrors[i].rdev);
+                               struct md_rdev *rdev = conf->mirrors[i].rdev;
                                ok = rdev_set_badblocks(rdev, sector_nr,
                                                        min_bad, 0
                                        ) && ok;
@@ -2636,11 +2635,13 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        return ERR_PTR(err);
 }
 
+static int stop(struct mddev *mddev);
 static int run(struct mddev *mddev)
 {
        struct r1conf *conf;
        int i;
        struct md_rdev *rdev;
+       int ret;
 
        if (mddev->level != 1) {
                printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -2705,7 +2706,11 @@ static int run(struct mddev *mddev)
                mddev->queue->backing_dev_info.congested_data = mddev;
                blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
        }
-       return md_integrity_register(mddev);
+
+       ret =  md_integrity_register(mddev);
+       if (ret)
+               stop(mddev);
+       return ret;
 }
 
 static int stop(struct mddev *mddev)
index 3540316886f2bca71a7bd943c4cf98de610ac1ae..fff782189e48f017f3407bb93ec7847a1fe367fd 100644 (file)
@@ -1821,7 +1821,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                        for (j = 0; j < vcnt; j++)
                                if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
                                           page_address(tbio->bi_io_vec[j].bv_page),
-                                          PAGE_SIZE))
+                                          fbio->bi_io_vec[j].bv_len))
                                        break;
                        if (j == vcnt)
                                continue;
index 23ac880bba9a5cbee0533e5144f135d3eb120ace..f351422938e05f0718d9d71be8ff1b86621b948c 100644 (file)
@@ -2471,39 +2471,41 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
        int abort = 0;
        int i;
 
-       md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
        clear_bit(STRIPE_SYNCING, &sh->state);
        s->syncing = 0;
        s->replacing = 0;
        /* There is nothing more to do for sync/check/repair.
+        * Don't even need to abort as that is handled elsewhere
+        * if needed, and not always wanted e.g. if there is a known
+        * bad block here.
         * For recover/replace we need to record a bad block on all
         * non-sync devices, or abort the recovery
         */
-       if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
-               return;
-       /* During recovery devices cannot be removed, so locking and
-        * refcounting of rdevs is not needed
-        */
-       for (i = 0; i < conf->raid_disks; i++) {
-               struct md_rdev *rdev = conf->disks[i].rdev;
-               if (rdev
-                   && !test_bit(Faulty, &rdev->flags)
-                   && !test_bit(In_sync, &rdev->flags)
-                   && !rdev_set_badblocks(rdev, sh->sector,
-                                          STRIPE_SECTORS, 0))
-                       abort = 1;
-               rdev = conf->disks[i].replacement;
-               if (rdev
-                   && !test_bit(Faulty, &rdev->flags)
-                   && !test_bit(In_sync, &rdev->flags)
-                   && !rdev_set_badblocks(rdev, sh->sector,
-                                          STRIPE_SECTORS, 0))
-                       abort = 1;
-       }
-       if (abort) {
-               conf->recovery_disabled = conf->mddev->recovery_disabled;
-               set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+       if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
+               /* During recovery devices cannot be removed, so
+                * locking and refcounting of rdevs is not needed
+                */
+               for (i = 0; i < conf->raid_disks; i++) {
+                       struct md_rdev *rdev = conf->disks[i].rdev;
+                       if (rdev
+                           && !test_bit(Faulty, &rdev->flags)
+                           && !test_bit(In_sync, &rdev->flags)
+                           && !rdev_set_badblocks(rdev, sh->sector,
+                                                  STRIPE_SECTORS, 0))
+                               abort = 1;
+                       rdev = conf->disks[i].replacement;
+                       if (rdev
+                           && !test_bit(Faulty, &rdev->flags)
+                           && !test_bit(In_sync, &rdev->flags)
+                           && !rdev_set_badblocks(rdev, sh->sector,
+                                                  STRIPE_SECTORS, 0))
+                               abort = 1;
+               }
+               if (abort)
+                       conf->recovery_disabled =
+                               conf->mddev->recovery_disabled;
        }
+       md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
 }
 
 static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
                        /* Not in-sync */;
                else if (is_bad) {
                        /* also not in-sync */
-                       if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+                       if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+                           test_bit(R5_UPTODATE, &dev->flags)) {
                                /* treat as in-sync, but with a read error
                                 * which we can now try to correct
                                 */
@@ -3276,12 +3279,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
                /* If there is a failed device being replaced,
                 *     we must be recovering.
                 * else if we are after recovery_cp, we must be syncing
+                * else if MD_RECOVERY_REQUESTED is set, we also are syncing.
                 * else we can only be replacing
                 * sync and recovery both need to read all devices, and so
                 * use the same flag.
                 */
                if (do_recovery ||
-                   sh->sector >= conf->mddev->recovery_cp)
+                   sh->sector >= conf->mddev->recovery_cp ||
+                   test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))
                        s->syncing = 1;
                else
                        s->replacing = 1;