Merge tag 'md-3.9-fixes' of git://neil.brown.name/md
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Mar 2013 22:49:49 +0000 (15:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Mar 2013 22:49:49 +0000 (15:49 -0700)
Pull md fixes from NeilBrown:
 "A few bugfixes for md

   - recent regressions in raid5
   - recent regressions in dmraid
   - a few instances of CONFIG_MULTICORE_RAID456 linger

  Several tagged for -stable"

* tag 'md-3.9-fixes' of git://neil.brown.name/md:
  md: remove CONFIG_MULTICORE_RAID456 entirely
  md/raid5: ensure sync and DISCARD don't happen at the same time.
  MD: Prevent sysfs operations on uninitialized kobjects
  MD RAID5: Avoid accessing gendisk or queue structs when not available
  md/raid5: schedule_reconstruction should abort if nothing to do.

arch/tile/configs/tilegx_defconfig
arch/tile/configs/tilepro_defconfig
drivers/md/md.c
drivers/md/md.h
drivers/md/raid5.c
drivers/md/raid5.h

index 8c5eff6..4768481 100644 (file)
@@ -330,7 +330,6 @@ CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID10=m
 CONFIG_MD_RAID456=m
-CONFIG_MULTICORE_RAID456=y
 CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_DEBUG=y
index e7a3dfc..dd2b8f0 100644 (file)
@@ -324,7 +324,6 @@ CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID10=m
 CONFIG_MD_RAID456=m
-CONFIG_MULTICORE_RAID456=y
 CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_DEBUG=y
index fcb878f..aeceedf 100644 (file)
@@ -7663,10 +7663,8 @@ static int remove_and_add_spares(struct mddev *mddev)
                                removed++;
                        }
                }
-       if (removed)
-               sysfs_notify(&mddev->kobj, NULL,
-                            "degraded");
-
+       if (removed && mddev->kobj.sd)
+               sysfs_notify(&mddev->kobj, NULL, "degraded");
 
        rdev_for_each(rdev, mddev) {
                if (rdev->raid_disk >= 0 &&
index eca59c3..d90fb1a 100644 (file)
@@ -506,7 +506,7 @@ static inline char * mdname (struct mddev * mddev)
 static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
        char nm[20];
-       if (!test_bit(Replacement, &rdev->flags)) {
+       if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
                sprintf(nm, "rd%d", rdev->raid_disk);
                return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
        } else
@@ -516,7 +516,7 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
        char nm[20];
-       if (!test_bit(Replacement, &rdev->flags)) {
+       if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
                sprintf(nm, "rd%d", rdev->raid_disk);
                sysfs_remove_link(&mddev->kobj, nm);
        }
index 3ee2912..24909eb 100644 (file)
@@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        bi->bi_next = NULL;
                        if (rrdev)
                                set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
-                       trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
-                                             bi, disk_devt(conf->mddev->gendisk),
-                                             sh->dev[i].sector);
+
+                       if (conf->mddev->gendisk)
+                               trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+                                                     bi, disk_devt(conf->mddev->gendisk),
+                                                     sh->dev[i].sector);
                        generic_make_request(bi);
                }
                if (rrdev) {
@@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        rbi->bi_io_vec[0].bv_offset = 0;
                        rbi->bi_size = STRIPE_SIZE;
                        rbi->bi_next = NULL;
-                       trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
-                                             rbi, disk_devt(conf->mddev->gendisk),
-                                             sh->dev[i].sector);
+                       if (conf->mddev->gendisk)
+                               trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+                                                     rbi, disk_devt(conf->mddev->gendisk),
+                                                     sh->dev[i].sector);
                        generic_make_request(rbi);
                }
                if (!rdev && !rrdev) {
@@ -2280,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
        int level = conf->level;
 
        if (rcw) {
-               /* if we are not expanding this is a proper write request, and
-                * there will be bios with new data to be drained into the
-                * stripe cache
-                */
-               if (!expand) {
-                       sh->reconstruct_state = reconstruct_state_drain_run;
-                       set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-               } else
-                       sh->reconstruct_state = reconstruct_state_run;
-
-               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -2303,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                                s->locked++;
                        }
                }
+               /* if we are not expanding this is a proper write request, and
+                * there will be bios with new data to be drained into the
+                * stripe cache
+                */
+               if (!expand) {
+                       if (!s->locked)
+                               /* False alarm, nothing to do */
+                               return;
+                       sh->reconstruct_state = reconstruct_state_drain_run;
+                       set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+               } else
+                       sh->reconstruct_state = reconstruct_state_run;
+
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
+
                if (s->locked + conf->max_degraded == disks)
                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
                                atomic_inc(&conf->pending_full_writes);
@@ -2311,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                        test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
 
-               sh->reconstruct_state = reconstruct_state_prexor_drain_run;
-               set_bit(STRIPE_OP_PREXOR, &s->ops_request);
-               set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
-
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                        if (i == pd_idx)
@@ -2330,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                                s->locked++;
                        }
                }
+               if (!s->locked)
+                       /* False alarm - nothing to do */
+                       return;
+               sh->reconstruct_state = reconstruct_state_prexor_drain_run;
+               set_bit(STRIPE_OP_PREXOR, &s->ops_request);
+               set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
        }
 
        /* keep the parity disk(s) locked while asynchronous operations
@@ -2564,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
        int i;
 
        clear_bit(STRIPE_SYNCING, &sh->state);
+       if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+               wake_up(&conf->wait_for_overlap);
        s->syncing = 0;
        s->replacing = 0;
        /* There is nothing more to do for sync/check/repair.
@@ -2737,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 {
        int i;
        struct r5dev *dev;
+       int discard_pending = 0;
 
        for (i = disks; i--; )
                if (sh->dev[i].written) {
@@ -2765,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                                STRIPE_SECTORS,
                                         !test_bit(STRIPE_DEGRADED, &sh->state),
                                                0);
-                       }
-               } else if (test_bit(R5_Discard, &sh->dev[i].flags))
-                       clear_bit(R5_Discard, &sh->dev[i].flags);
+                       } else if (test_bit(R5_Discard, &dev->flags))
+                               discard_pending = 1;
+               }
+       if (!discard_pending &&
+           test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
+               clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+               clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+               if (sh->qd_idx >= 0) {
+                       clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+                       clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
+               }
+               /* now that discard is done we can proceed with any sync */
+               clear_bit(STRIPE_DISCARD, &sh->state);
+               if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+                       set_bit(STRIPE_HANDLE, &sh->state);
+
+       }
 
        if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
                if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -2826,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
        set_bit(STRIPE_HANDLE, &sh->state);
        if (rmw < rcw && rmw > 0) {
                /* prefer read-modify-write, but need to get some data */
-               blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
-                                 (unsigned long long)sh->sector, rmw);
+               if (conf->mddev->queue)
+                       blk_add_trace_msg(conf->mddev->queue,
+                                         "raid5 rmw %llu %d",
+                                         (unsigned long long)sh->sector, rmw);
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                        if ((dev->towrite || i == sh->pd_idx) &&
@@ -2877,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                                }
                        }
                }
-               if (rcw)
+               if (rcw && conf->mddev->queue)
                        blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
                                          (unsigned long long)sh->sector,
                                          rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
@@ -3417,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh)
                return;
        }
 
-       if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
-               set_bit(STRIPE_SYNCING, &sh->state);
-               clear_bit(STRIPE_INSYNC, &sh->state);
+       if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+               spin_lock(&sh->stripe_lock);
+               /* Cannot process 'sync' concurrently with 'discard' */
+               if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+                   test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+                       set_bit(STRIPE_SYNCING, &sh->state);
+                       clear_bit(STRIPE_INSYNC, &sh->state);
+               }
+               spin_unlock(&sh->stripe_lock);
        }
        clear_bit(STRIPE_DELAYED, &sh->state);
 
@@ -3579,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh)
            test_bit(STRIPE_INSYNC, &sh->state)) {
                md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
                clear_bit(STRIPE_SYNCING, &sh->state);
+               if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+                       wake_up(&conf->wait_for_overlap);
        }
 
        /* If the failed drives are just a ReadError, then we might need
@@ -3982,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
                atomic_inc(&conf->active_aligned_reads);
                spin_unlock_irq(&conf->device_lock);
 
-               trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
-                                     align_bi, disk_devt(mddev->gendisk),
-                                     raid_bio->bi_sector);
+               if (mddev->gendisk)
+                       trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+                                             align_bi, disk_devt(mddev->gendisk),
+                                             raid_bio->bi_sector);
                generic_make_request(align_bi);
                return 1;
        } else {
@@ -4078,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
                }
                spin_unlock_irq(&conf->device_lock);
        }
-       trace_block_unplug(mddev->queue, cnt, !from_schedule);
+       if (mddev->queue)
+               trace_block_unplug(mddev->queue, cnt, !from_schedule);
        kfree(cb);
 }
 
@@ -4141,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
                prepare_to_wait(&conf->wait_for_overlap, &w,
                                TASK_UNINTERRUPTIBLE);
+               set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
+               if (test_bit(STRIPE_SYNCING, &sh->state)) {
+                       release_stripe(sh);
+                       schedule();
+                       goto again;
+               }
+               clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
                spin_lock_irq(&sh->stripe_lock);
                for (d = 0; d < conf->raid_disks; d++) {
                        if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4153,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                                goto again;
                        }
                }
+               set_bit(STRIPE_DISCARD, &sh->state);
                finish_wait(&conf->wait_for_overlap, &w);
                for (d = 0; d < conf->raid_disks; d++) {
                        if (d == sh->pd_idx || d == sh->qd_idx)
index 18b2c4a..b0b663b 100644 (file)
@@ -221,10 +221,6 @@ struct stripe_head {
        struct stripe_operations {
                int                  target, target2;
                enum sum_check_flags zero_sum_result;
-               #ifdef CONFIG_MULTICORE_RAID456
-               unsigned long        request;
-               wait_queue_head_t    wait_for_ops;
-               #endif
        } ops;
        struct r5dev {
                /* rreq and rvec are used for the replacement device when
@@ -323,6 +319,7 @@ enum {
        STRIPE_COMPUTE_RUN,
        STRIPE_OPS_REQ_PENDING,
        STRIPE_ON_UNPLUG_LIST,
+       STRIPE_DISCARD,
 };
 
 /*