]> git.openfabrics.org - ~shefty/rdma-dev.git/blob - drivers/md/dm-stripe.c
dm stripe: add WRITE SAME support
[~shefty/rdma-dev.git] / drivers / md / dm-stripe.c
1 /*
2  * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
3  *
4  * This file is released under the GPL.
5  */
6
7 #include <linux/device-mapper.h>
8
9 #include <linux/module.h>
10 #include <linux/init.h>
11 #include <linux/blkdev.h>
12 #include <linux/bio.h>
13 #include <linux/slab.h>
14 #include <linux/log2.h>
15
/* Message prefix for this target (presumably picked up by the DM* log macros). */
#define DM_MSG_PREFIX "striped"
/* stripe_end_io() stops scheduling events once a stripe's error count reaches this. */
#define DM_IO_ERROR_THRESHOLD 15
18
19 struct stripe {
20         struct dm_dev *dev;
21         sector_t physical_start;
22
23         atomic_t error_count;
24 };
25
26 struct stripe_c {
27         uint32_t stripes;
28         int stripes_shift;
29
30         /* The size of this target / num. stripes */
31         sector_t stripe_width;
32
33         uint32_t chunk_size;
34         int chunk_size_shift;
35
36         /* Needed for handling events */
37         struct dm_target *ti;
38
39         /* Work struct used for triggering events*/
40         struct work_struct trigger_event;
41
42         struct stripe stripe[0];
43 };
44
45 /*
46  * An event is triggered whenever a drive
47  * drops out of a stripe volume.
48  */
49 static void trigger_event(struct work_struct *work)
50 {
51         struct stripe_c *sc = container_of(work, struct stripe_c,
52                                            trigger_event);
53         dm_table_event(sc->ti->table);
54 }
55
56 static inline struct stripe_c *alloc_context(unsigned int stripes)
57 {
58         size_t len;
59
60         if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
61                              stripes))
62                 return NULL;
63
64         len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
65
66         return kmalloc(len, GFP_KERNEL);
67 }
68
69 /*
70  * Parse a single <dev> <sector> pair
71  */
72 static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
73                       unsigned int stripe, char **argv)
74 {
75         unsigned long long start;
76         char dummy;
77
78         if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
79                 return -EINVAL;
80
81         if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
82                           &sc->stripe[stripe].dev))
83                 return -ENXIO;
84
85         sc->stripe[stripe].physical_start = start;
86
87         return 0;
88 }
89
90 /*
91  * Construct a striped mapping.
92  * <number of stripes> <chunk size> [<dev_path> <offset>]+
93  */
94 static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
95 {
96         struct stripe_c *sc;
97         sector_t width;
98         uint32_t stripes;
99         uint32_t chunk_size;
100         int r;
101         unsigned int i;
102
103         if (argc < 2) {
104                 ti->error = "Not enough arguments";
105                 return -EINVAL;
106         }
107
108         if (kstrtouint(argv[0], 10, &stripes) || !stripes) {
109                 ti->error = "Invalid stripe count";
110                 return -EINVAL;
111         }
112
113         if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) {
114                 ti->error = "Invalid chunk_size";
115                 return -EINVAL;
116         }
117
118         width = ti->len;
119         if (sector_div(width, chunk_size)) {
120                 ti->error = "Target length not divisible by "
121                     "chunk size";
122                 return -EINVAL;
123         }
124
125         if (sector_div(width, stripes)) {
126                 ti->error = "Target length not divisible by "
127                     "number of stripes";
128                 return -EINVAL;
129         }
130
131         /*
132          * Do we have enough arguments for that many stripes ?
133          */
134         if (argc != (2 + 2 * stripes)) {
135                 ti->error = "Not enough destinations "
136                         "specified";
137                 return -EINVAL;
138         }
139
140         sc = alloc_context(stripes);
141         if (!sc) {
142                 ti->error = "Memory allocation for striped context "
143                     "failed";
144                 return -ENOMEM;
145         }
146
147         INIT_WORK(&sc->trigger_event, trigger_event);
148
149         /* Set pointer to dm target; used in trigger_event */
150         sc->ti = ti;
151         sc->stripes = stripes;
152         sc->stripe_width = width;
153
154         if (stripes & (stripes - 1))
155                 sc->stripes_shift = -1;
156         else
157                 sc->stripes_shift = __ffs(stripes);
158
159         r = dm_set_target_max_io_len(ti, chunk_size);
160         if (r)
161                 return r;
162
163         ti->num_flush_requests = stripes;
164         ti->num_discard_requests = stripes;
165         ti->num_write_same_requests = stripes;
166
167         sc->chunk_size = chunk_size;
168         if (chunk_size & (chunk_size - 1))
169                 sc->chunk_size_shift = -1;
170         else
171                 sc->chunk_size_shift = __ffs(chunk_size);
172
173         /*
174          * Get the stripe destinations.
175          */
176         for (i = 0; i < stripes; i++) {
177                 argv += 2;
178
179                 r = get_stripe(ti, sc, i, argv);
180                 if (r < 0) {
181                         ti->error = "Couldn't parse stripe destination";
182                         while (i--)
183                                 dm_put_device(ti, sc->stripe[i].dev);
184                         kfree(sc);
185                         return r;
186                 }
187                 atomic_set(&(sc->stripe[i].error_count), 0);
188         }
189
190         ti->private = sc;
191
192         return 0;
193 }
194
195 static void stripe_dtr(struct dm_target *ti)
196 {
197         unsigned int i;
198         struct stripe_c *sc = (struct stripe_c *) ti->private;
199
200         for (i = 0; i < sc->stripes; i++)
201                 dm_put_device(ti, sc->stripe[i].dev);
202
203         flush_work(&sc->trigger_event);
204         kfree(sc);
205 }
206
207 static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
208                               uint32_t *stripe, sector_t *result)
209 {
210         sector_t chunk = dm_target_offset(sc->ti, sector);
211         sector_t chunk_offset;
212
213         if (sc->chunk_size_shift < 0)
214                 chunk_offset = sector_div(chunk, sc->chunk_size);
215         else {
216                 chunk_offset = chunk & (sc->chunk_size - 1);
217                 chunk >>= sc->chunk_size_shift;
218         }
219
220         if (sc->stripes_shift < 0)
221                 *stripe = sector_div(chunk, sc->stripes);
222         else {
223                 *stripe = chunk & (sc->stripes - 1);
224                 chunk >>= sc->stripes_shift;
225         }
226
227         if (sc->chunk_size_shift < 0)
228                 chunk *= sc->chunk_size;
229         else
230                 chunk <<= sc->chunk_size_shift;
231
232         *result = chunk + chunk_offset;
233 }
234
235 static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
236                                     uint32_t target_stripe, sector_t *result)
237 {
238         uint32_t stripe;
239
240         stripe_map_sector(sc, sector, &stripe, result);
241         if (stripe == target_stripe)
242                 return;
243
244         /* round down */
245         sector = *result;
246         if (sc->chunk_size_shift < 0)
247                 *result -= sector_div(sector, sc->chunk_size);
248         else
249                 *result = sector & ~(sector_t)(sc->chunk_size - 1);
250
251         if (target_stripe < stripe)
252                 *result += sc->chunk_size;              /* next chunk */
253 }
254
255 static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
256                             uint32_t target_stripe)
257 {
258         sector_t begin, end;
259
260         stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
261         stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
262                                 target_stripe, &end);
263         if (begin < end) {
264                 bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
265                 bio->bi_sector = begin + sc->stripe[target_stripe].physical_start;
266                 bio->bi_size = to_bytes(end - begin);
267                 return DM_MAPIO_REMAPPED;
268         } else {
269                 /* The range doesn't map to the target stripe */
270                 bio_endio(bio, 0);
271                 return DM_MAPIO_SUBMITTED;
272         }
273 }
274
275 static int stripe_map(struct dm_target *ti, struct bio *bio)
276 {
277         struct stripe_c *sc = ti->private;
278         uint32_t stripe;
279         unsigned target_request_nr;
280
281         if (bio->bi_rw & REQ_FLUSH) {
282                 target_request_nr = dm_bio_get_target_request_nr(bio);
283                 BUG_ON(target_request_nr >= sc->stripes);
284                 bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
285                 return DM_MAPIO_REMAPPED;
286         }
287         if (unlikely(bio->bi_rw & REQ_DISCARD) ||
288             unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
289                 target_request_nr = dm_bio_get_target_request_nr(bio);
290                 BUG_ON(target_request_nr >= sc->stripes);
291                 return stripe_map_range(sc, bio, target_request_nr);
292         }
293
294         stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
295
296         bio->bi_sector += sc->stripe[stripe].physical_start;
297         bio->bi_bdev = sc->stripe[stripe].dev->bdev;
298
299         return DM_MAPIO_REMAPPED;
300 }
301
302 /*
303  * Stripe status:
304  *
305  * INFO
306  * #stripes [stripe_name <stripe_name>] [group word count]
307  * [error count 'A|D' <error count 'A|D'>]
308  *
309  * TABLE
310  * #stripes [stripe chunk size]
311  * [stripe_name physical_start <stripe_name physical_start>]
312  *
313  */
314
315 static int stripe_status(struct dm_target *ti, status_type_t type,
316                          unsigned status_flags, char *result, unsigned maxlen)
317 {
318         struct stripe_c *sc = (struct stripe_c *) ti->private;
319         char buffer[sc->stripes + 1];
320         unsigned int sz = 0;
321         unsigned int i;
322
323         switch (type) {
324         case STATUSTYPE_INFO:
325                 DMEMIT("%d ", sc->stripes);
326                 for (i = 0; i < sc->stripes; i++)  {
327                         DMEMIT("%s ", sc->stripe[i].dev->name);
328                         buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ?
329                                 'D' : 'A';
330                 }
331                 buffer[i] = '\0';
332                 DMEMIT("1 %s", buffer);
333                 break;
334
335         case STATUSTYPE_TABLE:
336                 DMEMIT("%d %llu", sc->stripes,
337                         (unsigned long long)sc->chunk_size);
338                 for (i = 0; i < sc->stripes; i++)
339                         DMEMIT(" %s %llu", sc->stripe[i].dev->name,
340                             (unsigned long long)sc->stripe[i].physical_start);
341                 break;
342         }
343         return 0;
344 }
345
346 static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
347 {
348         unsigned i;
349         char major_minor[16];
350         struct stripe_c *sc = ti->private;
351
352         if (!error)
353                 return 0; /* I/O complete */
354
355         if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
356                 return error;
357
358         if (error == -EOPNOTSUPP)
359                 return error;
360
361         memset(major_minor, 0, sizeof(major_minor));
362         sprintf(major_minor, "%d:%d",
363                 MAJOR(disk_devt(bio->bi_bdev->bd_disk)),
364                 MINOR(disk_devt(bio->bi_bdev->bd_disk)));
365
366         /*
367          * Test to see which stripe drive triggered the event
368          * and increment error count for all stripes on that device.
369          * If the error count for a given device exceeds the threshold
370          * value we will no longer trigger any further events.
371          */
372         for (i = 0; i < sc->stripes; i++)
373                 if (!strcmp(sc->stripe[i].dev->name, major_minor)) {
374                         atomic_inc(&(sc->stripe[i].error_count));
375                         if (atomic_read(&(sc->stripe[i].error_count)) <
376                             DM_IO_ERROR_THRESHOLD)
377                                 schedule_work(&sc->trigger_event);
378                 }
379
380         return error;
381 }
382
383 static int stripe_iterate_devices(struct dm_target *ti,
384                                   iterate_devices_callout_fn fn, void *data)
385 {
386         struct stripe_c *sc = ti->private;
387         int ret = 0;
388         unsigned i = 0;
389
390         do {
391                 ret = fn(ti, sc->stripe[i].dev,
392                          sc->stripe[i].physical_start,
393                          sc->stripe_width, data);
394         } while (!ret && ++i < sc->stripes);
395
396         return ret;
397 }
398
399 static void stripe_io_hints(struct dm_target *ti,
400                             struct queue_limits *limits)
401 {
402         struct stripe_c *sc = ti->private;
403         unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT;
404
405         blk_limits_io_min(limits, chunk_size);
406         blk_limits_io_opt(limits, chunk_size * sc->stripes);
407 }
408
409 static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
410                         struct bio_vec *biovec, int max_size)
411 {
412         struct stripe_c *sc = ti->private;
413         sector_t bvm_sector = bvm->bi_sector;
414         uint32_t stripe;
415         struct request_queue *q;
416
417         stripe_map_sector(sc, bvm_sector, &stripe, &bvm_sector);
418
419         q = bdev_get_queue(sc->stripe[stripe].dev->bdev);
420         if (!q->merge_bvec_fn)
421                 return max_size;
422
423         bvm->bi_bdev = sc->stripe[stripe].dev->bdev;
424         bvm->bi_sector = sc->stripe[stripe].physical_start + bvm_sector;
425
426         return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
427 }
428
429 static struct target_type stripe_target = {
430         .name   = "striped",
431         .version = {1, 5, 0},
432         .module = THIS_MODULE,
433         .ctr    = stripe_ctr,
434         .dtr    = stripe_dtr,
435         .map    = stripe_map,
436         .end_io = stripe_end_io,
437         .status = stripe_status,
438         .iterate_devices = stripe_iterate_devices,
439         .io_hints = stripe_io_hints,
440         .merge  = stripe_merge,
441 };
442
443 int __init dm_stripe_init(void)
444 {
445         int r;
446
447         r = dm_register_target(&stripe_target);
448         if (r < 0) {
449                 DMWARN("target registration failed");
450                 return r;
451         }
452
453         return r;
454 }
455
456 void dm_stripe_exit(void)
457 {
458         dm_unregister_target(&stripe_target);
459 }