Merge branch 'btrfs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[~shefty/rdma-dev.git] / fs / btrfs / volumes.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/bio.h>
20 #include <linux/slab.h>
21 #include <linux/buffer_head.h>
22 #include <linux/blkdev.h>
23 #include <linux/random.h>
24 #include <linux/iocontext.h>
25 #include <linux/capability.h>
26 #include <linux/kthread.h>
27 #include <asm/div64.h>
28 #include "compat.h"
29 #include "ctree.h"
30 #include "extent_map.h"
31 #include "disk-io.h"
32 #include "transaction.h"
33 #include "print-tree.h"
34 #include "volumes.h"
35 #include "async-thread.h"
36 #include "check-integrity.h"
37
38 static int init_first_rw_device(struct btrfs_trans_handle *trans,
39                                 struct btrfs_root *root,
40                                 struct btrfs_device *device);
41 static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
42
43 static DEFINE_MUTEX(uuid_mutex);
44 static LIST_HEAD(fs_uuids);
45
46 static void lock_chunks(struct btrfs_root *root)
47 {
48         mutex_lock(&root->fs_info->chunk_mutex);
49 }
50
51 static void unlock_chunks(struct btrfs_root *root)
52 {
53         mutex_unlock(&root->fs_info->chunk_mutex);
54 }
55
56 static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
57 {
58         struct btrfs_device *device;
59         WARN_ON(fs_devices->opened);
60         while (!list_empty(&fs_devices->devices)) {
61                 device = list_entry(fs_devices->devices.next,
62                                     struct btrfs_device, dev_list);
63                 list_del(&device->dev_list);
64                 kfree(device->name);
65                 kfree(device);
66         }
67         kfree(fs_devices);
68 }
69
70 int btrfs_cleanup_fs_uuids(void)
71 {
72         struct btrfs_fs_devices *fs_devices;
73
74         while (!list_empty(&fs_uuids)) {
75                 fs_devices = list_entry(fs_uuids.next,
76                                         struct btrfs_fs_devices, list);
77                 list_del(&fs_devices->list);
78                 free_fs_devices(fs_devices);
79         }
80         return 0;
81 }
82
83 static noinline struct btrfs_device *__find_device(struct list_head *head,
84                                                    u64 devid, u8 *uuid)
85 {
86         struct btrfs_device *dev;
87
88         list_for_each_entry(dev, head, dev_list) {
89                 if (dev->devid == devid &&
90                     (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
91                         return dev;
92                 }
93         }
94         return NULL;
95 }
96
97 static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
98 {
99         struct btrfs_fs_devices *fs_devices;
100
101         list_for_each_entry(fs_devices, &fs_uuids, list) {
102                 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
103                         return fs_devices;
104         }
105         return NULL;
106 }
107
/*
 * Put the bio chain [@head, @tail] back onto the FRONT of
 * @pending_bios.
 *
 * Callers hold device->io_lock.  If the list already has entries
 * (->tail is set), the old head is re-linked behind @tail so nothing
 * queued in the meantime is lost; if the list was empty, @tail becomes
 * the new list tail.
 */
static void requeue_list(struct btrfs_pending_bios *pending_bios,
			struct bio *head, struct bio *tail)
{

	struct bio *old_head;

	old_head = pending_bios->head;
	pending_bios->head = head;
	if (pending_bios->tail)
		tail->bi_next = old_head;
	else
		pending_bios->tail = tail;
}
121
/*
 * we try to collect pending bios for a device so we don't get a large
 * number of procs sending bios down to the same device.  This greatly
 * improves the schedulers ability to collect and merge the bios.
 *
 * But, it also turns into a long list of bios to process and that is sure
 * to eventually make the worker thread block.  The solution here is to
 * make some progress and then put this work struct back at the end of
 * the list if the block device is congested.  This way, multiple devices
 * can make progress from a single worker thread.
 *
 * Returns 0.  device->io_lock protects the two pending lists and
 * device->running_pending.
 */
static noinline int run_scheduled_bios(struct btrfs_device *device)
{
	struct bio *pending;
	struct backing_dev_info *bdi;
	struct btrfs_fs_info *fs_info;
	struct btrfs_pending_bios *pending_bios;
	struct bio *tail;
	struct bio *cur;
	int again = 0;
	unsigned long num_run;
	unsigned long batch_run = 0;
	unsigned long limit;
	unsigned long last_waited = 0;
	int force_reg = 0;
	int sync_pending = 0;
	struct blk_plug plug;

	/*
	 * this function runs all the bios we've collected for
	 * a particular device.  We don't want to wander off to
	 * another device without first sending all of these down.
	 * So, setup a plug here and finish it off before we return
	 */
	blk_start_plug(&plug);

	bdi = blk_get_backing_dev_info(device->bdev);
	fs_info = device->dev_root->fs_info;
	limit = btrfs_async_submit_limit(fs_info);
	/* wake async submitters once we drop below 2/3 of the limit */
	limit = limit * 2 / 3;

loop:
	spin_lock(&device->io_lock);

loop_lock:
	/* entered with io_lock held */
	num_run = 0;

	/* take all the bios off the list at once and process them
	 * later on (without the lock held).  But, remember the
	 * tail and other pointers so the bios can be properly reinserted
	 * into the list if we hit congestion
	 */
	if (!force_reg && device->pending_sync_bios.head) {
		pending_bios = &device->pending_sync_bios;
		force_reg = 1;
	} else {
		/* alternate back to the regular list so it can't starve */
		pending_bios = &device->pending_bios;
		force_reg = 0;
	}

	pending = pending_bios->head;
	tail = pending_bios->tail;
	WARN_ON(pending && !tail);

	/*
	 * if pending was null this time around, no bios need processing
	 * at all and we can stop.  Otherwise it'll loop back up again
	 * and do an additional check so no bios are missed.
	 *
	 * device->running_pending is used to synchronize with the
	 * schedule_bio code.
	 */
	if (device->pending_sync_bios.head == NULL &&
	    device->pending_bios.head == NULL) {
		again = 0;
		device->running_pending = 0;
	} else {
		again = 1;
		device->running_pending = 1;
	}

	pending_bios->head = NULL;
	pending_bios->tail = NULL;

	spin_unlock(&device->io_lock);

	while (pending) {

		rmb();
		/* we want to work on both lists, but do more bios on the
		 * sync list than the regular list
		 */
		if ((num_run > 32 &&
		    pending_bios != &device->pending_sync_bios &&
		    device->pending_sync_bios.head) ||
		   (num_run > 64 && pending_bios == &device->pending_sync_bios &&
		    device->pending_bios.head)) {
			/* requeue what's left and switch to the other list */
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			goto loop_lock;
		}

		cur = pending;
		pending = pending->bi_next;
		cur->bi_next = NULL;
		atomic_dec(&fs_info->nr_async_bios);

		if (atomic_read(&fs_info->nr_async_bios) < limit &&
		    waitqueue_active(&fs_info->async_submit_wait))
			wake_up(&fs_info->async_submit_wait);

		BUG_ON(atomic_read(&cur->bi_cnt) == 0);

		/*
		 * if we're doing the sync list, record that our
		 * plug has some sync requests on it
		 *
		 * If we're doing the regular list and there are
		 * sync requests sitting around, unplug before
		 * we add more
		 */
		if (pending_bios == &device->pending_sync_bios) {
			sync_pending = 1;
		} else if (sync_pending) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
			sync_pending = 0;
		}

		btrfsic_submit_bio(cur->bi_rw, cur);
		num_run++;
		batch_run++;
		if (need_resched())
			cond_resched();

		/*
		 * we made progress, there is more work to do and the bdi
		 * is now congested.  Back off and let other work structs
		 * run instead
		 */
		if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
		    fs_info->fs_devices->open_devices > 1) {
			struct io_context *ioc;

			ioc = current->io_context;

			/*
			 * the main goal here is that we don't want to
			 * block if we're going to be able to submit
			 * more requests without blocking.
			 *
			 * This code does two great things, it pokes into
			 * the elevator code from a filesystem _and_
			 * it makes assumptions about how batching works.
			 */
			if (ioc && ioc->nr_batch_requests > 0 &&
			    time_before(jiffies, ioc->last_waited + HZ/50UL) &&
			    (last_waited == 0 ||
			     ioc->last_waited == last_waited)) {
				/*
				 * we want to go through our batch of
				 * requests and stop.  So, we copy out
				 * the ioc->last_waited time and test
				 * against it before looping
				 */
				last_waited = ioc->last_waited;
				if (need_resched())
					cond_resched();
				continue;
			}
			/* hand the rest back and reschedule the work item */
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			device->running_pending = 1;

			spin_unlock(&device->io_lock);
			btrfs_requeue_work(&device->work);
			goto done;
		}
		/* unplug every 64 requests just for good measure */
		if (batch_run % 64 == 0) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
			sync_pending = 0;
		}
	}

	cond_resched();
	if (again)
		goto loop;

	/* final race check: something may have been queued meanwhile */
	spin_lock(&device->io_lock);
	if (device->pending_bios.head || device->pending_sync_bios.head)
		goto loop_lock;
	spin_unlock(&device->io_lock);

done:
	blk_finish_plug(&plug);
	return 0;
}
321
322 static void pending_bios_fn(struct btrfs_work *work)
323 {
324         struct btrfs_device *device;
325
326         device = container_of(work, struct btrfs_device, work);
327         run_scheduled_bios(device);
328 }
329
/*
 * Record the device found at @path (superblock @disk_super, id @devid)
 * in the global device lists.  Creates a new btrfs_fs_devices when the
 * fsid has not been seen before, and a new btrfs_device when the devid
 * is new within that filesystem; for an already-known device it only
 * refreshes the stored path.  On success *fs_devices_ret points at the
 * (possibly new) fs_devices.  Returns 0 or a negative errno.
 */
static noinline int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	u64 found_transid = btrfs_super_generation(disk_super);
	char *name;

	fs_devices = find_fsid(disk_super->fsid);
	if (!fs_devices) {
		/* first device seen for this fsid: create the fs_devices */
		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
		if (!fs_devices)
			return -ENOMEM;
		INIT_LIST_HEAD(&fs_devices->devices);
		INIT_LIST_HEAD(&fs_devices->alloc_list);
		list_add(&fs_devices->list, &fs_uuids);
		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
		mutex_init(&fs_devices->device_list_mutex);
		device = NULL;
	} else {
		device = __find_device(&fs_devices->devices, devid,
				       disk_super->dev_item.uuid);
	}
	if (!device) {
		/* new devices may not be added while the fs is open */
		if (fs_devices->opened)
			return -EBUSY;

		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device) {
			/* we can safely leave the fs_devices entry around */
			return -ENOMEM;
		}
		device->devid = devid;
		device->work.func = pending_bios_fn;
		memcpy(device->uuid, disk_super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
		spin_lock_init(&device->io_lock);
		device->name = kstrdup(path, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(&device->dev_alloc_list);

		/* init readahead state */
		spin_lock_init(&device->reada_lock);
		device->reada_curr_zone = NULL;
		atomic_set(&device->reada_in_flight, 0);
		device->reada_next = 0;
		INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
		INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);

		mutex_lock(&fs_devices->device_list_mutex);
		list_add_rcu(&device->dev_list, &fs_devices->devices);
		mutex_unlock(&fs_devices->device_list_mutex);

		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	} else if (!device->name || strcmp(device->name, path)) {
		/* known device re-appeared under a different path */
		name = kstrdup(path, GFP_NOFS);
		if (!name)
			return -ENOMEM;
		kfree(device->name);
		device->name = name;
		if (device->missing) {
			/* it was missing before but has now been found */
			fs_devices->missing_devices--;
			device->missing = 0;
		}
	}

	/* remember which device carries the newest superblock generation */
	if (found_transid > fs_devices->latest_trans) {
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
	}
	*fs_devices_ret = fs_devices;
	return 0;
}
410
/*
 * Duplicate @orig and every device on it into a freshly allocated
 * btrfs_fs_devices.  The clones carry over devid/uuid/name but start
 * with no open block device.  Returns the clone, or ERR_PTR(-ENOMEM)
 * on allocation failure (partially built clones are freed).
 */
static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;

	fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!fs_devices)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&fs_devices->devices);
	INIT_LIST_HEAD(&fs_devices->alloc_list);
	INIT_LIST_HEAD(&fs_devices->list);
	mutex_init(&fs_devices->device_list_mutex);
	fs_devices->latest_devid = orig->latest_devid;
	fs_devices->latest_trans = orig->latest_trans;
	memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));

	/* We have held the volume lock, it is safe to get the devices. */
	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device)
			goto error;

		device->name = kstrdup(orig_dev->name, GFP_NOFS);
		if (!device->name) {
			/* not on the clone's list yet, free it directly */
			kfree(device);
			goto error;
		}

		device->devid = orig_dev->devid;
		device->work.func = pending_bios_fn;
		memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
		spin_lock_init(&device->io_lock);
		INIT_LIST_HEAD(&device->dev_list);
		INIT_LIST_HEAD(&device->dev_alloc_list);

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	return fs_devices;
error:
	/* frees all devices already added to the clone */
	free_fs_devices(fs_devices);
	return ERR_PTR(-ENOMEM);
}
457
/*
 * Drop every device that is not referenced by the filesystem metadata
 * (->in_fs_metadata unset): close its block device, unlink it from the
 * device and alloc lists and free it.  The seed device chain is walked
 * as well.  Serialized by uuid_mutex.  Returns 0.
 */
int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *next;

	mutex_lock(&uuid_mutex);
again:
	/* This is the initialized path, it is safe to release the devices. */
	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (device->in_fs_metadata)
			continue;

		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (device->writeable) {
			list_del_init(&device->dev_alloc_list);
			device->writeable = 0;
			fs_devices->rw_devices--;
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		kfree(device->name);
		kfree(device);
	}

	/* repeat for each fs_devices in the seed chain */
	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		goto again;
	}

	mutex_unlock(&uuid_mutex);
	return 0;
}
493
494 static void __free_device(struct work_struct *work)
495 {
496         struct btrfs_device *device;
497
498         device = container_of(work, struct btrfs_device, rcu_work);
499
500         if (device->bdev)
501                 blkdev_put(device->bdev, device->mode);
502
503         kfree(device->name);
504         kfree(device);
505 }
506
507 static void free_device(struct rcu_head *head)
508 {
509         struct btrfs_device *device;
510
511         device = container_of(head, struct btrfs_device, rcu);
512
513         INIT_WORK(&device->rcu_work, __free_device);
514         schedule_work(&device->rcu_work);
515 }
516
/*
 * Drop one open reference on @fs_devices.  When the last reference is
 * gone, each device is swapped (under RCU) for a freshly allocated
 * "closed" copy with no bdev, and the old structure is freed via
 * call_rcu(); the actual blkdev_put() happens later in __free_device().
 * Returns 0.
 */
static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;

	if (--fs_devices->opened > 0)
		return 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		struct btrfs_device *new_device;

		if (device->bdev)
			fs_devices->open_devices--;

		if (device->writeable) {
			list_del_init(&device->dev_alloc_list);
			fs_devices->rw_devices--;
		}

		if (device->can_discard)
			fs_devices->num_can_discard--;

		/*
		 * NOTE(review): allocation failure here is fatal (BUG_ON)
		 * rather than unwound; consider a graceful failure path.
		 */
		new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
		BUG_ON(!new_device);
		memcpy(new_device, device, sizeof(*new_device));
		new_device->name = kstrdup(device->name, GFP_NOFS);
		BUG_ON(device->name && !new_device->name);
		new_device->bdev = NULL;
		new_device->writeable = 0;
		new_device->in_fs_metadata = 0;
		new_device->can_discard = 0;
		/* RCU readers see either the old or the new entry */
		list_replace_rcu(&device->dev_list, &new_device->dev_list);

		call_rcu(&device->rcu, free_device);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = 0;

	return 0;
}
561
562 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
563 {
564         struct btrfs_fs_devices *seed_devices = NULL;
565         int ret;
566
567         mutex_lock(&uuid_mutex);
568         ret = __btrfs_close_devices(fs_devices);
569         if (!fs_devices->opened) {
570                 seed_devices = fs_devices->seed;
571                 fs_devices->seed = NULL;
572         }
573         mutex_unlock(&uuid_mutex);
574
575         while (seed_devices) {
576                 fs_devices = seed_devices;
577                 seed_devices = fs_devices->seed;
578                 __btrfs_close_devices(fs_devices);
579                 free_fs_devices(fs_devices);
580         }
581         return ret;
582 }
583
/*
 * Open every device on @fs_devices whose path is known, verify its
 * superblock matches the expected devid/uuid, and record per-device
 * state (writeable, discard, rotating).  Devices that fail to open or
 * verify are skipped, not fatal; -EINVAL is returned only if no device
 * at all could be opened.  Also tracks which device holds the newest
 * superblock generation (latest_bdev et al).
 */
static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
				fmode_t flags, void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct list_head *head = &fs_devices->devices;
	struct btrfs_device *device;
	struct block_device *latest_bdev = NULL;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	u64 latest_devid = 0;
	u64 latest_transid = 0;
	u64 devid;
	int seeding = 1;	/* cleared once any non-seed device is found */
	int ret = 0;

	/* open exclusively; @holder identifies us to the block layer */
	flags |= FMODE_EXCL;

	list_for_each_entry(device, head, dev_list) {
		if (device->bdev)
			continue;	/* already open */
		if (!device->name)
			continue;	/* no path known (e.g. missing dev) */

		bdev = blkdev_get_by_path(device->name, flags, holder);
		if (IS_ERR(bdev)) {
			printk(KERN_INFO "open %s failed\n", device->name);
			goto error;
		}
		set_blocksize(bdev, 4096);

		bh = btrfs_read_dev_super(bdev);
		if (!bh)
			goto error_close;

		/* make sure this really is the device we expect */
		disk_super = (struct btrfs_super_block *)bh->b_data;
		devid = btrfs_stack_device_id(&disk_super->dev_item);
		if (devid != device->devid)
			goto error_brelse;

		if (memcmp(device->uuid, disk_super->dev_item.uuid,
			   BTRFS_UUID_SIZE))
			goto error_brelse;

		device->generation = btrfs_super_generation(disk_super);
		if (!latest_transid || device->generation > latest_transid) {
			latest_devid = devid;
			latest_transid = device->generation;
			latest_bdev = bdev;
		}

		if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
			device->writeable = 0;
		} else {
			device->writeable = !bdev_read_only(bdev);
			seeding = 0;
		}

		q = bdev_get_queue(bdev);
		if (blk_queue_discard(q)) {
			device->can_discard = 1;
			fs_devices->num_can_discard++;
		}

		device->bdev = bdev;
		device->in_fs_metadata = 0;
		device->mode = flags;

		/* any rotational device marks the whole fs as rotating */
		if (!blk_queue_nonrot(bdev_get_queue(bdev)))
			fs_devices->rotating = 1;

		fs_devices->open_devices++;
		if (device->writeable) {
			fs_devices->rw_devices++;
			list_add(&device->dev_alloc_list,
				 &fs_devices->alloc_list);
		}
		brelse(bh);
		continue;

error_brelse:
		brelse(bh);
error_close:
		blkdev_put(bdev, flags);
error:
		continue;	/* this device failed, try the next one */
	}
	if (fs_devices->open_devices == 0) {
		ret = -EINVAL;
		goto out;
	}
	fs_devices->seeding = seeding;
	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_bdev;
	fs_devices->latest_devid = latest_devid;
	fs_devices->latest_trans = latest_transid;
	fs_devices->total_rw_bytes = 0;
out:
	return ret;
}
684
685 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
686                        fmode_t flags, void *holder)
687 {
688         int ret;
689
690         mutex_lock(&uuid_mutex);
691         if (fs_devices->opened) {
692                 fs_devices->opened++;
693                 ret = 0;
694         } else {
695                 ret = __btrfs_open_devices(fs_devices, flags, holder);
696         }
697         mutex_unlock(&uuid_mutex);
698         return ret;
699 }
700
/*
 * Read the btrfs superblock from the device at @path, print its
 * identity and register it via device_list_add().  On success
 * *fs_devices_ret points at the fs_devices the device belongs to.
 * Returns 0 or a negative errno.
 */
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
			  struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_super_block *disk_super;
	struct block_device *bdev;
	struct buffer_head *bh;
	int ret;
	u64 devid;
	u64 transid;

	/* exclusive open for the duration of the scan */
	flags |= FMODE_EXCL;
	bdev = blkdev_get_by_path(path, flags, holder);

	if (IS_ERR(bdev)) {
		ret = PTR_ERR(bdev);
		goto error;
	}

	mutex_lock(&uuid_mutex);
	ret = set_blocksize(bdev, 4096);
	if (ret)
		goto error_close;
	bh = btrfs_read_dev_super(bdev);
	if (!bh) {
		/* no valid btrfs superblock found on this device */
		ret = -EINVAL;
		goto error_close;
	}
	disk_super = (struct btrfs_super_block *)bh->b_data;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	transid = btrfs_super_generation(disk_super);
	if (disk_super->label[0])
		printk(KERN_INFO "device label %s ", disk_super->label);
	else
		printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
	printk(KERN_CONT "devid %llu transid %llu %s\n",
	       (unsigned long long)devid, (unsigned long long)transid, path);
	ret = device_list_add(path, disk_super, devid, fs_devices_ret);

	brelse(bh);
error_close:
	mutex_unlock(&uuid_mutex);
	blkdev_put(bdev, flags);
error:
	return ret;
}
746
/*
 * helper to account the used device space in the range
 *
 * Walks the DEV_EXTENT items of @device and sums, into *length, how
 * many bytes of the range [start, end] are covered by dev extents.
 * Returns 0 on success or a negative errno from the tree search.
 */
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
				   u64 end, u64 *length)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 extent_end;
	int ret;
	int slot;
	struct extent_buffer *l;

	*length = 0;

	/* nothing beyond the device's size can be allocated */
	if (start >= device->total_bytes)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = 2;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/* step back: the previous extent may still overlap @start */
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
			goto next;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (key.offset <= start && extent_end > end) {
			/* extent covers the whole [start, end] range */
			*length = end - start + 1;
			break;
		} else if (key.offset <= start && extent_end > start)
			/* extent overlaps the front of the range */
			*length += extent_end - start;
		else if (key.offset > start && extent_end <= end)
			/* extent lies entirely inside the range */
			*length += extent_end - key.offset;
		else if (key.offset > start && key.offset <= end) {
			/* extent overlaps the tail of the range */
			*length += end - key.offset + 1;
			break;
		} else if (key.offset > end)
			break;

next:
		path->slots[0]++;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
830
/*
 * find_free_dev_extent - find free space in the specified device
 * @device:	the device on which we search for free space
 * @num_bytes:	the size of the free space that we need
 * @start:	used to return the start of the hole that was found
 * @len:	used to return the size of the hole that was found
 *
 * This uses a pretty simple search: it walks the device's DEV_EXTENT
 * items in offset order and looks at the gaps between them.  The
 * expectation is that it is called very infrequently and that a given
 * device has a small number of extents.
 *
 * @start receives the start of a suitable hole if one of at least
 * @num_bytes is found.  If no suitable hole exists, @start receives the
 * start of the largest hole instead and -ENOSPC is returned.
 *
 * @len, if non-NULL, likewise receives either the size of the suitable
 * hole or the size of the largest hole found.
 */
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *len)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 hole_size;
	u64 max_hole_start;
	u64 max_hole_size;
	u64 extent_end;
	u64 search_start;
	u64 search_end = device->total_bytes;
	int ret;
	int slot;
	struct extent_buffer *l;

	/* FIXME use last free of some kind */

	/* we don't want to overwrite the superblock on the drive,
	 * so we make sure to start at an offset of at least 1MB
	 */
	search_start = max(root->fs_info->alloc_start, 1024ull * 1024);

	max_hole_start = search_start;
	max_hole_size = 0;
	hole_size = 0;

	/* when shrinking, total_bytes may already be below the minimum */
	if (search_start >= search_end) {
		ret = -ENOSPC;
		goto error;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto error;
	}
	path->reada = 2;

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/*
		 * No exact match: back up one item so we also look at the
		 * extent that may already cover search_start.
		 */
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			/* ran off the end of the tree */
			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
			goto next;

		/* the gap between the previous extent and this one */
		if (key.offset > search_start) {
			hole_size = key.offset - search_start;

			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}

			/*
			 * If this free space is greater than which we need,
			 * it must be the max free space that we have found
			 * until now, so max_hole_start must point to the start
			 * of this free space and the length of this free space
			 * is stored in max_hole_size. Thus, we return
			 * max_hole_start and max_hole_size and go back to the
			 * caller.
			 */
			if (hole_size >= num_bytes) {
				ret = 0;
				goto out;
			}
		}

		/* advance search_start past this extent */
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (extent_end > search_start)
			search_start = extent_end;
next:
		path->slots[0]++;
		cond_resched();
	}

	/*
	 * At this point, search_start should be the end of
	 * allocated dev extents, and when shrinking the device,
	 * search_end may be smaller than search_start.
	 */
	if (search_end > search_start)
		hole_size = search_end - search_start;

	if (hole_size > max_hole_size) {
		max_hole_start = search_start;
		max_hole_size = hole_size;
	}

	/* See above. */
	if (hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;

out:
	btrfs_free_path(path);
error:
	*start = max_hole_start;
	if (len)
		*len = max_hole_size;
	return ret;
}
987
/*
 * Remove the DEV_EXTENT item that covers byte @start on @device, giving
 * its length back to the device (bytes_used) and to the filesystem-wide
 * free_chunk_space counter.
 *
 * Returns 0 on success or a negative errno from the tree search/delete.
 */
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
			  struct btrfs_device *device,
			  u64 start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
again:
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		/*
		 * No extent begins exactly at @start; the previous
		 * DEV_EXTENT item for this device must contain it.
		 */
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		if (ret)
			goto out;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		/* the previous extent must actually cover @start */
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
		/* redo the search with the extent's real start as the key */
		key = found_key;
		btrfs_release_path(path);
		goto again;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	}
	BUG_ON(ret);

	if (device->bytes_used > 0) {
		u64 len = btrfs_dev_extent_length(leaf, extent);
		device->bytes_used -= len;
		/* the freed bytes become unallocated chunk space again */
		spin_lock(&root->fs_info->free_chunk_lock);
		root->fs_info->free_chunk_space += len;
		spin_unlock(&root->fs_info->free_chunk_lock);
	}
	ret = btrfs_del_item(trans, root, path);

out:
	btrfs_free_path(path);
	return ret;
}
1043
1044 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1045                            struct btrfs_device *device,
1046                            u64 chunk_tree, u64 chunk_objectid,
1047                            u64 chunk_offset, u64 start, u64 num_bytes)
1048 {
1049         int ret;
1050         struct btrfs_path *path;
1051         struct btrfs_root *root = device->dev_root;
1052         struct btrfs_dev_extent *extent;
1053         struct extent_buffer *leaf;
1054         struct btrfs_key key;
1055
1056         WARN_ON(!device->in_fs_metadata);
1057         path = btrfs_alloc_path();
1058         if (!path)
1059                 return -ENOMEM;
1060
1061         key.objectid = device->devid;
1062         key.offset = start;
1063         key.type = BTRFS_DEV_EXTENT_KEY;
1064         ret = btrfs_insert_empty_item(trans, root, path, &key,
1065                                       sizeof(*extent));
1066         BUG_ON(ret);
1067
1068         leaf = path->nodes[0];
1069         extent = btrfs_item_ptr(leaf, path->slots[0],
1070                                 struct btrfs_dev_extent);
1071         btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
1072         btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
1073         btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1074
1075         write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1076                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1077                     BTRFS_UUID_SIZE);
1078
1079         btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1080         btrfs_mark_buffer_dirty(leaf);
1081         btrfs_free_path(path);
1082         return ret;
1083 }
1084
1085 static noinline int find_next_chunk(struct btrfs_root *root,
1086                                     u64 objectid, u64 *offset)
1087 {
1088         struct btrfs_path *path;
1089         int ret;
1090         struct btrfs_key key;
1091         struct btrfs_chunk *chunk;
1092         struct btrfs_key found_key;
1093
1094         path = btrfs_alloc_path();
1095         if (!path)
1096                 return -ENOMEM;
1097
1098         key.objectid = objectid;
1099         key.offset = (u64)-1;
1100         key.type = BTRFS_CHUNK_ITEM_KEY;
1101
1102         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1103         if (ret < 0)
1104                 goto error;
1105
1106         BUG_ON(ret == 0);
1107
1108         ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
1109         if (ret) {
1110                 *offset = 0;
1111         } else {
1112                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1113                                       path->slots[0]);
1114                 if (found_key.objectid != objectid)
1115                         *offset = 0;
1116                 else {
1117                         chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
1118                                                struct btrfs_chunk);
1119                         *offset = found_key.offset +
1120                                 btrfs_chunk_length(path->nodes[0], chunk);
1121                 }
1122         }
1123         ret = 0;
1124 error:
1125         btrfs_free_path(path);
1126         return ret;
1127 }
1128
1129 static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
1130 {
1131         int ret;
1132         struct btrfs_key key;
1133         struct btrfs_key found_key;
1134         struct btrfs_path *path;
1135
1136         root = root->fs_info->chunk_root;
1137
1138         path = btrfs_alloc_path();
1139         if (!path)
1140                 return -ENOMEM;
1141
1142         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1143         key.type = BTRFS_DEV_ITEM_KEY;
1144         key.offset = (u64)-1;
1145
1146         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1147         if (ret < 0)
1148                 goto error;
1149
1150         BUG_ON(ret == 0);
1151
1152         ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
1153                                   BTRFS_DEV_ITEM_KEY);
1154         if (ret) {
1155                 *objectid = 1;
1156         } else {
1157                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1158                                       path->slots[0]);
1159                 *objectid = found_key.offset + 1;
1160         }
1161         ret = 0;
1162 error:
1163         btrfs_free_path(path);
1164         return ret;
1165 }
1166
1167 /*
1168  * the device information is stored in the chunk root
1169  * the btrfs_device struct should be fully filled in
1170  */
1171 int btrfs_add_device(struct btrfs_trans_handle *trans,
1172                      struct btrfs_root *root,
1173                      struct btrfs_device *device)
1174 {
1175         int ret;
1176         struct btrfs_path *path;
1177         struct btrfs_dev_item *dev_item;
1178         struct extent_buffer *leaf;
1179         struct btrfs_key key;
1180         unsigned long ptr;
1181
1182         root = root->fs_info->chunk_root;
1183
1184         path = btrfs_alloc_path();
1185         if (!path)
1186                 return -ENOMEM;
1187
1188         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1189         key.type = BTRFS_DEV_ITEM_KEY;
1190         key.offset = device->devid;
1191
1192         ret = btrfs_insert_empty_item(trans, root, path, &key,
1193                                       sizeof(*dev_item));
1194         if (ret)
1195                 goto out;
1196
1197         leaf = path->nodes[0];
1198         dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1199
1200         btrfs_set_device_id(leaf, dev_item, device->devid);
1201         btrfs_set_device_generation(leaf, dev_item, 0);
1202         btrfs_set_device_type(leaf, dev_item, device->type);
1203         btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1204         btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1205         btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1206         btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1207         btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1208         btrfs_set_device_group(leaf, dev_item, 0);
1209         btrfs_set_device_seek_speed(leaf, dev_item, 0);
1210         btrfs_set_device_bandwidth(leaf, dev_item, 0);
1211         btrfs_set_device_start_offset(leaf, dev_item, 0);
1212
1213         ptr = (unsigned long)btrfs_device_uuid(dev_item);
1214         write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1215         ptr = (unsigned long)btrfs_device_fsid(dev_item);
1216         write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
1217         btrfs_mark_buffer_dirty(leaf);
1218
1219         ret = 0;
1220 out:
1221         btrfs_free_path(path);
1222         return ret;
1223 }
1224
/*
 * Delete the DEV_ITEM for @device from the chunk tree in a transaction
 * of its own.  chunk_mutex is held across the search/delete so no new
 * chunk can be allocated on the device in the meantime.
 */
static int btrfs_rm_dev_item(struct btrfs_root *root,
			     struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;
	lock_chunks(root);

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		/* no such device item */
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret)
		goto out;
out:
	/* the transaction is committed even on error; ret is preserved */
	btrfs_free_path(path);
	unlock_chunks(root);
	btrfs_commit_transaction(trans, root);
	return ret;
}
1267
/*
 * Remove a device from a mounted filesystem.
 *
 * @device_path is either a path to a block device or the literal string
 * "missing", which selects the first device that is recorded in the fs
 * metadata but has no backing bdev.  All chunks are first migrated off
 * the device (btrfs_shrink_device to 0), then its DEV_ITEM is deleted
 * and it is unhooked from the in-memory device lists under RCU.
 */
int btrfs_rm_device(struct btrfs_root *root, char *device_path)
{
	struct btrfs_device *device;
	struct btrfs_device *next_device;
	struct block_device *bdev;
	struct buffer_head *bh = NULL;
	struct btrfs_super_block *disk_super;
	struct btrfs_fs_devices *cur_devices;
	u64 all_avail;
	u64 devid;
	u64 num_devices;
	u8 *dev_uuid;
	int ret = 0;
	bool clear_super = false;

	mutex_lock(&uuid_mutex);

	/* refuse to drop below the minimum device count for the profiles */
	all_avail = root->fs_info->avail_data_alloc_bits |
		root->fs_info->avail_system_alloc_bits |
		root->fs_info->avail_metadata_alloc_bits;

	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
	    root->fs_info->fs_devices->num_devices <= 4) {
		printk(KERN_ERR "btrfs: unable to go below four devices "
		       "on raid10\n");
		ret = -EINVAL;
		goto out;
	}

	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
	    root->fs_info->fs_devices->num_devices <= 2) {
		printk(KERN_ERR "btrfs: unable to go below two "
		       "devices on raid1\n");
		ret = -EINVAL;
		goto out;
	}

	if (strcmp(device_path, "missing") == 0) {
		/* pick the first metadata device that has no backing bdev */
		struct list_head *devices;
		struct btrfs_device *tmp;

		device = NULL;
		devices = &root->fs_info->fs_devices->devices;
		/*
		 * It is safe to read the devices since the volume_mutex
		 * is held.
		 */
		list_for_each_entry(tmp, devices, dev_list) {
			if (tmp->in_fs_metadata && !tmp->bdev) {
				device = tmp;
				break;
			}
		}
		bdev = NULL;
		bh = NULL;
		disk_super = NULL;
		if (!device) {
			printk(KERN_ERR "btrfs: no missing devices found to "
			       "remove\n");
			goto out;
		}
	} else {
		/* look the device up by the super block on disk */
		bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
					  root->fs_info->bdev_holder);
		if (IS_ERR(bdev)) {
			ret = PTR_ERR(bdev);
			goto out;
		}

		set_blocksize(bdev, 4096);
		bh = btrfs_read_dev_super(bdev);
		if (!bh) {
			ret = -EINVAL;
			goto error_close;
		}
		disk_super = (struct btrfs_super_block *)bh->b_data;
		devid = btrfs_stack_device_id(&disk_super->dev_item);
		dev_uuid = disk_super->dev_item.uuid;
		device = btrfs_find_device(root, devid, dev_uuid,
					   disk_super->fsid);
		if (!device) {
			ret = -ENOENT;
			goto error_brelse;
		}
	}

	if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
		printk(KERN_ERR "btrfs: unable to remove the only writeable "
		       "device\n");
		ret = -EINVAL;
		goto error_brelse;
	}

	/* stop allocating from this device; undone via error_undo on failure */
	if (device->writeable) {
		lock_chunks(root);
		list_del_init(&device->dev_alloc_list);
		unlock_chunks(root);
		root->fs_info->fs_devices->rw_devices--;
		clear_super = true;
	}

	/* migrate every chunk off the device */
	ret = btrfs_shrink_device(device, 0);
	if (ret)
		goto error_undo;

	ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
	if (ret)
		goto error_undo;

	/*
	 * NOTE(review): this plain '=' overwrites the filesystem-wide
	 * counter with a single device's free space; '-=' (or accounting
	 * before the shrink) looks intended — verify against upstream.
	 */
	spin_lock(&root->fs_info->free_chunk_lock);
	root->fs_info->free_chunk_space = device->total_bytes -
		device->bytes_used;
	spin_unlock(&root->fs_info->free_chunk_lock);

	device->in_fs_metadata = 0;
	btrfs_scrub_cancel_dev(root, device);

	/*
	 * the device list mutex makes sure that we don't change
	 * the device list while someone else is writing out all
	 * the device supers.
	 */

	cur_devices = device->fs_devices;
	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	list_del_rcu(&device->dev_list);

	device->fs_devices->num_devices--;

	if (device->missing)
		root->fs_info->fs_devices->missing_devices--;

	/* repoint cached "latest" bdev pointers away from the victim */
	next_device = list_entry(root->fs_info->fs_devices->devices.next,
				 struct btrfs_device, dev_list);
	if (device->bdev == root->fs_info->sb->s_bdev)
		root->fs_info->sb->s_bdev = next_device->bdev;
	if (device->bdev == root->fs_info->fs_devices->latest_bdev)
		root->fs_info->fs_devices->latest_bdev = next_device->bdev;

	if (device->bdev)
		device->fs_devices->open_devices--;

	/* readers may still hold RCU references; free after a grace period */
	call_rcu(&device->rcu, free_device);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
	btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);

	/* if this emptied a seed fs_devices, unchain and free it */
	if (cur_devices->open_devices == 0) {
		struct btrfs_fs_devices *fs_devices;
		fs_devices = root->fs_info->fs_devices;
		while (fs_devices) {
			if (fs_devices->seed == cur_devices)
				break;
			fs_devices = fs_devices->seed;
		}
		fs_devices->seed = cur_devices->seed;
		cur_devices->seed = NULL;
		lock_chunks(root);
		__btrfs_close_devices(cur_devices);
		unlock_chunks(root);
		free_fs_devices(cur_devices);
	}

	/*
	 * at this point, the device is zero sized.  We want to
	 * remove it from the devices list and zero out the old super
	 */
	if (clear_super) {
		/* make sure this device isn't detected as part of
		 * the FS anymore
		 */
		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
		set_buffer_dirty(bh);
		sync_dirty_buffer(bh);
	}

	ret = 0;

error_brelse:
	brelse(bh);
error_close:
	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
out:
	mutex_unlock(&uuid_mutex);
	return ret;
error_undo:
	/* put the device back on the allocation list */
	if (device->writeable) {
		lock_chunks(root);
		list_add(&device->dev_alloc_list,
			 &root->fs_info->fs_devices->alloc_list);
		unlock_chunks(root);
		root->fs_info->fs_devices->rw_devices++;
	}
	goto error_brelse;
}
1465
/*
 * does all the dirty work required for changing file system's UUID.
 *
 * The current (seeding) fs_devices is cloned onto a new "seed_devices"
 * structure that keeps the old fsid and all existing devices, while the
 * original fs_devices is emptied, given a fresh fsid, and chained to the
 * seed via fs_devices->seed.  The SEEDING flag is cleared in the super
 * block so the sprouted filesystem becomes writable.
 */
static int btrfs_prepare_sprout(struct btrfs_root *root)
{
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_fs_devices *old_devices;
	struct btrfs_fs_devices *seed_devices;
	struct btrfs_super_block *disk_super = root->fs_info->super_copy;
	struct btrfs_device *device;
	u64 super_flags;

	BUG_ON(!mutex_is_locked(&uuid_mutex));
	if (!fs_devices->seeding)
		return -EINVAL;

	seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!seed_devices)
		return -ENOMEM;

	/* keep an unopened copy of the old fsid registered in fs_uuids */
	old_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(old_devices)) {
		kfree(seed_devices);
		return PTR_ERR(old_devices);
	}

	list_add(&old_devices->list, &fs_uuids);

	/* seed_devices takes over the current state and device lists */
	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
	seed_devices->opened = 1;
	INIT_LIST_HEAD(&seed_devices->devices);
	INIT_LIST_HEAD(&seed_devices->alloc_list);
	mutex_init(&seed_devices->device_list_mutex);

	/* move devices over under the list mutex; RCU readers stay safe */
	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
			      synchronize_rcu);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
	list_for_each_entry(device, &seed_devices->devices, dev_list) {
		device->fs_devices = seed_devices;
	}

	/* the original fs_devices is now the (empty) sprout */
	fs_devices->seeding = 0;
	fs_devices->num_devices = 0;
	fs_devices->open_devices = 0;
	fs_devices->seed = seed_devices;

	generate_random_uuid(fs_devices->fsid);
	memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	super_flags = btrfs_super_flags(disk_super) &
		      ~BTRFS_SUPER_FLAG_SEEDING;
	btrfs_set_super_flags(disk_super, super_flags);

	return 0;
}
1524
/*
 * store the expected generation for seed devices in device items.
 *
 * Walks every DEV_ITEM in the chunk tree; for devices that still belong
 * to a seeding fs_devices, the on-disk generation is rewritten to the
 * in-memory device generation.
 */
static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dev_item *dev_item;
	struct btrfs_device *device;
	struct btrfs_key key;
	u8 fs_uuid[BTRFS_UUID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];
	u64 devid;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	root = root->fs_info->chunk_root;
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = BTRFS_DEV_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto error;

		leaf = path->nodes[0];
next_slot:
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret > 0)
				break;
			if (ret < 0)
				goto error;
			/* re-search from the next leaf's first key */
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			btrfs_release_path(path);
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
		    key.type != BTRFS_DEV_ITEM_KEY)
			break;

		/* match the item back to its in-memory device */
		dev_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_dev_item);
		devid = btrfs_device_id(leaf, dev_item);
		read_extent_buffer(leaf, dev_uuid,
				   (unsigned long)btrfs_device_uuid(dev_item),
				   BTRFS_UUID_SIZE);
		read_extent_buffer(leaf, fs_uuid,
				   (unsigned long)btrfs_device_fsid(dev_item),
				   BTRFS_UUID_SIZE);
		device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
		BUG_ON(!device);

		if (device->fs_devices->seeding) {
			btrfs_set_device_generation(leaf, dev_item,
						    device->generation);
			btrfs_mark_buffer_dirty(leaf);
		}

		path->slots[0]++;
		goto next_slot;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
1600
/*
 * Add the block device at @device_path as a new writeable member of the
 * mounted filesystem.
 *
 * Opens the device exclusively, rejects duplicates, allocates and fills
 * in a btrfs_device, publishes it on the fs_devices lists and grows the
 * superblock totals.  When the filesystem is a seed filesystem this
 * also sprouts a new writable filesystem on top of it and relocates the
 * system chunks afterwards.
 *
 * Returns 0 on success or a negative errno; several sprout-path
 * failures are still handled with BUG_ON().
 */
int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
{
        struct request_queue *q;
        struct btrfs_trans_handle *trans;
        struct btrfs_device *device;
        struct block_device *bdev;
        struct list_head *devices;
        struct super_block *sb = root->fs_info->sb;
        u64 total_bytes;
        int seeding_dev = 0;
        int ret = 0;

        /* a read-only mount may only gain a device while sprouting a seed */
        if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
                return -EINVAL;

        bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
                                  root->fs_info->bdev_holder);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);

        if (root->fs_info->fs_devices->seeding) {
                /* keep out remounts and other volume ops while we sprout */
                seeding_dev = 1;
                down_write(&sb->s_umount);
                mutex_lock(&uuid_mutex);
        }

        /* flush anything cached against the new device before we use it */
        filemap_write_and_wait(bdev->bd_inode->i_mapping);

        devices = &root->fs_info->fs_devices->devices;
        /*
         * we have the volume lock, so we don't need the extra
         * device list mutex while reading the list here.
         */
        list_for_each_entry(device, devices, dev_list) {
                if (device->bdev == bdev) {
                        ret = -EEXIST;
                        goto error;
                }
        }

        device = kzalloc(sizeof(*device), GFP_NOFS);
        if (!device) {
                /* we can safely leave the fs_devices entry around */
                ret = -ENOMEM;
                goto error;
        }

        device->name = kstrdup(device_path, GFP_NOFS);
        if (!device->name) {
                kfree(device);
                ret = -ENOMEM;
                goto error;
        }

        ret = find_next_devid(root, &device->devid);
        if (ret) {
                kfree(device->name);
                kfree(device);
                goto error;
        }

        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                kfree(device->name);
                kfree(device);
                ret = PTR_ERR(trans);
                goto error;
        }

        lock_chunks(root);

        q = bdev_get_queue(bdev);
        if (blk_queue_discard(q))
                device->can_discard = 1;
        device->writeable = 1;
        device->work.func = pending_bios_fn;
        generate_random_uuid(device->uuid);
        spin_lock_init(&device->io_lock);
        device->generation = trans->transid;
        device->io_width = root->sectorsize;
        device->io_align = root->sectorsize;
        device->sector_size = root->sectorsize;
        device->total_bytes = i_size_read(bdev->bd_inode);
        device->disk_total_bytes = device->total_bytes;
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
        device->mode = FMODE_EXCL;
        set_blocksize(device->bdev, 4096);

        if (seeding_dev) {
                /* flip the sprout read-write before publishing the device */
                sb->s_flags &= ~MS_RDONLY;
                ret = btrfs_prepare_sprout(root);
                BUG_ON(ret);
        }

        device->fs_devices = root->fs_info->fs_devices;

        /*
         * we don't want write_supers to jump in here with our device
         * half setup
         */
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
        list_add(&device->dev_alloc_list,
                 &root->fs_info->fs_devices->alloc_list);
        root->fs_info->fs_devices->num_devices++;
        root->fs_info->fs_devices->open_devices++;
        root->fs_info->fs_devices->rw_devices++;
        if (device->can_discard)
                root->fs_info->fs_devices->num_can_discard++;
        root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;

        spin_lock(&root->fs_info->free_chunk_lock);
        root->fs_info->free_chunk_space += device->total_bytes;
        spin_unlock(&root->fs_info->free_chunk_lock);

        /* a single rotating device makes the whole fs "rotating" */
        if (!blk_queue_nonrot(bdev_get_queue(bdev)))
                root->fs_info->fs_devices->rotating = 1;

        total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
        btrfs_set_super_total_bytes(root->fs_info->super_copy,
                                    total_bytes + device->total_bytes);

        total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
        btrfs_set_super_num_devices(root->fs_info->super_copy,
                                    total_bytes + 1);
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

        if (seeding_dev) {
                ret = init_first_rw_device(trans, root, device);
                BUG_ON(ret);
                ret = btrfs_finish_sprout(trans, root);
                BUG_ON(ret);
        } else {
                ret = btrfs_add_device(trans, root, device);
        }

        /*
         * we've got more storage, clear any full flags on the space
         * infos
         */
        btrfs_clear_space_info_full(root->fs_info);

        unlock_chunks(root);
        btrfs_commit_transaction(trans, root);

        if (seeding_dev) {
                mutex_unlock(&uuid_mutex);
                up_write(&sb->s_umount);

                /* sprouted: move the system chunks off the seed layout */
                ret = btrfs_relocate_sys_chunks(root);
                BUG_ON(ret);
        }

        return ret;
error:
        blkdev_put(bdev, FMODE_EXCL);
        if (seeding_dev) {
                mutex_unlock(&uuid_mutex);
                up_write(&sb->s_umount);
        }
        return ret;
}
1765
/*
 * Write the current in-memory state of @device (io/sector sizes, total
 * and used byte counts) into its DEV_ITEM in the chunk tree.
 *
 * Returns 0 on success, -ENOENT if the item does not exist, -ENOMEM on
 * path allocation failure, or a negative errno from the tree search.
 */
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
                                        struct btrfs_device *device)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_root *root;
        struct btrfs_dev_item *dev_item;
        struct extent_buffer *leaf;
        struct btrfs_key key;

        /* device items live in the chunk tree */
        root = device->dev_root->fs_info->chunk_root;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = device->devid;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                /* an exact key match is required */
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

        /* on disk, total_bytes records the device's full on-disk size */
        btrfs_set_device_id(leaf, dev_item, device->devid);
        btrfs_set_device_type(leaf, dev_item, device->type);
        btrfs_set_device_io_align(leaf, dev_item, device->io_align);
        btrfs_set_device_io_width(leaf, dev_item, device->io_width);
        btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
        btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
        btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
        btrfs_mark_buffer_dirty(leaf);

out:
        btrfs_free_path(path);
        return ret;
}
1811
1812 static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
1813                       struct btrfs_device *device, u64 new_size)
1814 {
1815         struct btrfs_super_block *super_copy =
1816                 device->dev_root->fs_info->super_copy;
1817         u64 old_total = btrfs_super_total_bytes(super_copy);
1818         u64 diff = new_size - device->total_bytes;
1819
1820         if (!device->writeable)
1821                 return -EACCES;
1822         if (new_size <= device->total_bytes)
1823                 return -EINVAL;
1824
1825         btrfs_set_super_total_bytes(super_copy, old_total + diff);
1826         device->fs_devices->total_rw_bytes += diff;
1827
1828         device->total_bytes = new_size;
1829         device->disk_total_bytes = new_size;
1830         btrfs_clear_space_info_full(device->dev_root->fs_info);
1831
1832         return btrfs_update_device(trans, device);
1833 }
1834
1835 int btrfs_grow_device(struct btrfs_trans_handle *trans,
1836                       struct btrfs_device *device, u64 new_size)
1837 {
1838         int ret;
1839         lock_chunks(device->dev_root);
1840         ret = __btrfs_grow_device(trans, device, new_size);
1841         unlock_chunks(device->dev_root);
1842         return ret;
1843 }
1844
/*
 * Delete the CHUNK_ITEM for @chunk_objectid/@chunk_offset from the
 * chunk tree.
 *
 * The caller must guarantee the item exists: the BUG_ON() below fires
 * both on search errors (ret < 0) and on "not found" (ret > 0).
 * Returns 0 on success or the error from btrfs_del_item().
 */
static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            u64 chunk_tree, u64 chunk_objectid,
                            u64 chunk_offset)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;

        /* chunk items are always in the chunk tree */
        root = root->fs_info->chunk_root;
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = chunk_objectid;
        key.offset = chunk_offset;
        key.type = BTRFS_CHUNK_ITEM_KEY;

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        /* a missing chunk item here means the fs is inconsistent */
        BUG_ON(ret);

        ret = btrfs_del_item(trans, root, path);

        btrfs_free_path(path);
        return ret;
}
1871
/*
 * Remove a system chunk's entry from the sys_chunk_array copy in the
 * in-memory superblock.
 *
 * The array is a packed sequence of (disk_key, chunk item) pairs.  When
 * the entry matching @chunk_objectid/@chunk_offset is found, the tail
 * of the array is shifted down over it and the recorded array size is
 * reduced.
 *
 * Returns 0 on success (also when no matching entry exists) or -EIO if
 * a key that is not a CHUNK_ITEM is encountered (corrupt array).
 */
static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
                        chunk_offset)
{
        struct btrfs_super_block *super_copy = root->fs_info->super_copy;
        struct btrfs_disk_key *disk_key;
        struct btrfs_chunk *chunk;
        u8 *ptr;
        int ret = 0;
        u32 num_stripes;
        u32 array_size;
        u32 len = 0;
        u32 cur;
        struct btrfs_key key;

        array_size = btrfs_super_sys_array_size(super_copy);

        ptr = super_copy->sys_chunk_array;
        cur = 0;

        while (cur < array_size) {
                disk_key = (struct btrfs_disk_key *)ptr;
                btrfs_disk_key_to_cpu(&key, disk_key);

                /* len covers the key plus the variable-size chunk item */
                len = sizeof(*disk_key);

                if (key.type == BTRFS_CHUNK_ITEM_KEY) {
                        chunk = (struct btrfs_chunk *)(ptr + len);
                        num_stripes = btrfs_stack_chunk_num_stripes(chunk);
                        len += btrfs_chunk_item_size(num_stripes);
                } else {
                        ret = -EIO;
                        break;
                }
                if (key.objectid == chunk_objectid &&
                    key.offset == chunk_offset) {
                        /*
                         * shift the tail down over this entry; ptr/cur
                         * are deliberately not advanced so the entry
                         * now at this position is examined next
                         */
                        memmove(ptr, ptr + len, array_size - (cur + len));
                        array_size -= len;
                        btrfs_set_super_sys_array_size(super_copy, array_size);
                } else {
                        ptr += len;
                        cur += len;
                }
        }
        return ret;
}
1917
/*
 * Relocate the chunk at @chunk_offset and delete it.
 *
 * Step one moves every extent out of the chunk's block group; step two,
 * under a transaction and the chunk mutex, frees the per-device
 * extents, removes the chunk item (and its sys_chunk_array copy for
 * SYSTEM chunks), deletes the block group and drops the extent mapping.
 *
 * Returns 0 on success, -ENOSPC when the extents cannot be relocated,
 * or the error from btrfs_relocate_block_group().  Failures in the
 * delete phase are BUG_ON()s.
 */
static int btrfs_relocate_chunk(struct btrfs_root *root,
                         u64 chunk_tree, u64 chunk_objectid,
                         u64 chunk_offset)
{
        struct extent_map_tree *em_tree;
        struct btrfs_root *extent_root;
        struct btrfs_trans_handle *trans;
        struct extent_map *em;
        struct map_lookup *map;
        int ret;
        int i;

        root = root->fs_info->chunk_root;
        extent_root = root->fs_info->extent_root;
        em_tree = &root->fs_info->mapping_tree.map_tree;

        /* bail early if there is nowhere to move the data */
        ret = btrfs_can_relocate(extent_root, chunk_offset);
        if (ret)
                return -ENOSPC;

        /* step one, relocate all the extents inside this chunk */
        ret = btrfs_relocate_block_group(extent_root, chunk_offset);
        if (ret)
                return ret;

        trans = btrfs_start_transaction(root, 0);
        BUG_ON(IS_ERR(trans));

        lock_chunks(root);

        /*
         * step two, delete the device extents and the
         * chunk tree entries
         */
        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, chunk_offset, 1);
        read_unlock(&em_tree->lock);

        /* the mapping must exist and cover chunk_offset */
        BUG_ON(em->start > chunk_offset ||
               em->start + em->len < chunk_offset);
        map = (struct map_lookup *)em->bdev;

        for (i = 0; i < map->num_stripes; i++) {
                ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
                                            map->stripes[i].physical);
                BUG_ON(ret);

                if (map->stripes[i].dev) {
                        ret = btrfs_update_device(trans, map->stripes[i].dev);
                        BUG_ON(ret);
                }
        }
        ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
                               chunk_offset);

        BUG_ON(ret);

        trace_btrfs_chunk_free(root, map, chunk_offset, em->len);

        if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                /* system chunks also live in the superblock array */
                ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
                BUG_ON(ret);
        }

        ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
        BUG_ON(ret);

        write_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
        write_unlock(&em_tree->lock);

        kfree(map);
        em->bdev = NULL;

        /* once for the tree */
        free_extent_map(em);
        /* once for us */
        free_extent_map(em);

        unlock_chunks(root);
        btrfs_end_transaction(trans, root);
        return 0;
}
2001
/*
 * Relocate every SYSTEM chunk (used after sprouting a seed filesystem
 * so system chunks no longer sit on the old layout).
 *
 * Scans the chunk tree backwards from the highest offset.  Chunks that
 * fail with -ENOSPC are retried in one extra pass, since relocating
 * the others may have freed space.  Returns 0 on success, -ENOSPC if
 * the retry pass also fails, or a tree-search error.
 */
static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
{
        struct btrfs_root *chunk_root = root->fs_info->chunk_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_chunk *chunk;
        struct btrfs_key key;
        struct btrfs_key found_key;
        u64 chunk_tree = chunk_root->root_key.objectid;
        u64 chunk_type;
        bool retried = false;
        int failed = 0;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

again:
        key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        key.offset = (u64)-1;
        key.type = BTRFS_CHUNK_ITEM_KEY;

        while (1) {
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
                if (ret < 0)
                        goto error;
                /* the (u64)-1 offset can never be an exact match */
                BUG_ON(ret == 0);

                ret = btrfs_previous_item(chunk_root, path, key.objectid,
                                          key.type);
                if (ret < 0)
                        goto error;
                if (ret > 0)
                        break;

                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

                chunk = btrfs_item_ptr(leaf, path->slots[0],
                                       struct btrfs_chunk);
                chunk_type = btrfs_chunk_type(leaf, chunk);
                /* drop the path before relocation touches the trees */
                btrfs_release_path(path);

                if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
                        ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
                                                   found_key.objectid,
                                                   found_key.offset);
                        if (ret == -ENOSPC)
                                failed++;
                        else if (ret)
                                BUG();
                }

                if (found_key.offset == 0)
                        break;
                key.offset = found_key.offset - 1;
        }
        ret = 0;
        if (failed && !retried) {
                /* one extra pass; earlier relocations may have made room */
                failed = 0;
                retried = true;
                goto again;
        } else if (failed && retried) {
                WARN_ON(1);
                ret = -ENOSPC;
        }
error:
        btrfs_free_path(path);
        return ret;
}
2073
/*
 * Persist the balance arguments as the single BALANCE_ITEM in @root so
 * an interrupted balance can be resumed after a remount or crash.
 *
 * The transaction is committed even when the insert fails; the first
 * error encountered (insert or commit) is returned.
 */
static int insert_balance_item(struct btrfs_root *root,
                               struct btrfs_balance_control *bctl)
{
        struct btrfs_trans_handle *trans;
        struct btrfs_balance_item *item;
        struct btrfs_disk_balance_args disk_bargs;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret, err;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                btrfs_free_path(path);
                return PTR_ERR(trans);
        }

        /* there is exactly one balance item, always at this fixed key */
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      sizeof(*item));
        if (ret)
                goto out;

        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

        memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));

        /* convert each per-type args struct to its on-disk layout */
        btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
        btrfs_set_balance_data(leaf, item, &disk_bargs);
        btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
        btrfs_set_balance_meta(leaf, item, &disk_bargs);
        btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
        btrfs_set_balance_sys(leaf, item, &disk_bargs);

        btrfs_set_balance_flags(leaf, item, bctl->flags);

        btrfs_mark_buffer_dirty(leaf);
out:
        btrfs_free_path(path);
        err = btrfs_commit_transaction(trans, root);
        if (err && !ret)
                ret = err;
        return ret;
}
2126
/*
 * Delete the BALANCE_ITEM from @root once a balance has completed or
 * been canceled.
 *
 * Returns -ENOENT when no item exists.  The transaction is committed
 * even on lookup/delete failure; the first error is returned.
 */
static int del_balance_item(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;
        struct btrfs_path *path;
        struct btrfs_key key;
        int ret, err;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                btrfs_free_path(path);
                return PTR_ERR(trans);
        }

        /* the balance item lives at a single fixed key */
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
                goto out;
        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        ret = btrfs_del_item(trans, root, path);
out:
        btrfs_free_path(path);
        err = btrfs_commit_transaction(trans, root);
        if (err && !ret)
                ret = err;
        return ret;
}
2164
2165 /*
2166  * This is a heuristic used to reduce the number of chunks balanced on
2167  * resume after balance was interrupted.
2168  */
2169 static void update_balance_args(struct btrfs_balance_control *bctl)
2170 {
2171         /*
2172          * Turn on soft mode for chunk types that were being converted.
2173          */
2174         if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
2175                 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
2176         if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
2177                 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
2178         if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
2179                 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
2180
2181         /*
2182          * Turn on usage filter if is not already used.  The idea is
2183          * that chunks that we have already balanced should be
2184          * reasonably full.  Don't do it for chunks that are being
2185          * converted - that will keep us from relocating unconverted
2186          * (albeit full) chunks.
2187          */
2188         if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
2189             !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
2190                 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
2191                 bctl->data.usage = 90;
2192         }
2193         if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
2194             !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
2195                 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
2196                 bctl->sys.usage = 90;
2197         }
2198         if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
2199             !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
2200                 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
2201                 bctl->meta.usage = 90;
2202         }
2203 }
2204
2205 /*
2206  * Should be called with both balance and volume mutexes held to
2207  * serialize other volume operations (add_dev/rm_dev/resize) with
2208  * restriper.  Same goes for unset_balance_control.
2209  */
2210 static void set_balance_control(struct btrfs_balance_control *bctl)
2211 {
2212         struct btrfs_fs_info *fs_info = bctl->fs_info;
2213
2214         BUG_ON(fs_info->balance_ctl);
2215
2216         spin_lock(&fs_info->balance_lock);
2217         fs_info->balance_ctl = bctl;
2218         spin_unlock(&fs_info->balance_lock);
2219 }
2220
2221 static void unset_balance_control(struct btrfs_fs_info *fs_info)
2222 {
2223         struct btrfs_balance_control *bctl = fs_info->balance_ctl;
2224
2225         BUG_ON(!fs_info->balance_ctl);
2226
2227         spin_lock(&fs_info->balance_lock);
2228         fs_info->balance_ctl = NULL;
2229         spin_unlock(&fs_info->balance_lock);
2230
2231         kfree(bctl);
2232 }
2233
2234 /*
2235  * Balance filters.  Return 1 if chunk should be filtered out
2236  * (should not be balanced).
2237  */
2238 static int chunk_profiles_filter(u64 chunk_profile,
2239                                  struct btrfs_balance_args *bargs)
2240 {
2241         chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
2242
2243         if (chunk_profile == 0)
2244                 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2245
2246         if (bargs->profiles & chunk_profile)
2247                 return 0;
2248
2249         return 1;
2250 }
2251
2252 static u64 div_factor_fine(u64 num, int factor)
2253 {
2254         if (factor <= 0)
2255                 return 0;
2256         if (factor >= 100)
2257                 return num;
2258
2259         num *= factor;
2260         do_div(num, 100);
2261         return num;
2262 }
2263
/*
 * Usage filter: return 1 (filter the chunk out) when its used bytes are
 * at or above bargs->usage percent of the block group size, 0 to keep
 * it for balancing.
 */
static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
                              struct btrfs_balance_args *bargs)
{
        struct btrfs_block_group_cache *cache;
        u64 chunk_used, user_thresh;
        int ret = 1;

        /*
         * NOTE(review): cache is dereferenced without a NULL check;
         * presumably a block group always exists for a chunk reached
         * from the balance loop - confirm against the callers.
         */
        cache = btrfs_lookup_block_group(fs_info, chunk_offset);
        chunk_used = btrfs_block_group_used(&cache->item);

        /* cache->key.offset is the block group's total size */
        user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
        if (chunk_used < user_thresh)
                ret = 0;

        btrfs_put_block_group(cache);
        return ret;
}
2281
2282 static int chunk_devid_filter(struct extent_buffer *leaf,
2283                               struct btrfs_chunk *chunk,
2284                               struct btrfs_balance_args *bargs)
2285 {
2286         struct btrfs_stripe *stripe;
2287         int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2288         int i;
2289
2290         for (i = 0; i < num_stripes; i++) {
2291                 stripe = btrfs_stripe_nr(chunk, i);
2292                 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
2293                         return 0;
2294         }
2295
2296         return 1;
2297 }
2298
/*
 * Physical range ([pstart, pend)) filter: return 0 (keep the chunk)
 * when any of its stripes on bargs->devid overlaps the requested
 * physical byte range, 1 to filter it out.
 *
 * Only meaningful together with the devid filter; without
 * BTRFS_BALANCE_ARGS_DEVID every chunk is kept.
 */
static int chunk_drange_filter(struct extent_buffer *leaf,
                               struct btrfs_chunk *chunk,
                               u64 chunk_offset,
                               struct btrfs_balance_args *bargs)
{
        struct btrfs_stripe *stripe;
        int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
        u64 stripe_offset;
        u64 stripe_length;
        int factor;
        int i;

        if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
                return 0;

        /*
         * duplicating profiles store two copies, so the per-device
         * stripe length is chunk_length / (num_stripes / factor)
         */
        if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
             BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))
                factor = 2;
        else
                factor = 1;
        factor = num_stripes / factor;

        for (i = 0; i < num_stripes; i++) {
                stripe = btrfs_stripe_nr(chunk, i);
                if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
                        continue;

                stripe_offset = btrfs_stripe_offset(leaf, stripe);
                stripe_length = btrfs_chunk_length(leaf, chunk);
                do_div(stripe_length, factor);

                /* half-open interval overlap test */
                if (stripe_offset < bargs->pend &&
                    stripe_offset + stripe_length > bargs->pstart)
                        return 0;
        }

        return 1;
}
2338
2339 /* [vstart, vend) */
2340 static int chunk_vrange_filter(struct extent_buffer *leaf,
2341                                struct btrfs_chunk *chunk,
2342                                u64 chunk_offset,
2343                                struct btrfs_balance_args *bargs)
2344 {
2345         if (chunk_offset < bargs->vend &&
2346             chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
2347                 /* at least part of the chunk is inside this vrange */
2348                 return 0;
2349
2350         return 1;
2351 }
2352
2353 static int chunk_soft_convert_filter(u64 chunk_profile,
2354                                      struct btrfs_balance_args *bargs)
2355 {
2356         if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
2357                 return 0;
2358
2359         chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
2360
2361         if (chunk_profile == 0)
2362                 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2363
2364         if (bargs->target & chunk_profile)
2365                 return 1;
2366
2367         return 0;
2368 }
2369
/*
 * Decide whether the chunk at @chunk_offset should be relocated by the
 * running balance.
 *
 * Applies the configured filters in order (type, profiles, usage,
 * devid, drange, vrange, soft); the first one that rejects the chunk
 * makes this return 0.  Returns 1 when the chunk should be balanced.
 */
static int should_balance_chunk(struct btrfs_root *root,
                                struct extent_buffer *leaf,
                                struct btrfs_chunk *chunk, u64 chunk_offset)
{
        struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
        struct btrfs_balance_args *bargs = NULL;
        u64 chunk_type = btrfs_chunk_type(leaf, chunk);

        /* type filter */
        if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
              (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
                return 0;
        }

        /* the type filter passed, so one of these assigns bargs */
        if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
                bargs = &bctl->data;
        else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
                bargs = &bctl->sys;
        else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
                bargs = &bctl->meta;

        /* profiles filter */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
            chunk_profiles_filter(chunk_type, bargs)) {
                return 0;
        }

        /* usage filter */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
            chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
                return 0;
        }

        /* devid filter */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
            chunk_devid_filter(leaf, chunk, bargs)) {
                return 0;
        }

        /* drange filter, makes sense only with devid filter */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
            chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) {
                return 0;
        }

        /* vrange filter */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
            chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
                return 0;
        }

        /* soft profile changing mode */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
            chunk_soft_convert_filter(chunk_type, bargs)) {
                return 0;
        }

        return 1;
}
2429
2430 static u64 div_factor(u64 num, int factor)
2431 {
2432         if (factor == 10)
2433                 return num;
2434         num *= factor;
2435         do_div(num, 10);
2436         return num;
2437 }
2438
2439 static int __btrfs_balance(struct btrfs_fs_info *fs_info)
2440 {
2441         struct btrfs_balance_control *bctl = fs_info->balance_ctl;
2442         struct btrfs_root *chunk_root = fs_info->chunk_root;
2443         struct btrfs_root *dev_root = fs_info->dev_root;
2444         struct list_head *devices;
2445         struct btrfs_device *device;
2446         u64 old_size;
2447         u64 size_to_free;
2448         struct btrfs_chunk *chunk;
2449         struct btrfs_path *path;
2450         struct btrfs_key key;
2451         struct btrfs_key found_key;
2452         struct btrfs_trans_handle *trans;
2453         struct extent_buffer *leaf;
2454         int slot;
2455         int ret;
2456         int enospc_errors = 0;
2457         bool counting = true;
2458
2459         /* step one make some room on all the devices */
2460         devices = &fs_info->fs_devices->devices;
2461         list_for_each_entry(device, devices, dev_list) {
2462                 old_size = device->total_bytes;
2463                 size_to_free = div_factor(old_size, 1);
2464                 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
2465                 if (!device->writeable ||
2466                     device->total_bytes - device->bytes_used > size_to_free)
2467                         continue;
2468
2469                 ret = btrfs_shrink_device(device, old_size - size_to_free);
2470                 if (ret == -ENOSPC)
2471                         break;
2472                 BUG_ON(ret);
2473
2474                 trans = btrfs_start_transaction(dev_root, 0);
2475                 BUG_ON(IS_ERR(trans));
2476
2477                 ret = btrfs_grow_device(trans, device, old_size);
2478                 BUG_ON(ret);
2479
2480                 btrfs_end_transaction(trans, dev_root);
2481         }
2482
2483         /* step two, relocate all the chunks */
2484         path = btrfs_alloc_path();
2485         if (!path) {
2486                 ret = -ENOMEM;
2487                 goto error;
2488         }
2489
2490         /* zero out stat counters */
2491         spin_lock(&fs_info->balance_lock);
2492         memset(&bctl->stat, 0, sizeof(bctl->stat));
2493         spin_unlock(&fs_info->balance_lock);
2494 again:
2495         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2496         key.offset = (u64)-1;
2497         key.type = BTRFS_CHUNK_ITEM_KEY;
2498
2499         while (1) {
2500                 if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
2501                     atomic_read(&fs_info->balance_cancel_req)) {
2502                         ret = -ECANCELED;
2503                         goto error;
2504                 }
2505
2506                 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
2507                 if (ret < 0)
2508                         goto error;
2509
2510                 /*
2511                  * this shouldn't happen, it means the last relocate
2512                  * failed
2513                  */
2514                 if (ret == 0)
2515                         BUG(); /* FIXME break ? */
2516
2517                 ret = btrfs_previous_item(chunk_root, path, 0,
2518                                           BTRFS_CHUNK_ITEM_KEY);
2519                 if (ret) {
2520                         ret = 0;
2521                         break;
2522                 }
2523
2524                 leaf = path->nodes[0];
2525                 slot = path->slots[0];
2526                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2527
2528                 if (found_key.objectid != key.objectid)
2529                         break;
2530
2531                 /* chunk zero is special */
2532                 if (found_key.offset == 0)
2533                         break;
2534
2535                 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2536
2537                 if (!counting) {
2538                         spin_lock(&fs_info->balance_lock);
2539                         bctl->stat.considered++;
2540                         spin_unlock(&fs_info->balance_lock);
2541                 }
2542
2543                 ret = should_balance_chunk(chunk_root, leaf, chunk,
2544                                            found_key.offset);
2545                 btrfs_release_path(path);
2546                 if (!ret)
2547                         goto loop;
2548
2549                 if (counting) {
2550                         spin_lock(&fs_info->balance_lock);
2551                         bctl->stat.expected++;
2552                         spin_unlock(&fs_info->balance_lock);
2553                         goto loop;
2554                 }
2555
2556                 ret = btrfs_relocate_chunk(chunk_root,
2557                                            chunk_root->root_key.objectid,
2558                                            found_key.objectid,
2559                                            found_key.offset);
2560                 if (ret && ret != -ENOSPC)
2561                         goto error;
2562                 if (ret == -ENOSPC) {
2563                         enospc_errors++;
2564                 } else {
2565                         spin_lock(&fs_info->balance_lock);
2566                         bctl->stat.completed++;
2567                         spin_unlock(&fs_info->balance_lock);
2568                 }
2569 loop:
2570                 key.offset = found_key.offset - 1;
2571         }
2572
2573         if (counting) {
2574                 btrfs_release_path(path);
2575                 counting = false;
2576                 goto again;
2577         }
2578 error:
2579         btrfs_free_path(path);
2580         if (enospc_errors) {
2581                 printk(KERN_INFO "btrfs: %d enospc errors during balance\n",
2582                        enospc_errors);
2583                 if (!ret)
2584                         ret = -ENOSPC;
2585         }
2586
2587         return ret;
2588 }
2589
2590 static inline int balance_need_close(struct btrfs_fs_info *fs_info)
2591 {
2592         /* cancel requested || normal exit path */
2593         return atomic_read(&fs_info->balance_cancel_req) ||
2594                 (atomic_read(&fs_info->balance_pause_req) == 0 &&
2595                  atomic_read(&fs_info->balance_cancel_req) == 0);
2596 }
2597
2598 static void __cancel_balance(struct btrfs_fs_info *fs_info)
2599 {
2600         int ret;
2601
2602         unset_balance_control(fs_info);
2603         ret = del_balance_item(fs_info->tree_root);
2604         BUG_ON(ret);
2605 }
2606
2607 void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
2608                                struct btrfs_ioctl_balance_args *bargs);
2609
2610 /*
2611  * Should be called with both balance and volume mutexes held
2612  */
2613 int btrfs_balance(struct btrfs_balance_control *bctl,
2614                   struct btrfs_ioctl_balance_args *bargs)
2615 {
2616         struct btrfs_fs_info *fs_info = bctl->fs_info;
2617         u64 allowed;
2618         int ret;
2619
2620         if (btrfs_fs_closing(fs_info) ||
2621             atomic_read(&fs_info->balance_pause_req) ||
2622             atomic_read(&fs_info->balance_cancel_req)) {
2623                 ret = -EINVAL;
2624                 goto out;
2625         }
2626
2627         /*
2628          * In case of mixed groups both data and meta should be picked,
2629          * and identical options should be given for both of them.
2630          */
2631         allowed = btrfs_super_incompat_flags(fs_info->super_copy);
2632         if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
2633             (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) {
2634                 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
2635                     !(bctl->flags & BTRFS_BALANCE_METADATA) ||
2636                     memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
2637                         printk(KERN_ERR "btrfs: with mixed groups data and "
2638                                "metadata balance options must be the same\n");
2639                         ret = -EINVAL;
2640                         goto out;
2641                 }
2642         }
2643
2644         /*
2645          * Profile changing sanity checks.  Skip them if a simple
2646          * balance is requested.
2647          */
2648         if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
2649               BTRFS_BALANCE_ARGS_CONVERT))
2650                 goto do_balance;
2651
2652         allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2653         if (fs_info->fs_devices->num_devices == 1)
2654                 allowed |= BTRFS_BLOCK_GROUP_DUP;
2655         else if (fs_info->fs_devices->num_devices < 4)
2656                 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
2657         else
2658                 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
2659                                 BTRFS_BLOCK_GROUP_RAID10);
2660
2661         if (!profile_is_valid(bctl->data.target, 1) ||
2662             bctl->data.target & ~allowed) {
2663                 printk(KERN_ERR "btrfs: unable to start balance with target "
2664                        "data profile %llu\n",
2665                        (unsigned long long)bctl->data.target);
2666                 ret = -EINVAL;
2667                 goto out;
2668         }
2669         if (!profile_is_valid(bctl->meta.target, 1) ||
2670             bctl->meta.target & ~allowed) {
2671                 printk(KERN_ERR "btrfs: unable to start balance with target "
2672                        "metadata profile %llu\n",
2673                        (unsigned long long)bctl->meta.target);
2674                 ret = -EINVAL;
2675                 goto out;
2676         }
2677         if (!profile_is_valid(bctl->sys.target, 1) ||
2678             bctl->sys.target & ~allowed) {
2679                 printk(KERN_ERR "btrfs: unable to start balance with target "
2680                        "system profile %llu\n",
2681                        (unsigned long long)bctl->sys.target);
2682                 ret = -EINVAL;
2683                 goto out;
2684         }
2685
2686         if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) {
2687                 printk(KERN_ERR "btrfs: dup for data is not allowed\n");
2688                 ret = -EINVAL;
2689                 goto out;
2690         }
2691
2692         /* allow to reduce meta or sys integrity only if force set */
2693         allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
2694                         BTRFS_BLOCK_GROUP_RAID10;
2695         if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2696              (fs_info->avail_system_alloc_bits & allowed) &&
2697              !(bctl->sys.target & allowed)) ||
2698             ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2699              (fs_info->avail_metadata_alloc_bits & allowed) &&
2700              !(bctl->meta.target & allowed))) {
2701                 if (bctl->flags & BTRFS_BALANCE_FORCE) {
2702                         printk(KERN_INFO "btrfs: force reducing metadata "
2703                                "integrity\n");
2704                 } else {
2705                         printk(KERN_ERR "btrfs: balance will reduce metadata "
2706                                "integrity, use force if you want this\n");
2707                         ret = -EINVAL;
2708                         goto out;
2709                 }
2710         }
2711
2712 do_balance:
2713         ret = insert_balance_item(fs_info->tree_root, bctl);
2714         if (ret && ret != -EEXIST)
2715                 goto out;
2716
2717         if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
2718                 BUG_ON(ret == -EEXIST);
2719                 set_balance_control(bctl);
2720         } else {
2721                 BUG_ON(ret != -EEXIST);
2722                 spin_lock(&fs_info->balance_lock);
2723                 update_balance_args(bctl);
2724                 spin_unlock(&fs_info->balance_lock);
2725         }
2726
2727         atomic_inc(&fs_info->balance_running);
2728         mutex_unlock(&fs_info->balance_mutex);
2729
2730         ret = __btrfs_balance(fs_info);
2731
2732         mutex_lock(&fs_info->balance_mutex);
2733         atomic_dec(&fs_info->balance_running);
2734
2735         if (bargs) {
2736                 memset(bargs, 0, sizeof(*bargs));
2737                 update_ioctl_balance_args(fs_info, 0, bargs);
2738         }
2739
2740         if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
2741             balance_need_close(fs_info)) {
2742                 __cancel_balance(fs_info);
2743         }
2744
2745         wake_up(&fs_info->balance_wait_q);
2746
2747         return ret;
2748 out:
2749         if (bctl->flags & BTRFS_BALANCE_RESUME)
2750                 __cancel_balance(fs_info);
2751         else
2752                 kfree(bctl);
2753         return ret;
2754 }
2755
2756 static int balance_kthread(void *data)
2757 {
2758         struct btrfs_balance_control *bctl =
2759                         (struct btrfs_balance_control *)data;
2760         struct btrfs_fs_info *fs_info = bctl->fs_info;
2761         int ret = 0;
2762
2763         mutex_lock(&fs_info->volume_mutex);
2764         mutex_lock(&fs_info->balance_mutex);
2765
2766         set_balance_control(bctl);
2767
2768         if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2769                 printk(KERN_INFO "btrfs: force skipping balance\n");
2770         } else {
2771                 printk(KERN_INFO "btrfs: continuing balance\n");
2772                 ret = btrfs_balance(bctl, NULL);
2773         }
2774
2775         mutex_unlock(&fs_info->balance_mutex);
2776         mutex_unlock(&fs_info->volume_mutex);
2777         return ret;
2778 }
2779
/*
 * Look for a persisted balance item (left over from an interrupted
 * balance) and, if found, spawn a kthread to resume the operation.
 *
 * Returns 0 when there is nothing to resume or the resume thread was
 * started, negative errno on failure.
 */
int btrfs_recover_balance(struct btrfs_root *tree_root)
{
        struct task_struct *tsk;
        struct btrfs_balance_control *bctl;
        struct btrfs_balance_item *item;
        struct btrfs_disk_balance_args disk_bargs;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
        if (!bctl) {
                ret = -ENOMEM;
                goto out;
        }

        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
        if (ret < 0)
                goto out_bctl;
        if (ret > 0) { /* ret = -ENOENT; */
                /* no balance item: nothing to resume */
                ret = 0;
                goto out_bctl;
        }

        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

        bctl->fs_info = tree_root->fs_info;
        /* mark as resume so btrfs_balance() expects the existing item */
        bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;

        /* convert the on-disk args for data/meta/sys to in-memory form */
        btrfs_balance_data(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
        btrfs_balance_meta(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
        btrfs_balance_sys(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);

        tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
        if (IS_ERR(tsk))
                ret = PTR_ERR(tsk);
        else
                /* ownership of bctl passed to the kthread; don't free it */
                goto out;

out_bctl:
        kfree(bctl);
out:
        btrfs_free_path(path);
        return ret;
}
2838
/*
 * Request a pause of a running balance and wait until it has stopped.
 * The balance item and control stay around so the operation can resume.
 *
 * Returns 0 on success, -ENOTCONN when no balance exists or none is
 * currently running.
 */
int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
{
        int ret = 0;

        mutex_lock(&fs_info->balance_mutex);
        if (!fs_info->balance_ctl) {
                mutex_unlock(&fs_info->balance_mutex);
                return -ENOTCONN;
        }

        if (atomic_read(&fs_info->balance_running)) {
                atomic_inc(&fs_info->balance_pause_req);
                /* drop the mutex so btrfs_balance() can finish and exit */
                mutex_unlock(&fs_info->balance_mutex);

                wait_event(fs_info->balance_wait_q,
                           atomic_read(&fs_info->balance_running) == 0);

                mutex_lock(&fs_info->balance_mutex);
                /* we are good with balance_ctl ripped off from under us */
                BUG_ON(atomic_read(&fs_info->balance_running));
                atomic_dec(&fs_info->balance_pause_req);
        } else {
                ret = -ENOTCONN;
        }

        mutex_unlock(&fs_info->balance_mutex);
        return ret;
}
2867
/*
 * Cancel a balance: if one is running, wait for it to stop (it deletes
 * its own balance item); if it is paused, tear the state down here.
 *
 * Returns 0 on success, -ENOTCONN if no balance state exists.
 */
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
        mutex_lock(&fs_info->balance_mutex);
        if (!fs_info->balance_ctl) {
                mutex_unlock(&fs_info->balance_mutex);
                return -ENOTCONN;
        }

        atomic_inc(&fs_info->balance_cancel_req);
        /*
         * if we are running just wait and return, balance item is
         * deleted in btrfs_balance in this case
         */
        if (atomic_read(&fs_info->balance_running)) {
                mutex_unlock(&fs_info->balance_mutex);
                wait_event(fs_info->balance_wait_q,
                           atomic_read(&fs_info->balance_running) == 0);
                mutex_lock(&fs_info->balance_mutex);
        } else {
                /* __cancel_balance needs volume_mutex */
                mutex_unlock(&fs_info->balance_mutex);
                mutex_lock(&fs_info->volume_mutex);
                mutex_lock(&fs_info->balance_mutex);

                /* re-check: state may have been torn down while unlocked */
                if (fs_info->balance_ctl)
                        __cancel_balance(fs_info);

                mutex_unlock(&fs_info->volume_mutex);
        }

        BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
        atomic_dec(&fs_info->balance_cancel_req);
        mutex_unlock(&fs_info->balance_mutex);
        return 0;
}
2903
2904 /*
2905  * shrinking a device means finding all of the device extents past
2906  * the new size, and then following the back refs to the chunks.
2907  * The chunk relocation code actually frees the device extent
2908  */
2909 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2910 {
2911         struct btrfs_trans_handle *trans;
2912         struct btrfs_root *root = device->dev_root;
2913         struct btrfs_dev_extent *dev_extent = NULL;
2914         struct btrfs_path *path;
2915         u64 length;
2916         u64 chunk_tree;
2917         u64 chunk_objectid;
2918         u64 chunk_offset;
2919         int ret;
2920         int slot;
2921         int failed = 0;
2922         bool retried = false;
2923         struct extent_buffer *l;
2924         struct btrfs_key key;
2925         struct btrfs_super_block *super_copy = root->fs_info->super_copy;
2926         u64 old_total = btrfs_super_total_bytes(super_copy);
2927         u64 old_size = device->total_bytes;
2928         u64 diff = device->total_bytes - new_size;
2929
2930         if (new_size >= device->total_bytes)
2931                 return -EINVAL;
2932
2933         path = btrfs_alloc_path();
2934         if (!path)
2935                 return -ENOMEM;
2936
2937         path->reada = 2;
2938
2939         lock_chunks(root);
2940
2941         device->total_bytes = new_size;
2942         if (device->writeable) {
2943                 device->fs_devices->total_rw_bytes -= diff;
2944                 spin_lock(&root->fs_info->free_chunk_lock);
2945                 root->fs_info->free_chunk_space -= diff;
2946                 spin_unlock(&root->fs_info->free_chunk_lock);
2947         }
2948         unlock_chunks(root);
2949
2950 again:
2951         key.objectid = device->devid;
2952         key.offset = (u64)-1;
2953         key.type = BTRFS_DEV_EXTENT_KEY;
2954
2955         while (1) {
2956                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2957                 if (ret < 0)
2958                         goto done;
2959
2960                 ret = btrfs_previous_item(root, path, 0, key.type);
2961                 if (ret < 0)
2962                         goto done;
2963                 if (ret) {
2964                         ret = 0;
2965                         btrfs_release_path(path);
2966                         break;
2967                 }
2968
2969                 l = path->nodes[0];
2970                 slot = path->slots[0];
2971                 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2972
2973                 if (key.objectid != device->devid) {
2974                         btrfs_release_path(path);
2975                         break;
2976                 }
2977
2978                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2979                 length = btrfs_dev_extent_length(l, dev_extent);
2980
2981                 if (key.offset + length <= new_size) {
2982                         btrfs_release_path(path);
2983                         break;
2984                 }
2985
2986                 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2987                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2988                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2989                 btrfs_release_path(path);
2990
2991                 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
2992                                            chunk_offset);
2993                 if (ret && ret != -ENOSPC)
2994                         goto done;
2995                 if (ret == -ENOSPC)
2996                         failed++;
2997                 key.offset -= 1;
2998         }
2999
3000         if (failed && !retried) {
3001                 failed = 0;
3002                 retried = true;
3003                 goto again;
3004         } else if (failed && retried) {
3005                 ret = -ENOSPC;
3006                 lock_chunks(root);
3007
3008                 device->total_bytes = old_size;
3009                 if (device->writeable)
3010                         device->fs_devices->total_rw_bytes += diff;
3011                 spin_lock(&root->fs_info->free_chunk_lock);
3012                 root->fs_info->free_chunk_space += diff;
3013                 spin_unlock(&root->fs_info->free_chunk_lock);
3014                 unlock_chunks(root);
3015                 goto done;
3016         }
3017
3018         /* Shrinking succeeded, else we would be at "done". */
3019         trans = btrfs_start_transaction(root, 0);
3020         if (IS_ERR(trans)) {
3021                 ret = PTR_ERR(trans);
3022                 goto done;
3023         }
3024
3025         lock_chunks(root);
3026
3027         device->disk_total_bytes = new_size;
3028         /* Now btrfs_update_device() will change the on-disk size. */
3029         ret = btrfs_update_device(trans, device);
3030         if (ret) {
3031                 unlock_chunks(root);
3032                 btrfs_end_transaction(trans, root);
3033                 goto done;
3034         }
3035         WARN_ON(diff > old_total);
3036         btrfs_set_super_total_bytes(super_copy, old_total - diff);
3037         unlock_chunks(root);
3038         btrfs_end_transaction(trans, root);
3039 done:
3040         btrfs_free_path(path);
3041         return ret;
3042 }
3043
3044 static int btrfs_add_system_chunk(struct btrfs_root *root,
3045                            struct btrfs_key *key,
3046                            struct btrfs_chunk *chunk, int item_size)
3047 {
3048         struct btrfs_super_block *super_copy = root->fs_info->super_copy;
3049         struct btrfs_disk_key disk_key;
3050         u32 array_size;
3051         u8 *ptr;
3052
3053         array_size = btrfs_super_sys_array_size(super_copy);
3054         if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
3055                 return -EFBIG;
3056
3057         ptr = super_copy->sys_chunk_array + array_size;
3058         btrfs_cpu_key_to_disk(&disk_key, key);
3059         memcpy(ptr, &disk_key, sizeof(disk_key));
3060         ptr += sizeof(disk_key);
3061         memcpy(ptr, chunk, item_size);
3062         item_size += sizeof(disk_key);
3063         btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
3064         return 0;
3065 }
3066
3067 /*
3068  * sort the devices in descending order by max_avail, total_avail
3069  */
3070 static int btrfs_cmp_device_info(const void *a, const void *b)
3071 {
3072         const struct btrfs_device_info *di_a = a;
3073         const struct btrfs_device_info *di_b = b;
3074
3075         if (di_a->max_avail > di_b->max_avail)
3076                 return -1;
3077         if (di_a->max_avail < di_b->max_avail)
3078                 return 1;
3079         if (di_a->total_avail > di_b->total_avail)
3080                 return -1;
3081         if (di_a->total_avail < di_b->total_avail)
3082                 return 1;
3083         return 0;
3084 }
3085
/*
 * Allocate a new chunk of the given @type at logical offset @start:
 * pick the devices with the most free space, carve equal-sized stripes
 * from them, insert the mapping into the extent-map tree, create the
 * block group and the per-device extents.
 *
 * @map_ret:         on success, set to the new stripe map (also reachable
 *                   via the inserted extent map)
 * @num_bytes_out:   logical size of the chunk
 * @stripe_size_out: size of each per-device stripe
 *
 * Returns 0 on success, -ENOSPC if not enough devices/space, -ENOMEM on
 * allocation failure.
 */
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                               struct btrfs_root *extent_root,
                               struct map_lookup **map_ret,
                               u64 *num_bytes_out, u64 *stripe_size_out,
                               u64 start, u64 type)
{
        struct btrfs_fs_info *info = extent_root->fs_info;
        struct btrfs_fs_devices *fs_devices = info->fs_devices;
        struct list_head *cur;
        struct map_lookup *map = NULL;
        struct extent_map_tree *em_tree;
        struct extent_map *em;
        struct btrfs_device_info *devices_info = NULL;
        u64 total_avail;
        int num_stripes;        /* total number of stripes to allocate */
        int sub_stripes;        /* sub_stripes info for map */
        int dev_stripes;        /* stripes per dev */
        int devs_max;           /* max devs to use */
        int devs_min;           /* min devs needed */
        int devs_increment;     /* ndevs has to be a multiple of this */
        int ncopies;            /* how many copies to data has */
        int ret;
        u64 max_stripe_size;
        u64 max_chunk_size;
        u64 stripe_size;
        u64 num_bytes;
        int ndevs;
        int i;
        int j;

        /* RAID1 and DUP are mutually exclusive; DUP is dropped */
        if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
            (type & BTRFS_BLOCK_GROUP_DUP)) {
                WARN_ON(1);
                type &= ~BTRFS_BLOCK_GROUP_DUP;
        }

        if (list_empty(&fs_devices->alloc_list))
                return -ENOSPC;

        sub_stripes = 1;
        dev_stripes = 1;
        devs_increment = 1;
        ncopies = 1;
        devs_max = 0;   /* 0 == as many as possible */
        devs_min = 1;

        /*
         * define the properties of each RAID type.
         * FIXME: move this to a global table and use it in all RAID
         * calculation code
         */
        if (type & (BTRFS_BLOCK_GROUP_DUP)) {
                dev_stripes = 2;
                ncopies = 2;
                devs_max = 1;
        } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
                devs_min = 2;
        } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
                devs_increment = 2;
                ncopies = 2;
                devs_max = 2;
                devs_min = 2;
        } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
                sub_stripes = 2;
                devs_increment = 2;
                ncopies = 2;
                devs_min = 4;
        } else {
                devs_max = 1;
        }

        /* stripe/chunk size caps depend on what the chunk stores */
        if (type & BTRFS_BLOCK_GROUP_DATA) {
                max_stripe_size = 1024 * 1024 * 1024;
                max_chunk_size = 10 * max_stripe_size;
        } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
                /* for larger filesystems, use larger metadata chunks */
                if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
                        max_stripe_size = 1024 * 1024 * 1024;
                else
                        max_stripe_size = 256 * 1024 * 1024;
                max_chunk_size = max_stripe_size;
        } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
                max_stripe_size = 32 * 1024 * 1024;
                max_chunk_size = 2 * max_stripe_size;
        } else {
                printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
                       type);
                BUG_ON(1);
        }

        /* we don't want a chunk larger than 10% of writeable space */
        max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
                             max_chunk_size);

        devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
                               GFP_NOFS);
        if (!devices_info)
                return -ENOMEM;

        cur = fs_devices->alloc_list.next;

        /*
         * in the first pass through the devices list, we gather information
         * about the available holes on each device.
         */
        ndevs = 0;
        while (cur != &fs_devices->alloc_list) {
                struct btrfs_device *device;
                u64 max_avail;
                u64 dev_offset;

                device = list_entry(cur, struct btrfs_device, dev_alloc_list);

                cur = cur->next;

                if (!device->writeable) {
                        printk(KERN_ERR
                               "btrfs: read-only device in alloc_list\n");
                        WARN_ON(1);
                        continue;
                }

                if (!device->in_fs_metadata)
                        continue;

                if (device->total_bytes > device->bytes_used)
                        total_avail = device->total_bytes - device->bytes_used;
                else
                        total_avail = 0;

                /* If there is no space on this device, skip it. */
                if (total_avail == 0)
                        continue;

                ret = find_free_dev_extent(device,
                                           max_stripe_size * dev_stripes,
                                           &dev_offset, &max_avail);
                if (ret && ret != -ENOSPC)
                        goto error;

                if (ret == 0)
                        max_avail = max_stripe_size * dev_stripes;

                /* hole too small to hold even one minimal stripe set */
                if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
                        continue;

                devices_info[ndevs].dev_offset = dev_offset;
                devices_info[ndevs].max_avail = max_avail;
                devices_info[ndevs].total_avail = total_avail;
                devices_info[ndevs].dev = device;
                ++ndevs;
        }

        /*
         * now sort the devices by hole size / available space
         */
        sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
             btrfs_cmp_device_info, NULL);

        /* round down to number of usable stripes */
        ndevs -= ndevs % devs_increment;

        if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
                ret = -ENOSPC;
                goto error;
        }

        if (devs_max && ndevs > devs_max)
                ndevs = devs_max;
        /*
         * the primary goal is to maximize the number of stripes, so use as many
         * devices as possible, even if the stripes are not maximum sized.
         */
        stripe_size = devices_info[ndevs-1].max_avail;
        num_stripes = ndevs * dev_stripes;

        /* cap the stripe size so the chunk does not exceed max_chunk_size */
        if (stripe_size * num_stripes > max_chunk_size * ncopies) {
                stripe_size = max_chunk_size * ncopies;
                do_div(stripe_size, num_stripes);
        }

        /* round the per-device stripe down to a BTRFS_STRIPE_LEN multiple */
        do_div(stripe_size, dev_stripes);
        do_div(stripe_size, BTRFS_STRIPE_LEN);
        stripe_size *= BTRFS_STRIPE_LEN;

        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
        if (!map) {
                ret = -ENOMEM;
                goto error;
        }
        map->num_stripes = num_stripes;

        /* assign each stripe its device and physical offset */
        for (i = 0; i < ndevs; ++i) {
                for (j = 0; j < dev_stripes; ++j) {
                        int s = i * dev_stripes + j;
                        map->stripes[s].dev = devices_info[i].dev;
                        map->stripes[s].physical = devices_info[i].dev_offset +
                                                   j * stripe_size;
                }
        }
        map->sector_size = extent_root->sectorsize;
        map->stripe_len = BTRFS_STRIPE_LEN;
        map->io_align = BTRFS_STRIPE_LEN;
        map->io_width = BTRFS_STRIPE_LEN;
        map->type = type;
        map->sub_stripes = sub_stripes;

        *map_ret = map;
        /* logical chunk size: each of the ncopies duplicates the data */
        num_bytes = stripe_size * (num_stripes / ncopies);

        *stripe_size_out = stripe_size;
        *num_bytes_out = num_bytes;

        trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);

        em = alloc_extent_map();
        if (!em) {
                ret = -ENOMEM;
                goto error;
        }
        /* bdev field is reused to carry the map, see map_lookup users */
        em->bdev = (struct block_device *)map;
        em->start = start;
        em->len = num_bytes;
        em->block_start = 0;
        em->block_len = em->len;

        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
        write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        write_unlock(&em_tree->lock);
        BUG_ON(ret);
        free_extent_map(em);

        ret = btrfs_make_block_group(trans, extent_root, 0, type,
                                     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                                     start, num_bytes);
        BUG_ON(ret);

        /* record one dev extent per stripe in the device tree */
        for (i = 0; i < map->num_stripes; ++i) {
                struct btrfs_device *device;
                u64 dev_offset;

                device = map->stripes[i].dev;
                dev_offset = map->stripes[i].physical;

                ret = btrfs_alloc_dev_extent(trans, device,
                                info->chunk_root->root_key.objectid,
                                BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                                start, dev_offset, stripe_size);
                BUG_ON(ret);
        }

        kfree(devices_info);
        return 0;

error:
        kfree(map);
        kfree(devices_info);
        return ret;
}
3346
3347 static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
3348                                 struct btrfs_root *extent_root,
3349                                 struct map_lookup *map, u64 chunk_offset,
3350                                 u64 chunk_size, u64 stripe_size)
3351 {
3352         u64 dev_offset;
3353         struct btrfs_key key;
3354         struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
3355         struct btrfs_device *device;
3356         struct btrfs_chunk *chunk;
3357         struct btrfs_stripe *stripe;
3358         size_t item_size = btrfs_chunk_item_size(map->num_stripes);
3359         int index = 0;
3360         int ret;
3361
3362         chunk = kzalloc(item_size, GFP_NOFS);
3363         if (!chunk)
3364                 return -ENOMEM;
3365
3366         index = 0;
3367         while (index < map->num_stripes) {
3368                 device = map->stripes[index].dev;
3369                 device->bytes_used += stripe_size;
3370                 ret = btrfs_update_device(trans, device);
3371                 BUG_ON(ret);
3372                 index++;
3373         }
3374
3375         spin_lock(&extent_root->fs_info->free_chunk_lock);
3376         extent_root->fs_info->free_chunk_space -= (stripe_size *
3377                                                    map->num_stripes);
3378         spin_unlock(&extent_root->fs_info->free_chunk_lock);
3379
3380