]> git.openfabrics.org - ~shefty/rdma-dev.git/blobdiff - fs/btrfs/extent-tree.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
[~shefty/rdma-dev.git] / fs / btrfs / extent-tree.c
index bcd59c7dfb574def82517a80affe6a9ddf756a92..227e5815d8382393d4adc4d0a340ee636bca9d3e 100644 (file)
@@ -429,6 +429,7 @@ err:
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
                             struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
                             int load_cache_only)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 
        /*
         * We can't do the read from on-disk cache during a commit since we need
-        * to have the normal tree locking.
+        * to have the normal tree locking.  Also if we are currently trying to
+        * allocate blocks for the tree root we can't do the fast caching since
+        * we likely hold important locks.
         */
-       if (!trans->transaction->in_commit) {
+       if (!trans->transaction->in_commit &&
+           (root && root != root->fs_info->tree_root)) {
                spin_lock(&cache->lock);
                if (cache->cached != BTRFS_CACHE_NO) {
                        spin_unlock(&cache->lock);
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        struct btrfs_root *root = block_group->fs_info->tree_root;
        struct inode *inode = NULL;
        u64 alloc_hint = 0;
+       int dcs = BTRFS_DC_ERROR;
        int num_pages = 0;
        int retries = 0;
        int ret = 0;
@@ -2795,6 +2800,8 @@ again:
 
        spin_lock(&block_group->lock);
        if (block_group->cached != BTRFS_CACHE_FINISHED) {
+               /* We're not cached, don't bother trying to write stuff out */
+               dcs = BTRFS_DC_WRITTEN;
                spin_unlock(&block_group->lock);
                goto out_put;
        }
@@ -2821,6 +2828,8 @@ again:
        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
                                              num_pages, num_pages,
                                              &alloc_hint);
+       if (!ret)
+               dcs = BTRFS_DC_SETUP;
        btrfs_free_reserved_data_space(inode, num_pages);
 out_put:
        iput(inode);
@@ -2828,10 +2837,7 @@ out_free:
        btrfs_release_path(root, path);
 out:
        spin_lock(&block_group->lock);
-       if (ret)
-               block_group->disk_cache_state = BTRFS_DC_ERROR;
-       else
-               block_group->disk_cache_state = BTRFS_DC_SETUP;
+       block_group->disk_cache_state = dcs;
        spin_unlock(&block_group->lock);
 
        return ret;
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-       u64 num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       u64 num_devices = root->fs_info->fs_devices->rw_devices +
+               root->fs_info->fs_devices->missing_devices;
 
        if (num_devices == 1)
                flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                 * space back to the block group, otherwise we will leak space.
                 */
                if (!alloc && cache->cached == BTRFS_CACHE_NO)
-                       cache_block_group(cache, trans, 1);
+                       cache_block_group(cache, trans, NULL, 1);
 
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
@@ -4930,11 +4942,31 @@ search:
                btrfs_get_block_group(block_group);
                search_start = block_group->key.objectid;
 
+               /*
+                * this can happen if we end up cycling through all the
+                * raid types, but we want to make sure we only allocate
+                * for the proper type.
+                */
+               if (!block_group_bits(block_group, data)) {
+                   u64 extra = BTRFS_BLOCK_GROUP_DUP |
+                               BTRFS_BLOCK_GROUP_RAID1 |
+                               BTRFS_BLOCK_GROUP_RAID10;
+
+                       /*
+                        * if they asked for extra copies and this block group
+                        * doesn't provide them, bail.  This does allow us to
+                        * fill raid0 from raid1.
+                        */
+                       if ((data & extra) && !(block_group->flags & extra))
+                               goto loop;
+               }
+
 have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                        u64 free_percent;
 
-                       ret = cache_block_group(block_group, trans, 1);
+                       ret = cache_block_group(block_group, trans,
+                                               orig_root, 1);
                        if (block_group->cached == BTRFS_CACHE_FINISHED)
                                goto have_block_group;
 
@@ -4958,7 +4990,8 @@ have_block_group:
                        if (loop > LOOP_CACHING_NOWAIT ||
                            (loop > LOOP_FIND_IDEAL &&
                             atomic_read(&space_info->caching_threads) < 2)) {
-                               ret = cache_block_group(block_group, trans, 0);
+                               ret = cache_block_group(block_group, trans,
+                                                       orig_root, 0);
                                BUG_ON(ret);
                        }
                        found_uncached_bg = true;
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        u64 num_bytes = ins->offset;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-       cache_block_group(block_group, trans, 0);
+       cache_block_group(block_group, trans, NULL, 0);
        caching_ctl = get_caching_control(block_group);
 
        if (!caching_ctl) {
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                                           NULL, NULL);
                BUG_ON(ret < 0);
                if (ret > 0) {
-                       ret = btrfs_del_orphan_item(trans, tree_root,
-                                                   root->root_key.objectid);
-                       BUG_ON(ret);
+                       /* if we fail to delete the orphan item this time
+                        * around, it'll get picked up the next time.
+                        *
+                        * The most common failure here is just -ENOENT.
+                        */
+                       btrfs_del_orphan_item(trans, tree_root,
+                                             root->root_key.objectid);
                }
        }
 
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
                BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
-       num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       num_devices = root->fs_info->fs_devices->rw_devices +
+               root->fs_info->fs_devices->missing_devices;
+
        if (num_devices == 1) {
                stripped |= BTRFS_BLOCK_GROUP_DUP;
                stripped = flags & ~stripped;
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                        break;
                if (ret != 0)
                        goto error;
-
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                cache = kzalloc(sizeof(*cache), GFP_NOFS);