]> git.openfabrics.org - ~shefty/rdma-dev.git/blobdiff - fs/btrfs/extent-tree.c
Merge branch 'for-chris' of git://repo.or.cz/linux-btrfs-devel into integration
[~shefty/rdma-dev.git] / fs / btrfs / extent-tree.c
index bf30f670cda96e5e6c081ec69d54a12d7f95e07d..1c1cf216be8059e37a3c63b767dd40d036ce1f94 100644 (file)
@@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
        struct list_head *head = &info->space_info;
        struct btrfs_space_info *found;
 
-       flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
-                BTRFS_BLOCK_GROUP_METADATA;
+       flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
 
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
@@ -2267,9 +2266,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                                BUG_ON(ret);
                                kfree(extent_op);
 
-                               cond_resched();
-                               spin_lock(&delayed_refs->lock);
-                               continue;
+                               goto next;
                        }
 
                        list_del_init(&locked_ref->cluster);
@@ -2289,7 +2286,11 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                btrfs_put_delayed_ref(ref);
                kfree(extent_op);
                count++;
-
+next:
+               do_chunk_alloc(trans, root->fs_info->extent_root,
+                              2 * 1024 * 1024,
+                              btrfs_get_alloc_profile(root, 0),
+                              CHUNK_ALLOC_NO_FORCE);
                cond_resched();
                spin_lock(&delayed_refs->lock);
        }
@@ -2317,6 +2318,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        if (root == root->fs_info->extent_root)
                root = root->fs_info->tree_root;
 
+       do_chunk_alloc(trans, root->fs_info->extent_root,
+                      2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
+                      CHUNK_ALLOC_NO_FORCE);
+
        delayed_refs = &trans->transaction->delayed_refs;
        INIT_LIST_HEAD(&cluster);
 again:
@@ -2993,9 +2998,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                INIT_LIST_HEAD(&found->block_groups[i]);
        init_rwsem(&found->groups_sem);
        spin_lock_init(&found->lock);
-       found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
-                               BTRFS_BLOCK_GROUP_SYSTEM |
-                               BTRFS_BLOCK_GROUP_METADATA);
+       found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
        found->total_bytes = total_bytes;
        found->disk_total = total_bytes * factor;
        found->bytes_used = bytes_used;
@@ -3016,20 +3019,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 
 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 {
-       u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
-                                  BTRFS_BLOCK_GROUP_RAID1 |
-                                  BTRFS_BLOCK_GROUP_RAID10 |
-                                  BTRFS_BLOCK_GROUP_DUP);
-       if (extra_flags) {
-               if (flags & BTRFS_BLOCK_GROUP_DATA)
-                       fs_info->avail_data_alloc_bits |= extra_flags;
-               if (flags & BTRFS_BLOCK_GROUP_METADATA)
-                       fs_info->avail_metadata_alloc_bits |= extra_flags;
-               if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-                       fs_info->avail_system_alloc_bits |= extra_flags;
-       }
+       u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+       /* chunk -> extended profile */
+       if (extra_flags == 0)
+               extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+       if (flags & BTRFS_BLOCK_GROUP_DATA)
+               fs_info->avail_data_alloc_bits |= extra_flags;
+       if (flags & BTRFS_BLOCK_GROUP_METADATA)
+               fs_info->avail_metadata_alloc_bits |= extra_flags;
+       if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+               fs_info->avail_system_alloc_bits |= extra_flags;
 }
 
+/*
+ * @flags: available profiles in extended format (see ctree.h)
+ *
+ * Returns reduced profile in chunk format.  If profile changing is in
+ * progress (either running or paused) picks the target profile (if it's
+ * already available), otherwise falls back to plain reducing.
+ */
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
        /*
@@ -3040,6 +3050,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
        u64 num_devices = root->fs_info->fs_devices->rw_devices +
                root->fs_info->fs_devices->missing_devices;
 
+       /* pick restriper's target profile if it's available */
+       spin_lock(&root->fs_info->balance_lock);
+       if (root->fs_info->balance_ctl) {
+               struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
+               u64 tgt = 0;
+
+               if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
+                   (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+                   (flags & bctl->data.target)) {
+                       tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
+               } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
+                          (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+                          (flags & bctl->sys.target)) {
+                       tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
+               } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
+                          (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+                          (flags & bctl->meta.target)) {
+                       tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
+               }
+
+               if (tgt) {
+                       spin_unlock(&root->fs_info->balance_lock);
+                       flags = tgt;
+                       goto out;
+               }
+       }
+       spin_unlock(&root->fs_info->balance_lock);
+
        if (num_devices == 1)
                flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
        if (num_devices < 4)
@@ -3059,22 +3097,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
        if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
            ((flags & BTRFS_BLOCK_GROUP_RAID1) |
             (flags & BTRFS_BLOCK_GROUP_RAID10) |
-            (flags & BTRFS_BLOCK_GROUP_DUP)))
+            (flags & BTRFS_BLOCK_GROUP_DUP))) {
                flags &= ~BTRFS_BLOCK_GROUP_RAID0;
+       }
+
+out:
+       /* extended -> chunk profile */
+       flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
        return flags;
 }
 
 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 {
        if (flags & BTRFS_BLOCK_GROUP_DATA)
-               flags |= root->fs_info->avail_data_alloc_bits &
-                        root->fs_info->data_alloc_profile;
+               flags |= root->fs_info->avail_data_alloc_bits;
        else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-               flags |= root->fs_info->avail_system_alloc_bits &
-                        root->fs_info->system_alloc_profile;
+               flags |= root->fs_info->avail_system_alloc_bits;
        else if (flags & BTRFS_BLOCK_GROUP_METADATA)
-               flags |= root->fs_info->avail_metadata_alloc_bits &
-                        root->fs_info->metadata_alloc_profile;
+               flags |= root->fs_info->avail_metadata_alloc_bits;
+
        return btrfs_reduce_alloc_profile(root, flags);
 }
 
@@ -3257,27 +3298,12 @@ static int should_alloc_chunk(struct btrfs_root *root,
                if (num_bytes - num_allocated < thresh)
                        return 1;
        }
-
-       /*
-        * we have two similar checks here, one based on percentage
-        * and once based on a hard number of 256MB.  The idea
-        * is that if we have a good amount of free
-        * room, don't allocate a chunk.  A good mount is
-        * less than 80% utilized of the chunks we have allocated,
-        * or more than 256MB free
-        */
-       if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
-               return 0;
-
-       if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
-               return 0;
-
        thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
 
-       /* 256MB or 5% of the FS */
-       thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+       /* 256MB or 2% of the FS */
+       thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2));
 
-       if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+       if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
                return 0;
        return 1;
 }
@@ -3291,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        int wait_for_alloc = 0;
        int ret = 0;
 
-       flags = btrfs_reduce_alloc_profile(extent_root, flags);
+       BUG_ON(!profile_is_valid(flags, 0));
 
        space_info = __find_space_info(extent_root->fs_info, flags);
        if (!space_info) {
@@ -3416,7 +3442,8 @@ static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
                smp_mb();
                nr_pages = min_t(unsigned long, nr_pages,
                       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
-               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+                                               WB_REASON_FS_FREE_SPACE);
 
                spin_lock(&space_info->lock);
                if (reserved > space_info->bytes_may_use)
@@ -5294,15 +5321,6 @@ alloc:
                if (unlikely(block_group->ro))
                        goto loop;
 
-               spin_lock(&block_group->free_space_ctl->tree_lock);
-               if (cached &&
-                   block_group->free_space_ctl->free_space <
-                   num_bytes + empty_cluster + empty_size) {
-                       spin_unlock(&block_group->free_space_ctl->tree_lock);
-                       goto loop;
-               }
-               spin_unlock(&block_group->free_space_ctl->tree_lock);
-
                /*
                 * Ok we want to try and use the cluster allocator, so
                 * lets look there
@@ -5348,8 +5366,15 @@ refill_cluster:
                         * plenty of times and not have found
                         * anything, so we are likely way too
                         * fragmented for the clustering stuff to find
-                        * anything.  */
-                       if (loop >= LOOP_NO_EMPTY_SIZE) {
+                        * anything.
+                        *
+                        * However, if the cluster is taken from the
+                        * current block group, release the cluster
+                        * first, so that we stand a better chance of
+                        * succeeding in the unclustered
+                        * allocation.  */
+                       if (loop >= LOOP_NO_EMPTY_SIZE &&
+                           last_ptr->block_group != block_group) {
                                spin_unlock(&last_ptr->refill_lock);
                                goto unclustered_alloc;
                        }
@@ -5360,6 +5385,11 @@ refill_cluster:
                         */
                        btrfs_return_cluster_to_free_space(NULL, last_ptr);
 
+                       if (loop >= LOOP_NO_EMPTY_SIZE) {
+                               spin_unlock(&last_ptr->refill_lock);
+                               goto unclustered_alloc;
+                       }
+
                        /* allocate a cluster in this block group */
                        ret = btrfs_find_space_cluster(trans, root,
                                               block_group, last_ptr,
@@ -5400,6 +5430,15 @@ refill_cluster:
                }
 
 unclustered_alloc:
+               spin_lock(&block_group->free_space_ctl->tree_lock);
+               if (cached &&
+                   block_group->free_space_ctl->free_space <
+                   num_bytes + empty_cluster + empty_size) {
+                       spin_unlock(&block_group->free_space_ctl->tree_lock);
+                       goto loop;
+               }
+               spin_unlock(&block_group->free_space_ctl->tree_lock);
+
                offset = btrfs_find_space_for_alloc(block_group, search_start,
                                                    num_bytes, empty_size);
                /*
@@ -5437,9 +5476,6 @@ checks:
                        goto loop;
                }
 
-               ins->objectid = search_start;
-               ins->offset = num_bytes;
-
                if (offset < search_start)
                        btrfs_add_free_space(used_block_group, offset,
                                             search_start - offset);
@@ -6791,6 +6827,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
                BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
+       if (root->fs_info->balance_ctl) {
+               struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
+               u64 tgt = 0;
+
+               /* pick restriper's target profile and return */
+               if (flags & BTRFS_BLOCK_GROUP_DATA &&
+                   bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+                       tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
+               } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
+                          bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+                       tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
+               } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
+                          bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+                       tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
+               }
+
+               if (tgt) {
+                       /* extended -> chunk profile */
+                       tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+                       return tgt;
+               }
+       }
+
        /*
         * we add in the count of missing devices because we want
         * to make sure that any RAID levels on a degraded FS
@@ -7466,6 +7525,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+       u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+       /* chunk -> extended profile */
+       if (extra_flags == 0)
+               extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+       if (flags & BTRFS_BLOCK_GROUP_DATA)
+               fs_info->avail_data_alloc_bits &= ~extra_flags;
+       if (flags & BTRFS_BLOCK_GROUP_METADATA)
+               fs_info->avail_metadata_alloc_bits &= ~extra_flags;
+       if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+               fs_info->avail_system_alloc_bits &= ~extra_flags;
+}
+
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 group_start)
 {
@@ -7476,6 +7551,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        struct btrfs_key key;
        struct inode *inode;
        int ret;
+       int index;
        int factor;
 
        root = root->fs_info->extent_root;
@@ -7491,6 +7567,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        free_excluded_extents(root, block_group);
 
        memcpy(&key, &block_group->key, sizeof(key));
+       index = get_block_group_index(block_group);
        if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
                                  BTRFS_BLOCK_GROUP_RAID1 |
                                  BTRFS_BLOCK_GROUP_RAID10))
@@ -7565,6 +7642,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         * are still on the list after taking the semaphore
         */
        list_del_init(&block_group->list);
+       if (list_empty(&block_group->space_info->block_groups[index]))
+               clear_avail_alloc_bits(root->fs_info, block_group->flags);
        up_write(&block_group->space_info->groups_sem);
 
        if (block_group->cached == BTRFS_CACHE_STARTED)