Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Oct 2010 16:05:48 +0000 (09:05 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Oct 2010 16:05:48 +0000 (09:05 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (39 commits)
  Btrfs: deal with errors from updating the tree log
  Btrfs: allow subvol deletion by unprivileged user with -o user_subvol_rm_allowed
  Btrfs: make SNAP_DESTROY async
  Btrfs: add SNAP_CREATE_ASYNC ioctl
  Btrfs: add START_SYNC, WAIT_SYNC ioctls
  Btrfs: async transaction commit
  Btrfs: fix deadlock in btrfs_commit_transaction
  Btrfs: fix lockdep warning on clone ioctl
  Btrfs: fix clone ioctl where range is adjacent to extent
  Btrfs: fix delalloc checks in clone ioctl
  Btrfs: drop unused variable in block_alloc_rsv
  Btrfs: cleanup warnings from gcc 4.6 (nonbugs)
  Btrfs: Fix variables set but not read (bugs found by gcc 4.6)
  Btrfs: Use ERR_CAST helpers
  Btrfs: use memdup_user helpers
  Btrfs: fix raid code for removing missing drives
  Btrfs: Switch the extent buffer rbtree into a radix tree
  Btrfs: restructure try_release_extent_buffer()
  Btrfs: use the flusher threads for delalloc throttling
  Btrfs: tune the chunk allocation to 5% of the FS as metadata
  ...

Fix up trivial conflicts in fs/btrfs/super.c and fs/fs-writeback.c, and
remove use of INIT_RCU_HEAD in fs/btrfs/extent_io.c (that init macro was
useless and removed in commit 5e8067adfdba: "rcu head remove init")

27 files changed:
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ioctl.h
fs/btrfs/ordered-data.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-defrag.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/btrfs/zlib.c
fs/fs-writeback.c
include/linux/writeback.h

index 396039b3a8a24ecb7f37fbec9ef831ed05d5cf94..7845d1f7d1d927f79ac45c09af730963a426b8e5 100644 (file)
@@ -163,7 +163,6 @@ fail:
  */
 static void end_compressed_bio_read(struct bio *bio, int err)
 {
-       struct extent_io_tree *tree;
        struct compressed_bio *cb = bio->bi_private;
        struct inode *inode;
        struct page *page;
@@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
        /* ok, we're the last bio for this extent, lets start
         * the decompression.
         */
-       tree = &BTRFS_I(inode)->io_tree;
        ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
                                        cb->start,
                                        cb->orig_bio->bi_io_vec,
index c3df14ce2cc2c00d232028d1238121ff9c37e35c..9ac17159925819a399dc10413d5261dd67bb5584 100644 (file)
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
                      struct extent_buffer **cow_ret, u64 new_root_objectid)
 {
        struct extent_buffer *cow;
-       u32 nritems;
        int ret = 0;
        int level;
        struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
        level = btrfs_header_level(buf);
-       nritems = btrfs_header_nritems(buf);
        if (level == 0)
                btrfs_item_key(buf, &disk_key, 0);
        else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
-       int err_on_enospc = 0;
        u64 orig_ptr;
 
        if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;
 
-       if (btrfs_header_nritems(mid) < 2)
-               err_on_enospc = 1;
+       btrfs_header_nritems(mid);
 
        left = read_node_slot(root, parent, pslot - 1);
        if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                wret = push_node_left(trans, root, left, mid, 1);
                if (wret < 0)
                        ret = wret;
-               if (btrfs_header_nritems(mid) < 2)
-                       err_on_enospc = 1;
+               btrfs_header_nritems(mid);
        }
 
        /*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
-       u64 orig_ptr;
 
        if (level == 0)
                return 1;
 
        mid = path->nodes[level];
        WARN_ON(btrfs_header_generation(mid) != trans->transid);
-       orig_ptr = btrfs_node_blockptr(mid, orig_slot);
 
        if (level < BTRFS_MAX_LEVEL - 1)
                parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        blocksize = btrfs_level_size(root, level - 1);
 
        tmp = btrfs_find_tree_block(root, blocknr, blocksize);
-       if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
-               /*
-                * we found an up to date block without sleeping, return
-                * right away
-                */
-               *eb_ret = tmp;
-               return 0;
+       if (tmp) {
+               if (btrfs_buffer_uptodate(tmp, 0)) {
+                       if (btrfs_buffer_uptodate(tmp, gen)) {
+                               /*
+                                * we found an up to date block without
+                                * sleeping, return
+                                * right away
+                                */
+                               *eb_ret = tmp;
+                               return 0;
+                       }
+                       /* the pages were up to date, but we failed
+                        * the generation number check.  Do a full
+                        * read for the generation number that is correct.
+                        * We must do this without dropping locks so
+                        * we can trust our generation number
+                        */
+                       free_extent_buffer(tmp);
+                       tmp = read_tree_block(root, blocknr, blocksize, gen);
+                       if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+                               *eb_ret = tmp;
+                               return 0;
+                       }
+                       free_extent_buffer(tmp);
+                       btrfs_release_path(NULL, p);
+                       return -EIO;
+               }
        }
 
        /*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        btrfs_unlock_up_safe(p, level + 1);
        btrfs_set_path_blocking(p);
 
-       if (tmp)
-               free_extent_buffer(tmp);
+       free_extent_buffer(tmp);
        if (p->reada)
                reada_for_search(root, p, level, slot, key->objectid);
 
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 {
        struct btrfs_disk_key disk_key;
        struct extent_buffer *right = path->nodes[0];
-       int slot;
        int i;
        int push_space = 0;
        int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        u32 this_item_size;
        u32 old_left_item_size;
 
-       slot = path->slots[1];
-
        if (empty)
                nr = min(right_nritems, max_slot);
        else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        int slot;
-       int slot_orig;
        struct extent_buffer *leaf;
        struct btrfs_item *item;
        u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
        unsigned int size_diff;
        int i;
 
-       slot_orig = path->slots[0];
        leaf = path->nodes[0];
        slot = path->slots[0];
 
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        int slot;
-       int slot_orig;
        struct extent_buffer *leaf;
        struct btrfs_item *item;
        u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
        unsigned int old_size;
        int i;
 
-       slot_orig = path->slots[0];
        leaf = path->nodes[0];
 
        nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                            struct btrfs_key *cpu_key, u32 *data_size,
                            int nr)
 {
-       struct extent_buffer *leaf;
        int ret = 0;
        int slot;
        int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
        if (ret < 0)
                goto out;
 
-       leaf = path->nodes[0];
        slot = path->slots[0];
        BUG_ON(slot < 0);
 
index eaf286abad1754f8cbc244cecd2a35c91c3f1abd..8db9234f6b418db0cc1f99afa6876f369116a1b8 100644 (file)
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
  */
 #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
 
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -265,6 +268,22 @@ struct btrfs_chunk {
        /* additional stripes go here */
 } __attribute__ ((__packed__));
 
+#define BTRFS_FREE_SPACE_EXTENT        1
+#define BTRFS_FREE_SPACE_BITMAP        2
+
+struct btrfs_free_space_entry {
+       __le64 offset;
+       __le64 bytes;
+       u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+       struct btrfs_disk_key location;
+       __le64 generation;
+       __le64 num_entries;
+       __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
 static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 {
        BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
 
        char label[BTRFS_LABEL_SIZE];
 
+       __le64 cache_generation;
+
        /* future expansion */
-       __le64 reserved[32];
+       __le64 reserved[31];
        u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
 } __attribute__ ((__packed__));
 
@@ -375,13 +396,15 @@ struct btrfs_super_block {
  * ones specified below then we will fail to mount
  */
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF   (1ULL << 0)
-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL  (2ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL  (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS    (1ULL << 2)
 
 #define BTRFS_FEATURE_COMPAT_SUPP              0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP           0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP            \
-       (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
-        BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
+#define BTRFS_FEATURE_INCOMPAT_SUPP                    \
+       (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
+        BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
+        BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
 struct btrfs_space_info {
        u64 flags;
 
-       u64 total_bytes;        /* total bytes in the space */
+       u64 total_bytes;        /* total bytes in the space,
+                                  this doesn't take mirrors into account */
        u64 bytes_used;         /* total bytes used,
                                   this does't take mirrors into account */
        u64 bytes_pinned;       /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
        u64 bytes_may_use;      /* number of bytes that may be used for
                                   delalloc/allocations */
        u64 disk_used;          /* total bytes used on disk */
+       u64 disk_total;         /* total bytes on disk, takes mirrors into
+                                  account */
 
        int full;               /* indicates that we cannot allocate any more
                                   chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
        BTRFS_CACHE_FINISHED    = 2,
 };
 
+enum btrfs_disk_cache_state {
+       BTRFS_DC_WRITTEN        = 0,
+       BTRFS_DC_ERROR          = 1,
+       BTRFS_DC_CLEAR          = 2,
+       BTRFS_DC_SETUP          = 3,
+       BTRFS_DC_NEED_WRITE     = 4,
+};
+
 struct btrfs_caching_control {
        struct list_head list;
        struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
        struct btrfs_key key;
        struct btrfs_block_group_item item;
        struct btrfs_fs_info *fs_info;
+       struct inode *inode;
        spinlock_t lock;
        u64 pinned;
        u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
        int extents_thresh;
        int free_extents;
        int total_bitmaps;
-       int ro;
-       int dirty;
+       int ro:1;
+       int dirty:1;
+       int iref:1;
+
+       int disk_cache_state;
 
        /* cache tracking stuff */
        int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
        struct btrfs_transaction *running_transaction;
        wait_queue_head_t transaction_throttle;
        wait_queue_head_t transaction_wait;
+       wait_queue_head_t transaction_blocked_wait;
        wait_queue_head_t async_submit_wait;
 
        struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
        struct btrfs_workers endio_meta_workers;
        struct btrfs_workers endio_meta_write_workers;
        struct btrfs_workers endio_write_workers;
+       struct btrfs_workers endio_freespace_worker;
        struct btrfs_workers submit_workers;
        /*
         * fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
 #define BTRFS_MOUNT_NOSSD              (1 << 9)
 #define BTRFS_MOUNT_DISCARD            (1 << 10)
 #define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
+#define BTRFS_MOUNT_SPACE_CACHE                (1 << 12)
+#define BTRFS_MOUNT_CLEAR_CACHE                (1 << 13)
+#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
 
 #define btrfs_clear_opt(o, opt)                ((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)          ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
        write_eb_member(eb, item, struct btrfs_dir_item, location, key);
 }
 
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+                  num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+                  num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+                  generation, 64);
+
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+                                       struct btrfs_free_space_header *h,
+                                       struct btrfs_disk_key *key)
+{
+       read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+                                           struct btrfs_free_space_header *h,
+                                           struct btrfs_disk_key *key)
+{
+       write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
 /* struct btrfs_disk_key */
 BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
                         objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
                         incompat_flags, 64);
 BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
                         csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+                        cache_generation, 64);
 
 static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
 {
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
        return file->f_path.dentry;
 }
 
+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+{
+       return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+               (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
+}
+
 /* extent-tree.c */
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               int num_items, int *retries);
+                               int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct btrfs_block_rsv *block_rsv,
-                       u64 num_bytes, int *retries);
+                       u64 num_bytes);
 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache);
 int btrfs_set_block_group_rw(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache);
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                  int sync);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              struct extent_state **cached_state);
 int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
                              u64 start, u64 num_bytes, u64 min_size,
                              loff_t actual_len, u64 *alloc_hint);
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+                                   struct btrfs_trans_handle *trans, int mode,
+                                   u64 start, u64 num_bytes, u64 min_size,
+                                   loff_t actual_len, u64 *alloc_hint);
 extern const struct dentry_operations btrfs_dentry_operations;
 
 /* ioctl.c */
index e9103b3baa49f4309c94eca9e0d7b7069665359c..f0cad5ae5be75679c273583b6c6f535118acc093 100644 (file)
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
                ret = btrfs_truncate_item(trans, root, path,
                                          item_len - sub_item_len, 1);
        }
-       return 0;
+       return ret;
 }
index 5e789f4a3ed0f0da401526d6a86de10db5c74f67..fb827d0d71811baff7aec0f49a2584f7eedbaf84 100644 (file)
@@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
        struct extent_io_tree *tree;
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 found_start;
-       int found_level;
        unsigned long len;
        struct extent_buffer *eb;
        int ret;
@@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
                WARN_ON(1);
                goto err;
        }
-       found_level = btrfs_header_level(eb);
-
        csum_tree_block(root, eb, 0);
 err:
        free_extent_buffer(eb);
@@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
        end_io_wq->work.flags = 0;
 
        if (bio->bi_rw & REQ_WRITE) {
-               if (end_io_wq->metadata)
+               if (end_io_wq->metadata == 1)
                        btrfs_queue_worker(&fs_info->endio_meta_write_workers,
                                           &end_io_wq->work);
+               else if (end_io_wq->metadata == 2)
+                       btrfs_queue_worker(&fs_info->endio_freespace_worker,
+                                          &end_io_wq->work);
                else
                        btrfs_queue_worker(&fs_info->endio_write_workers,
                                           &end_io_wq->work);
@@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
        }
 }
 
+/*
+ * For the metadata arg you want
+ *
+ * 0 - if data
+ * 1 - if normal metadta
+ * 2 - if writing to the free space cache area
+ */
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        int metadata)
 {
@@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
 
 static void run_one_async_start(struct btrfs_work *work)
 {
-       struct btrfs_fs_info *fs_info;
        struct async_submit_bio *async;
 
        async = container_of(work, struct  async_submit_bio, work);
-       fs_info = BTRFS_I(async->inode)->root->fs_info;
        async->submit_bio_start(async->inode, async->rw, async->bio,
                               async->mirror_num, async->bio_flags,
                               async->bio_offset);
@@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
                                      u32 blocksize, u64 parent_transid)
 {
        struct extent_buffer *buf = NULL;
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_io_tree *io_tree;
        int ret;
 
-       io_tree = &BTRFS_I(btree_inode)->io_tree;
-
        buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!buf)
                return NULL;
@@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
        u64 start = 0;
        struct page *page;
        struct extent_io_tree *io_tree = NULL;
-       struct btrfs_fs_info *info = NULL;
        struct bio_vec *bvec;
        int i;
        int ret;
@@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
                buf_len = page->private >> 2;
                start = page_offset(page) + bvec->bv_offset;
                io_tree = &BTRFS_I(page->mapping->host)->io_tree;
-               info = BTRFS_I(page->mapping->host)->root->fs_info;
        }
        /* are we fully contained in this bio? */
        if (buf_len <= length)
@@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        init_waitqueue_head(&fs_info->transaction_throttle);
        init_waitqueue_head(&fs_info->transaction_wait);
+       init_waitqueue_head(&fs_info->transaction_blocked_wait);
        init_waitqueue_head(&fs_info->async_submit_wait);
 
        __setup_root(4096, 4096, 4096, 4096, tree_root,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
-
        bh = btrfs_read_dev_super(fs_devices->latest_bdev);
        if (!bh)
                goto fail_iput;
@@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
                           fs_info->thread_pool_size,
                           &fs_info->generic_worker);
+       btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
+                          1, &fs_info->generic_worker);
 
        /*
         * endios are largely parallel and should have a very
@@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_start_workers(&fs_info->endio_meta_workers, 1);
        btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
+       btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        if (!(sb->s_flags & MS_RDONLY)) {
                down_read(&fs_info->cleanup_work_sem);
                btrfs_orphan_cleanup(fs_info->fs_root);
+               btrfs_orphan_cleanup(fs_info->tree_root);
                up_read(&fs_info->cleanup_work_sem);
        }
 
@@ -2035,6 +2038,7 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
+       btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
        fs_info->closing = 1;
        smp_mb();
 
+       btrfs_put_block_group_cache(fs_info);
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret =  btrfs_commit_super(root);
                if (ret)
@@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
+       btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
 
        btrfs_close_devices(fs_info->fs_devices);
index 0b81ecdb101cc2dfea708a8ad9a6274b84acb81d..0c097f3aec418564e25df5b7e0af8d9a81a79d1d 100644 (file)
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
                return NULL;
        }
 
+       /* We're loading it the fast way, so we don't have a caching_ctl. */
+       if (!cache->caching_ctl) {
+               spin_unlock(&cache->lock);
+               return NULL;
+       }
+
        ctl = cache->caching_ctl;
        atomic_inc(&ctl->count);
        spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
        return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+                            struct btrfs_trans_handle *trans,
+                            int load_cache_only)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
        if (cache->cached != BTRFS_CACHE_NO)
                return 0;
 
+       /*
+        * We can't do the read from on-disk cache during a commit since we need
+        * to have the normal tree locking.
+        */
+       if (!trans->transaction->in_commit) {
+               spin_lock(&cache->lock);
+               if (cache->cached != BTRFS_CACHE_NO) {
+                       spin_unlock(&cache->lock);
+                       return 0;
+               }
+               cache->cached = BTRFS_CACHE_STARTED;
+               spin_unlock(&cache->lock);
+
+               ret = load_free_space_cache(fs_info, cache);
+
+               spin_lock(&cache->lock);
+               if (ret == 1) {
+                       cache->cached = BTRFS_CACHE_FINISHED;
+                       cache->last_byte_to_unpin = (u64)-1;
+               } else {
+                       cache->cached = BTRFS_CACHE_NO;
+               }
+               spin_unlock(&cache->lock);
+               if (ret == 1)
+                       return 0;
+       }
+
+       if (load_cache_only)
+               return 0;
+
        caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
        BUG_ON(!caching_ctl);
 
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
-               if (found->flags == flags) {
+               if (found->flags & flags) {
                        rcu_read_unlock();
                        return found;
                }
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
        return num;
 }
 
+static u64 div_factor_fine(u64 num, int factor)
+{
+       if (factor == 100)
+               return num;
+       num *= factor;
+       do_div(num, 100);
+       return num;
+}
+
 u64 btrfs_find_block_group(struct btrfs_root *root,
                           u64 search_start, u64 search_hint, int owner)
 {
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
        return cache;
 }
 
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+                           struct btrfs_trans_handle *trans,
+                           struct btrfs_path *path)
+{
+       struct btrfs_root *root = block_group->fs_info->tree_root;
+       struct inode *inode = NULL;
+       u64 alloc_hint = 0;
+       int num_pages = 0;
+       int retries = 0;
+       int ret = 0;
+
+       /*
+        * If this block group is smaller than 100 megs don't bother caching the
+        * block group.
+        */
+       if (block_group->key.offset < (100 * 1024 * 1024)) {
+               spin_lock(&block_group->lock);
+               block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+               spin_unlock(&block_group->lock);
+               return 0;
+       }
+
+again:
+       inode = lookup_free_space_inode(root, block_group, path);
+       if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+               ret = PTR_ERR(inode);
+               btrfs_release_path(root, path);
+               goto out;
+       }
+
+       if (IS_ERR(inode)) {
+               BUG_ON(retries);
+               retries++;
+
+               if (block_group->ro)
+                       goto out_free;
+
+               ret = create_free_space_inode(root, trans, block_group, path);
+               if (ret)
+                       goto out_free;
+               goto again;
+       }
+
+       /*
+        * We want to set the generation to 0, that way if anything goes wrong
+        * from here on out we know not to trust this cache when we load up next
+        * time.
+        */
+       BTRFS_I(inode)->generation = 0;
+       ret = btrfs_update_inode(trans, root, inode);
+       WARN_ON(ret);
+
+       if (i_size_read(inode) > 0) {
+               ret = btrfs_truncate_free_space_cache(root, trans, path,
+                                                     inode);
+               if (ret)
+                       goto out_put;
+       }
+
+       spin_lock(&block_group->lock);
+       if (block_group->cached != BTRFS_CACHE_FINISHED) {
+               spin_unlock(&block_group->lock);
+               goto out_put;
+       }
+       spin_unlock(&block_group->lock);
+
+       num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+       if (!num_pages)
+               num_pages = 1;
+
+       /*
+        * Just to make absolutely sure we have enough space, we're going to
+        * preallocate 12 pages worth of space for each block group.  In
+        * practice we ought to use at most 8, but we need extra space so we can
+        * add our header and have a terminator between the extents and the
+        * bitmaps.
+        */
+       num_pages *= 16;
+       num_pages *= PAGE_CACHE_SIZE;
+
+       ret = btrfs_check_data_free_space(inode, num_pages);
+       if (ret)
+               goto out_put;
+
+       ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+                                             num_pages, num_pages,
+                                             &alloc_hint);
+       btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+       iput(inode);
+out_free:
+       btrfs_release_path(root, path);
+out:
+       spin_lock(&block_group->lock);
+       if (ret)
+               block_group->disk_cache_state = BTRFS_DC_ERROR;
+       else
+               block_group->disk_cache_state = BTRFS_DC_SETUP;
+       spin_unlock(&block_group->lock);
+
+       return ret;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+again:
+       while (1) {
+               cache = btrfs_lookup_first_block_group(root->fs_info, last);
+               while (cache) {
+                       if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                               break;
+                       cache = next_block_group(root, cache);
+               }
+               if (!cache) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
+               err = cache_save_setup(cache, trans, path);
+               last = cache->key.objectid + cache->key.offset;
+               btrfs_put_block_group(cache);
+       }
+
        while (1) {
                if (last == 0) {
                        err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
                cache = btrfs_lookup_first_block_group(root->fs_info, last);
                while (cache) {
+                       if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+                               btrfs_put_block_group(cache);
+                               goto again;
+                       }
+
                        if (cache->dirty)
                                break;
                        cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                        continue;
                }
 
+               if (cache->disk_cache_state == BTRFS_DC_SETUP)
+                       cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
                cache->dirty = 0;
                last = cache->key.objectid + cache->key.offset;
 
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                btrfs_put_block_group(cache);
        }
 
+       while (1) {
+               /*
+                * I don't think this is needed since we're just marking our
+                * preallocated extent as written, but just in case it can't
+                * hurt.
+                */
+               if (last == 0) {
+                       err = btrfs_run_delayed_refs(trans, root,
+                                                    (unsigned long)-1);
+                       BUG_ON(err);
+               }
+
+               cache = btrfs_lookup_first_block_group(root->fs_info, last);
+               while (cache) {
+                       /*
+                        * Really this shouldn't happen, but it could if we
+                        * couldn't write the entire preallocated extent and
+                        * splitting the extent resulted in a new block.
+                        */
+                       if (cache->dirty) {
+                               btrfs_put_block_group(cache);
+                               goto again;
+                       }
+                       if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+                               break;
+                       cache = next_block_group(root, cache);
+               }
+               if (!cache) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
+
+               btrfs_write_out_cache(root, trans, cache, path);
+
+               /*
+                * If we didn't have an error then the cache state is still
+                * NEED_WRITE, so we can set it to WRITTEN.
+                */
+               if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+                       cache->disk_cache_state = BTRFS_DC_WRITTEN;
+               last = cache->key.objectid + cache->key.offset;
+               btrfs_put_block_group(cache);
+       }
+
        btrfs_free_path(path);
        return 0;
 }
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        if (found) {
                spin_lock(&found->lock);
                found->total_bytes += total_bytes;
+               found->disk_total += total_bytes * factor;
                found->bytes_used += bytes_used;
                found->disk_used += bytes_used * factor;
                found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                                BTRFS_BLOCK_GROUP_SYSTEM |
                                BTRFS_BLOCK_GROUP_METADATA);
        found->total_bytes = total_bytes;
+       found->disk_total = total_bytes * factor;
        found->bytes_used = bytes_used;
        found->disk_used = bytes_used * factor;
        found->bytes_pinned = 0;
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
        struct btrfs_space_info *data_sinfo;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 used;
-       int ret = 0, committed = 0;
+       int ret = 0, committed = 0, alloc_chunk = 1;
 
        /* make sure bytes are sectorsize aligned */
        bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
+       if (root == root->fs_info->tree_root) {
+               alloc_chunk = 0;
+               committed = 1;
+       }
+
        data_sinfo = BTRFS_I(inode)->space_info;
        if (!data_sinfo)
                goto alloc;
@@ -2905,7 +3134,7 @@ again:
                 * if we don't have enough free bytes in this space then we need
                 * to alloc a new chunk.
                 */
-               if (!data_sinfo->full) {
+               if (!data_sinfo->full && alloc_chunk) {
                        u64 alloc_target;
 
                        data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
        rcu_read_unlock();
 }
 
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
-                             u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+                             struct btrfs_space_info *sinfo, u64 alloc_bytes)
 {
        u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+       u64 thresh;
 
        if (sinfo->bytes_used + sinfo->bytes_reserved +
            alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
            alloc_bytes < div_factor(num_bytes, 8))
                return 0;
 
+       thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+       thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+       if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+               return 0;
+
        return 1;
 }
 
@@ -3041,12 +3277,20 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                goto out;
        }
 
-       if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+       if (!force && !should_alloc_chunk(extent_root, space_info,
+                                         alloc_bytes)) {
                spin_unlock(&space_info->lock);
                goto out;
        }
        spin_unlock(&space_info->lock);
 
+       /*
+        * If we have mixed data/metadata chunks we want to make sure we keep
+        * allocating mixed chunks instead of individual chunks.
+        */
+       if (btrfs_mixed_space_info(space_info))
+               flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
        /*
         * if we're doing a data chunk, go ahead and make sure that
         * we keep a reasonable number of metadata chunks allocated in the
@@ -3072,55 +3316,25 @@ out:
        return ret;
 }
 
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               struct btrfs_space_info *sinfo, u64 num_bytes)
-{
-       int ret;
-       int end_trans = 0;
-
-       if (sinfo->full)
-               return 0;
-
-       spin_lock(&sinfo->lock);
-       ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-       spin_unlock(&sinfo->lock);
-       if (!ret)
-               return 0;
-
-       if (!trans) {
-               trans = btrfs_join_transaction(root, 1);
-               BUG_ON(IS_ERR(trans));
-               end_trans = 1;
-       }
-
-       ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                            num_bytes + 2 * 1024 * 1024,
-                            get_alloc_profile(root, sinfo->flags), 0);
-
-       if (end_trans)
-               btrfs_end_transaction(trans, root);
-
-       return ret == 1 ? 1 : 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 to_reclaim)
+                          struct btrfs_root *root, u64 to_reclaim, int sync)
 {
        struct btrfs_block_rsv *block_rsv;
+       struct btrfs_space_info *space_info;
        u64 reserved;
        u64 max_reclaim;
        u64 reclaimed = 0;
        int pause = 1;
-       int ret;
+       int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 
        block_rsv = &root->fs_info->delalloc_block_rsv;
-       spin_lock(&block_rsv->lock);
-       reserved = block_rsv->reserved;
-       spin_unlock(&block_rsv->lock);
+       space_info = block_rsv->space_info;
+
+       smp_mb();
+       reserved = space_info->bytes_reserved;
 
        if (reserved == 0)
                return 0;
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        max_reclaim = min(reserved, to_reclaim);
 
        while (1) {
-               ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
-               if (!ret) {
-                       __set_current_state(TASK_INTERRUPTIBLE);
-                       schedule_timeout(pause);
-                       pause <<= 1;
-                       if (pause > HZ / 10)
-                               pause = HZ / 10;
-               } else {
-                       pause = 1;
-               }
+               /* have the flusher threads jump in and do some IO */
+               smp_mb();
+               nr_pages = min_t(unsigned long, nr_pages,
+                      root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
-               spin_lock(&block_rsv->lock);
-               if (reserved > block_rsv->reserved)
-                       reclaimed = reserved - block_rsv->reserved;
-               reserved = block_rsv->reserved;
-               spin_unlock(&block_rsv->lock);
+               spin_lock(&space_info->lock);
+               if (reserved > space_info->bytes_reserved)
+                       reclaimed += reserved - space_info->bytes_reserved;
+               reserved = space_info->bytes_reserved;
+               spin_unlock(&space_info->lock);
 
                if (reserved == 0 || reclaimed >= max_reclaim)
                        break;
 
                if (trans && trans->transaction->blocked)
                        return -EAGAIN;
+
+               __set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(pause);
+               pause <<= 1;
+               if (pause > HZ / 10)
+                       pause = HZ / 10;
+
        }
        return reclaimed >= to_reclaim;
 }
 
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               struct btrfs_block_rsv *block_rsv,
-                               u64 num_bytes, int *retries)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes.  The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space.  That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+                                 struct btrfs_block_rsv *block_rsv,
+                                 u64 orig_bytes, int flush)
 {
        struct btrfs_space_info *space_info = block_rsv->space_info;
-       int ret;
+       u64 unused;
+       u64 num_bytes = orig_bytes;
+       int retries = 0;
+       int ret = 0;
+       bool reserved = false;
+       bool committed = false;
 
-       if ((*retries) > 2)
-               return -ENOSPC;
+again:
+       ret = -ENOSPC;
+       if (reserved)
+               num_bytes = 0;
 
-       ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-       if (ret)
-               return 1;
+       spin_lock(&space_info->lock);
+       unused = space_info->bytes_used + space_info->bytes_reserved +
+                space_info->bytes_pinned + space_info->bytes_readonly +
+                space_info->bytes_may_use;
 
-       if (trans && trans->transaction->in_commit)
-               return -ENOSPC;
+       /*
+        * The idea here is that we've not already over-reserved the block group
+        * then we can go ahead and save our reservation first and then start
+        * flushing if we need to.  Otherwise if we've already overcommitted
+        * lets start flushing stuff first and then come back and try to make
+        * our reservation.
+        */
+       if (unused <= space_info->total_bytes) {
+               unused -= space_info->total_bytes;
+               if (unused >= num_bytes) {
+                       if (!reserved)
+                               space_info->bytes_reserved += orig_bytes;
+                       ret = 0;
+               } else {
+                       /*
+                        * Ok set num_bytes to orig_bytes since we aren't
+                        * overocmmitted, this way we only try and reclaim what
+                        * we need.
+                        */
+                       num_bytes = orig_bytes;
+               }
+       } else {
+               /*
+                * Ok we're over committed, set num_bytes to the overcommitted
+                * amount plus the amount of bytes that we need for this
+                * reservation.
+                */
+               num_bytes = unused - space_info->total_bytes +
+                       (orig_bytes * (retries + 1));
+       }
 
-       ret = shrink_delalloc(trans, root, num_bytes);
-       if (ret)
-               return ret;
+       /*
+        * Couldn't make our reservation, save our place so while we're trying
+        * to reclaim space we can actually use it instead of somebody else
+        * stealing it from us.
+        */
+       if (ret && !reserved) {
+               space_info->bytes_reserved += orig_bytes;
+               reserved = true;
+       }
 
-       spin_lock(&space_info->lock);
-       if (space_info->bytes_pinned < num_bytes)
-               ret = 1;
        spin_unlock(&space_info->lock);
-       if (ret)
-               return -ENOSPC;
-
-       (*retries)++;
 
-       if (trans)
-               return -EAGAIN;
+       if (!ret)
+               return 0;
 
-       trans = btrfs_join_transaction(root, 1);
-       BUG_ON(IS_ERR(trans));
-       ret = btrfs_commit_transaction(trans, root);
-       BUG_ON(ret);
+       if (!flush)
+               goto out;
 
-       return 1;
-}
+       /*
+        * We do synchronous shrinking since we don't actually unreserve
+        * metadata until after the IO is completed.
+        */
+       ret = shrink_delalloc(trans, root, num_bytes, 1);
+       if (ret > 0)
+               return 0;
+       else if (ret < 0)
+               goto out;
 
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-                                 u64 num_bytes)
-{
-       struct btrfs_space_info *space_info = block_rsv->space_info;
-       u64 unused;
-       int ret = -ENOSPC;
+       /*
+        * So if we were overcommitted it's possible that somebody else flushed
+        * out enough space and we simply didn't have enough space to reclaim,
+        * so go back around and try again.
+        */
+       if (retries < 2) {
+               retries++;
+               goto again;
+       }
 
        spin_lock(&space_info->lock);
-       unused = space_info->bytes_used + space_info->bytes_reserved +
-                space_info->bytes_pinned + space_info->bytes_readonly;
+       /*
+        * Not enough space to be reclaimed, don't bother committing the
+        * transaction.
+        */
+       if (space_info->bytes_pinned < orig_bytes)
+               ret = -ENOSPC;
+       spin_unlock(&space_info->lock);
+       if (ret)
+               goto out;
 
-       if (unused < space_info->total_bytes)
-               unused = space_info->total_bytes - unused;
-       else
-               unused = 0;
+       ret = -EAGAIN;
+       if (trans || committed)
+               goto out;
 
-       if (unused >= num_bytes) {
-               if (block_rsv->priority >= 10) {
-                       space_info->bytes_reserved += num_bytes;
-                       ret = 0;
-               } else {
-                       if ((unused + block_rsv->reserved) *
-                           block_rsv->priority >=
-                           (num_bytes + block_rsv->reserved) * 10) {
-                               space_info->bytes_reserved += num_bytes;
-                               ret = 0;
-                       }
-               }
+       ret = -ENOSPC;
+       trans = btrfs_join_transaction(root, 1);
+       if (IS_ERR(trans))
+               goto out;
+       ret = btrfs_commit_transaction(trans, root);
+       if (!ret) {
+               trans = NULL;
+               committed = true;
+               goto again;
+       }
+
+out:
+       if (reserved) {
+               spin_lock(&space_info->lock);
+               space_info->bytes_reserved -= orig_bytes;
+               spin_unlock(&space_info->lock);
        }
-       spin_unlock(&space_info->lock);
 
        return ret;
 }
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 {
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_fs_info *fs_info = root->fs_info;
-       u64 alloc_target;
 
        block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
        if (!block_rsv)
                return NULL;
 
        btrfs_init_block_rsv(block_rsv);
-
-       alloc_target = btrfs_get_alloc_profile(root, 0);
        block_rsv->space_info = __find_space_info(fs_info,
                                                  BTRFS_BLOCK_GROUP_METADATA);
-
        return block_rsv;
 }
 
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct btrfs_block_rsv *block_rsv,
-                       u64 num_bytes, int *retries)
+                       u64 num_bytes)
 {
        int ret;
 
        if (num_bytes == 0)
                return 0;
-again:
-       ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+       ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
        if (!ret) {
                block_rsv_add_bytes(block_rsv, num_bytes, 1);
                return 0;
        }
 
-       ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-       if (ret > 0)
-               goto again;
-
        return ret;
 }
 
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
                return 0;
 
        if (block_rsv->refill_used) {
-               ret = reserve_metadata_bytes(block_rsv, num_bytes);
+               ret = reserve_metadata_bytes(trans, root, block_rsv,
+                                            num_bytes, 0);
                if (!ret) {
                        block_rsv_add_bytes(block_rsv, num_bytes, 0);
                        return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 
        sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
        spin_lock(&sinfo->lock);
+       if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+               data_used = 0;
        meta_used = sinfo->bytes_used;
        spin_unlock(&sinfo->lock);
 
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
        block_rsv->size = num_bytes;
 
        num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-                   sinfo->bytes_reserved + sinfo->bytes_readonly;
+                   sinfo->bytes_reserved + sinfo->bytes_readonly +
+                   sinfo->bytes_may_use;
 
        if (sinfo->total_bytes > num_bytes) {
                num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
-                                int num_items, int *retries)
+                                int num_items)
 {
        u64 num_bytes;
        int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 
        num_bytes = calc_trans_metadata_size(root, num_items);
        ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-                                 num_bytes, retries);
+                                 num_bytes);
        if (!ret) {
                trans->bytes_reserved += num_bytes;
                trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
        u64 to_reserve;
        int nr_extents;
-       int retries = 0;
        int ret;
 
        if (btrfs_transaction_in_commit(root->fs_info))
                schedule_timeout(1);
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
        spin_lock(&BTRFS_I(inode)->accounting_lock);
        nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
        if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
                nr_extents = 0;
                to_reserve = 0;
        }
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
-       ret = reserve_metadata_bytes(block_rsv, to_reserve);
-       if (ret) {
-               spin_unlock(&BTRFS_I(inode)->accounting_lock);
-               ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-                                          &retries);
-               if (ret > 0)
-                       goto again;
+       ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+       if (ret)
                return ret;
-       }
 
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
        BTRFS_I(inode)->reserved_extents += nr_extents;
        atomic_inc(&BTRFS_I(inode)->outstanding_extents);
        spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
        if (block_rsv->size > 512 * 1024 * 1024)
-               shrink_delalloc(NULL, root, to_reserve);
+               shrink_delalloc(NULL, root, to_reserve, 0);
 
        return 0;
 }
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc)
 {
-       struct btrfs_block_group_cache *cache;
+       struct btrfs_block_group_cache *cache = NULL;
        struct btrfs_fs_info *info = root->fs_info;
-       int factor;
        u64 total = num_bytes;
        u64 old_val;
        u64 byte_in_group;
+       int factor;
 
        /* block accounting for super block */
        spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        factor = 2;
                else
                        factor = 1;
+               /*
+                * If this block group has free space cache written out, we
+                * need to make sure to load it if we are removing space.  This
+                * is because we need the unpinning stage to actually add the
+                * space back to the block group, otherwise we will leak space.
+                */
+               if (!alloc && cache->cached == BTRFS_CACHE_NO)
+                       cache_block_group(cache, trans, 1);
+
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
 
                spin_lock(&cache->space_info->lock);
                spin_lock(&cache->lock);
+
+               if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+                   cache->disk_cache_state < BTRFS_DC_CLEAR)
+                       cache->disk_cache_state = BTRFS_DC_CLEAR;
+
                cache->dirty = 1;
                old_val = btrfs_block_group_used(&cache->item);
                num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
+       bool use_cluster = true;
        u64 ideal_cache_percent = 0;
        u64 ideal_cache_offset = 0;
 
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                return -ENOSPC;
        }
 
+       /*
+        * If the space info is for both data and metadata it means we have a
+        * small filesystem and we can't use the clustering stuff.
+        */
+       if (btrfs_mixed_space_info(space_info))
+               use_cluster = false;
+
        if (orig_root->ref_cows || empty_size)
                allowed_chunk_alloc = 1;
 
-       if (data & BTRFS_BLOCK_GROUP_METADATA) {
+       if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
                last_ptr = &root->fs_info->meta_alloc_cluster;
                if (!btrfs_test_opt(root, SSD))
                        empty_cluster = 64 * 1024;
        }
 
-       if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+       if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+           btrfs_test_opt(root, SSD)) {
                last_ptr = &root->fs_info->data_alloc_cluster;
        }
 
@@ -4641,6 +4934,10 @@ have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                        u64 free_percent;
 
+                       ret = cache_block_group(block_group, trans, 1);
+                       if (block_group->cached == BTRFS_CACHE_FINISHED)
+                               goto have_block_group;
+
                        free_percent = btrfs_block_group_used(&block_group->item);
                        free_percent *= 100;
                        free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
                        if (loop > LOOP_CACHING_NOWAIT ||
                            (loop > LOOP_FIND_IDEAL &&
                             atomic_read(&space_info->caching_threads) < 2)) {
-                               ret = cache_block_group(block_group);
+                               ret = cache_block_group(block_group, trans, 0);
                                BUG_ON(ret);
                        }
                        found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        u64 num_bytes = ins->offset;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-       cache_block_group(block_group);
+       cache_block_group(block_group, trans, 0);
        caching_ctl = get_caching_control(block_group);
 
        if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        block_rsv = get_block_rsv(trans, root);
 
        if (block_rsv->size == 0) {
-               ret = reserve_metadata_bytes(block_rsv, blocksize);
+               ret = reserve_metadata_bytes(trans, root, block_rsv,
+                                            blocksize, 0);
                if (ret)
                        return ERR_PTR(ret);
                return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        if (!ret)
                return block_rsv;
 
-       WARN_ON(1);
-       printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-               block_rsv->size, block_rsv->reserved,
-               block_rsv->freed[0], block_rsv->freed[1]);
-
        return ERR_PTR(-ENOSPC);
 }
 
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
        u64 generation;
        u64 refs;
        u64 flags;
-       u64 last = 0;
        u32 nritems;
        u32 blocksize;
        struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
                                           generation);
                if (ret)
                        break;
-               last = bytenr + blocksize;
                nread++;
        }
        wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
        return ret;
 }
 
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+       struct btrfs_block_group_cache *block_group;
+       u64 last = 0;
+
+       while (1) {
+               struct inode *inode;
+
+               block_group = btrfs_lookup_first_block_group(info, last);
+               while (block_group) {
+                       spin_lock(&block_group->lock);
+                       if (block_group->iref)
+                               break;
+                       spin_unlock(&block_group->lock);
+                       block_group = next_block_group(info->tree_root,
+                                                      block_group);
+               }
+               if (!block_group) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
+
+               inode = block_group->inode;
+               block_group->iref = 0;
+               block_group->inode = NULL;
+               spin_unlock(&block_group->lock);
+               iput(inode);
+               last = block_group->key.objectid + block_group->key.offset;
+               btrfs_put_block_group(block_group);
+       }
+}
+
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
        struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
+       int need_clear = 0;
+       u64 cache_gen;
 
        root = info->extent_root;
        key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        if (!path)
                return -ENOMEM;
 
+       cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+       if (cache_gen != 0 &&
+           btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+               need_clear = 1;
+       if (btrfs_test_opt(root, CLEAR_CACHE))
+               need_clear = 1;
+       if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+               printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
        while (1) {
                ret = find_first_block_group(root, path, &key);
                if (ret > 0)
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
 
+               if (need_clear)
+                       cache->disk_cache_state = BTRFS_DC_CLEAR;
+
                /*
                 * we only want to have 32k of ram per block group for keeping
                 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->key.offset = size;
        cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
        cache->sectorsize = root->sectorsize;
+       cache->fs_info = root->fs_info;
 
        /*
         * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct btrfs_block_group_cache *block_group;
        struct btrfs_free_cluster *cluster;
+       struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_key key;
+       struct inode *inode;
        int ret;
+       int factor;
 
        root = root->fs_info->extent_root;
 
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        BUG_ON(!block_group->ro);
 
        memcpy(&key, &block_group->key, sizeof(key));
+       if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+                                 BTRFS_BLOCK_GROUP_RAID1 |
+                                 BTRFS_BLOCK_GROUP_RAID10))
+               factor = 2;
+       else
+               factor = 1;
 
        /* make sure this block group isn't part of an allocation cluster */
        cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
+       inode = lookup_free_space_inode(root, block_group, path);
+       if (!IS_ERR(inode)) {
+               btrfs_orphan_add(trans, inode);
+               clear_nlink(inode);
+               /* One for the block groups ref */
+               spin_lock(&block_group->lock);
+               if (block_group->iref) {
+                       block_group->iref = 0;
+                       block_group->inode = NULL;
+                       spin_unlock(&block_group->lock);
+                       iput(inode);
+               } else {
+                       spin_unlock(&block_group->lock);
+               }
+               /* One for our lookup ref */
+               iput(inode);
+       }
+
+       key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+       key.offset = block_group->key.objectid;
+       key.type = 0;
+
+       ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+       if (ret < 0)
+               goto out;
+       if (ret > 0)
+               btrfs_release_path(tree_root, path);
+       if (ret == 0) {
+               ret = btrfs_del_item(trans, tree_root, path);
+               if (ret)
+                       goto out;
+               btrfs_release_path(tree_root, path);
+       }
+
        spin_lock(&root->fs_info->block_group_cache_lock);
        rb_erase(&block_group->cache_node,
                 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        spin_lock(&block_group->space_info->lock);
        block_group->space_info->total_bytes -= block_group->key.offset;
        block_group->space_info->bytes_readonly -= block_group->key.offset;
+       block_group->space_info->disk_total -= block_group->key.offset * factor;
        spin_unlock(&block_group->space_info->lock);
 
+       memcpy(&key, &block_group->key, sizeof(key));
+
        btrfs_clear_space_info_full(root->fs_info);
 
        btrfs_put_block_group(block_group);
index d74e6af9b53a598a26bf83357f1bed61d2103ef2..eac10e3260a982e24149957981c3f2a7be792d09 100644 (file)
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
                          struct address_space *mapping, gfp_t mask)
 {
        tree->state = RB_ROOT;
-       tree->buffer = RB_ROOT;
+       INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
        tree->ops = NULL;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
        return ret;
 }
 
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
-                                         u64 offset, struct rb_node *node)
-{
-       struct rb_root *root = &tree->buffer;
-       struct rb_node **p = &root->rb_node;
-       struct rb_node *parent = NULL;
-       struct extent_buffer *eb;
-
-       while (*p) {
-               parent = *p;
-               eb = rb_entry(parent, struct extent_buffer, rb_node);
-
-               if (offset < eb->start)
-                       p = &(*p)->rb_left;
-               else if (offset > eb->start)
-                       p = &(*p)->rb_right;
-               else
-                       return eb;
-       }
-
-       rb_link_node(node, parent, p);
-       rb_insert_color(node, root);
-       return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
-                                          u64 offset)
-{
-       struct rb_root *root = &tree->buffer;
-       struct rb_node *n = root->rb_node;
-       struct extent_buffer *eb;
-
-       while (n) {
-               eb = rb_entry(n, struct extent_buffer, rb_node);
-               if (offset < eb->start)
-                       n = n->rb_left;
-               else if (offset > eb->start)
-                       n = n->rb_right;
-               else
-                       return eb;
-       }
-       return NULL;
-}
-
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
                     struct extent_state *other)
 {
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
        struct page *page = bvec->bv_page;
        struct extent_io_tree *tree = bio->bi_private;
        u64 start;
-       u64 end;
 
        start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
-       end = start + bvec->bv_len - 1;
 
        bio->bi_private = NULL;
 
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 last_byte = i_size_read(inode);
        u64 block_start;
        u64 iosize;
-       u64 unlock_start;
        sector_t sector;
        struct extent_state *cached_state = NULL;
        struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                if (tree->ops && tree->ops->writepage_end_io_hook)
                        tree->ops->writepage_end_io_hook(page, start,
                                                         page_end, NULL, 1);
-               unlock_start = page_end + 1;
                goto done;
        }
 
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        if (tree->ops && tree->ops->writepage_end_io_hook)
                                tree->ops->writepage_end_io_hook(page, cur,
                                                         page_end, NULL, 1);
-                       unlock_start = page_end + 1;
                        break;
                }
                em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
                        cur += iosize;
                        pg_offset += iosize;
-                       unlock_start = cur;
                        continue;
                }
                /* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        int scanned = 0;
-       int range_whole = 0;
 
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
-               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                       range_whole = 1;
                scanned = 1;
        }
 retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
                                         NULL, 1,
                                         end_bio_extent_preparewrite, 0,
                                         0, 0);
+                       if (ret && !err)
+                               err = ret;
                        iocount++;
                        block_start = block_start + iosize;
                } else {
@@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
        kmem_cache_free(extent_buffer_cache, eb);
 }
 
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+                                               unsigned long start_idx)
+{
+       unsigned long index;
+       struct page *page;
+
+       if (!eb->first_page)
+               return;
+
+       index = num_extent_pages(eb->start, eb->len);
+       if (start_idx >= index)
+               return;
+
+       do {
+               index--;
+               page = extent_buffer_page(eb, index);
+               if (page)
+                       page_cache_release(page);
+       } while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+       btrfs_release_extent_buffer_page(eb, 0);
+       __free_extent_buffer(eb);
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                                          u64 start, unsigned long len,
                                          struct page *page0,
@@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        struct page *p;
        struct address_space *mapping = tree->mapping;
        int uptodate = 1;
+       int ret;
 
-       spin_lock(&tree->buffer_lock);
-       eb = buffer_search(tree, start);
-       if (eb) {
-               atomic_inc(&eb->refs);
-               spin_unlock(&tree->buffer_lock);
+       rcu_read_lock();
+       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+       if (eb && atomic_inc_not_zero(&eb->refs)) {
+               rcu_read_unlock();
                mark_page_accessed(eb->first_page);
                return eb;
        }
-       spin_unlock(&tree->buffer_lock);
+       rcu_read_unlock();
 
        eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb)
@@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        if (uptodate)
                set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
+       ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+       if (ret)
+               goto free_eb;
+
        spin_lock(&tree->buffer_lock);
-       exists = buffer_tree_insert(tree, start, &eb->rb_node);
-       if (exists) {
+       ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+       if (ret == -EEXIST) {
+               exists = radix_tree_lookup(&tree->buffer,
+                                               start >> PAGE_CACHE_SHIFT);
                /* add one reference for the caller */
                atomic_inc(&exists->refs);
                spin_unlock(&tree->buffer_lock);
+               radix_tree_preload_end();
                goto free_eb;
        }
        /* add one reference for the tree */
        atomic_inc(&eb->refs);
        spin_unlock(&tree->buffer_lock);
+       radix_tree_preload_end();
        return eb;
 
 free_eb:
        if (!atomic_dec_and_test(&eb->refs))
                return exists;
-       for (index = 1; index < i; index++)
-               page_cache_release(extent_buffer_page(eb, index));
-       page_cache_release(extent_buffer_page(eb, 0));
-       __free_extent_buffer(eb);
+       btrfs_release_extent_buffer(eb);
        return exists;
 }
 
@@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 {
        struct extent_buffer *eb;
 
-       spin_lock(&tree->buffer_lock);
-       eb = buffer_search(tree, start);
-       if (eb)
-               atomic_inc(&eb->refs);
-       spin_unlock(&tree->buffer_lock);
-
-       if (eb)
+       rcu_read_lock();
+       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+       if (eb && atomic_inc_not_zero(&eb->refs)) {
+               rcu_read_unlock();
                mark_page_accessed(eb->first_page);
+               return eb;
+       }
+       rcu_read_unlock();
 
-       return eb;
+       return NULL;
 }
 
 void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        }
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+       struct extent_buffer *eb =
+                       container_of(head, struct extent_buffer, rcu_head);
+
+       btrfs_release_extent_buffer(eb);
+}
+
 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 {
        u64 start = page_offset(page);
        struct extent_buffer *eb;
        int ret = 1;
-       unsigned long i;
-       unsigned long num_pages;
 
        spin_lock(&tree->buffer_lock);
-       eb = buffer_search(tree, start);
+       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
        if (!eb)
                goto out;
 
-       if (atomic_read(&eb->refs) > 1) {
+       if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                ret = 0;
                goto out;
        }
-       if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+       /*
+        * set @eb->refs to 0 if it is already 1, and then release the @eb.
+        * Or go back.
+        */
+       if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
                ret = 0;
                goto out;
        }
-       /* at this point we can safely release the extent buffer */
-       num_pages = num_extent_pages(eb->start, eb->len);
-       for (i = 0; i < num_pages; i++)
-               page_cache_release(extent_buffer_page(eb, i));
-       rb_erase(&eb->rb_node, &tree->buffer);
-       __free_extent_buffer(eb);
+
+       radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 out:
        spin_unlock(&tree->buffer_lock);
+
+       /* at this point we can safely release the extent buffer */
+       if (atomic_read(&eb->refs) == 0)
+               call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
        return ret;
 }
index 5691c7b590dae86844ccbeccecdb5d8a2b3e2554..1c6d4f342ef7e8089ac6f17e3e6ed40e83722e45 100644 (file)
@@ -85,7 +85,7 @@ struct extent_io_ops {
 
 struct extent_io_tree {
        struct rb_root state;
-       struct rb_root buffer;
+       struct radix_tree_root buffer;
        struct address_space *mapping;
        u64 dirty_bytes;
        spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
        unsigned long bflags;
        atomic_t refs;
        struct list_head leak_list;
-       struct rb_node rb_node;
+       struct rcu_head rcu_head;
 
        /* the spinlock is used to protect most operations */
        spinlock_t lock;
index 454ca52d6451b649b78b48cba908f3fd1eb2bada..23cb8da3ff6633dd83ba0b4cb435e8f742ad0632 100644 (file)
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
                goto out;
        }
        if (IS_ERR(rb_node)) {
-               em = ERR_PTR(PTR_ERR(rb_node));
+               em = ERR_CAST(rb_node);
                goto out;
        }
        em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
                goto out;
        }
        if (IS_ERR(rb_node)) {
-               em = ERR_PTR(PTR_ERR(rb_node));
+               em = ERR_CAST(rb_node);
                goto out;
        }
        em = rb_entry(rb_node, struct extent_map, rb_node);
index f488fac04d99ea45eea93607bbf17c021b5b2207..22ee0dc2e6b8a712900c68a8c44df4a453fb86f0 100644 (file)
 #include "ctree.h"
 #include "free-space-cache.h"
 #include "transaction.h"
+#include "disk-io.h"
 
 #define BITS_PER_BITMAP                (PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG        (32 * 1024)
 
+static void recalculate_thresholds(struct btrfs_block_group_cache
+                                  *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+                          struct btrfs_free_space *info);
+
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+                                     struct btrfs_block_group_cache
+                                     *block_group, struct btrfs_path *path)
+{
+       struct btrfs_key key;
+       struct btrfs_key location;
+       struct btrfs_disk_key disk_key;
+       struct btrfs_free_space_header *header;
+       struct extent_buffer *leaf;
+       struct inode *inode = NULL;
+       int ret;
+
+       spin_lock(&block_group->lock);
+       if (block_group->inode)
+               inode = igrab(block_group->inode);
+       spin_unlock(&block_group->lock);
+       if (inode)
+               return inode;
+
+       key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+       key.offset = block_group->key.objectid;
+       key.type = 0;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       if (ret > 0) {
+               btrfs_release_path(root, path);
+               return ERR_PTR(-ENOENT);
+       }
+
+       leaf = path->nodes[0];
+       header = btrfs_item_ptr(leaf, path->slots[0],
+                               struct btrfs_free_space_header);
+       btrfs_free_space_key(leaf, header, &disk_key);
+       btrfs_disk_key_to_cpu(&location, &disk_key);
+       btrfs_release_path(root, path);
+
+       inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
+       if (!inode)
+               return ERR_PTR(-ENOENT);
+       if (IS_ERR(inode))
+               return inode;
+       if (is_bad_inode(inode)) {
+               iput(inode);
+               return ERR_PTR(-ENOENT);
+       }
+
+       spin_lock(&block_group->lock);
+       if (!root->fs_info->closing) {
+               block_group->inode = igrab(inode);
+               block_group->iref = 1;
+       }
+       spin_unlock(&block_group->lock);
+
+       return inode;
+}
+
+int create_free_space_inode(struct btrfs_root *root,
+                           struct btrfs_trans_handle *trans,
+                           struct btrfs_block_group_cache *block_group,
+                           struct btrfs_path *path)
+{
+       struct btrfs_key key;
+       struct btrfs_disk_key disk_key;
+       struct btrfs_free_space_header *header;
+       struct btrfs_inode_item *inode_item;
+       struct extent_buffer *leaf;
+       u64 objectid;
+       int ret;
+
+       ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
+       if (ret < 0)
+               return ret;
+
+       ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+       if (ret)
+               return ret;
+
+       leaf = path->nodes[0];
+       inode_item = btrfs_item_ptr(leaf, path->slots[0],
+                                   struct btrfs_inode_item);
+       btrfs_item_key(leaf, &disk_key, path->slots[0]);
+       memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
+                            sizeof(*inode_item));
+       btrfs_set_inode_generation(leaf, inode_item, trans->transid);
+       btrfs_set_inode_size(leaf, inode_item, 0);
+       btrfs_set_inode_nbytes(leaf, inode_item, 0);
+       btrfs_set_inode_uid(leaf, inode_item, 0);
+       btrfs_set_inode_gid(leaf, inode_item, 0);
+       btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
+       btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
+                             BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+       btrfs_set_inode_nlink(leaf, inode_item, 1);
+       btrfs_set_inode_transid(leaf, inode_item, trans->transid);
+       btrfs_set_inode_block_group(leaf, inode_item,
+                                   block_group->key.objectid);
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(root, path);
+
+       key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+       key.offset = block_group->key.objectid;
+       key.type = 0;
+
+       ret = btrfs_insert_empty_item(trans, root, path, &key,
+                                     sizeof(struct btrfs_free_space_header));
+       if (ret < 0) {
+               btrfs_release_path(root, path);
+               return ret;
+       }
+       leaf = path->nodes[0];
+       header = btrfs_item_ptr(leaf, path->slots[0],
+                               struct btrfs_free_space_header);
+       memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
+       btrfs_set_free_space_key(leaf, header, &disk_key);
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(root, path);
+
+       return 0;
+}
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+                                   struct btrfs_trans_handle *trans,
+                                   struct btrfs_path *path,
+                                   struct inode *inode)
+{
+       loff_t oldsize;
+       int ret = 0;
+
+       trans->block_rsv = root->orphan_block_rsv;
+       ret = btrfs_block_rsv_check(trans, root,
+                                   root->orphan_block_rsv,
+                                   0, 5);
+       if (ret)
+               return ret;
+
+       oldsize = i_size_read(inode);
+       btrfs_i_size_write(inode, 0);
+       truncate_pagecache(inode, oldsize, 0);
+
+       /*
+        * We don't need an orphan item because truncating the free space cache
+        * will never be split across transactions.
+        */
+       ret = btrfs_truncate_inode_items(trans, root, inode,
+                                        0, BTRFS_EXTENT_DATA_KEY);
+       if (ret) {
+               WARN_ON(1);
+               return ret;
+       }
+
+       return btrfs_update_inode(trans, root, inode);
+}
+
+static int readahead_cache(struct inode *inode)
+{
+       struct file_ra_state *ra;
+       unsigned long last_index;
+
+       ra = kzalloc(sizeof(*ra), GFP_NOFS);
+       if (!ra)
+               return -ENOMEM;
+
+       file_ra_state_init(ra, inode->i_mapping);
+       last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+       page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+
+       kfree(ra);
+
+       return 0;
+}
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+                         struct btrfs_block_group_cache *block_group)
+{
+       struct btrfs_root *root = fs_info->tree_root;
+       struct inode *inode;
+       struct btrfs_free_space_header *header;
+       struct extent_buffer *leaf;
+       struct page *page;
+       struct btrfs_path *path;
+       u32 *checksums = NULL, *crc;
+       char *disk_crcs = NULL;
+       struct btrfs_key key;
+       struct list_head bitmaps;
+       u64 num_entries;
+       u64 num_bitmaps;
+       u64 generation;
+       u32 cur_crc = ~(u32)0;
+       pgoff_t index = 0;
+       unsigned long first_page_offset;
+       int num_checksums;
+       int ret = 0;
+
+       /*
+        * If we're unmounting then just return, since this does a search on the
+        * normal root and not the commit root and we could deadlock.
+        */
+       smp_mb();
+       if (fs_info->closing)
+               return 0;
+
+       /*
+        * If this block group has been marked to be cleared for one reason or
+        * another then we can't trust the on disk cache, so just return.
+        */
+       spin_lock(&block_group->lock);
+       if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+               spin_unlock(&block_group->lock);
+               return 0;
+       }
+       spin_unlock(&block_group->lock);
+
+       INIT_LIST_HEAD(&bitmaps);
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return 0;
+
+       inode = lookup_free_space_inode(root, block_group, path);
+       if (IS_ERR(inode)) {
+               btrfs_free_path(path);
+               return 0;
+       }
+
+       /* Nothing in the space cache, goodbye */
+       if (!i_size_read(inode)) {
+               btrfs_free_path(path);
+               goto out;
+       }
+
+       key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+       key.offset = block_group->key.objectid;
+       key.type = 0;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret) {
+               btrfs_free_path(path);
+               goto out;
+       }
+
+       leaf = path->nodes[0];
+       header = btrfs_item_ptr(leaf, path->slots[0],
+                               struct btrfs_free_space_header);
+       num_entries = btrfs_free_space_entries(leaf, header);
+       num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+       generation = btrfs_free_space_generation(leaf, header);
+       btrfs_free_path(path);
+
+       if (BTRFS_I(inode)->generation != generation) {
+               printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+                      " not match free space cache generation (%llu) for "
+                      "block group %llu\n",
+                      (unsigned long long)BTRFS_I(inode)->generation,
+                      (unsigned long long)generation,
+                      (unsigned long long)block_group->key.objectid);
+               goto out;
+       }
+
+       if (!num_entries)
+               goto out;
+
+       /* Setup everything for doing checksumming */
+       num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+       checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+       if (!checksums)
+               goto out;
+       first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+       disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+       if (!disk_crcs)
+               goto out;
+
+       ret = readahead_cache(inode);
+       if (ret) {
+               ret = 0;
+               goto out;
+       }
+
+       while (1) {
+               struct btrfs_free_space_entry *entry;
+               struct btrfs_free_space *e;
+               void *addr;
+               unsigned long offset = 0;
+               unsigned long start_offset = 0;
+               int need_loop = 0;
+
+               if (!num_entries && !num_bitmaps)
+                       break;
+
+               if (index == 0) {
+                       start_offset = first_page_offset;
+                       offset = start_offset;
+               }
+
+               page = grab_cache_page(inode->i_mapping, index);
+               if (!page) {
+                       ret = 0;
+                       goto free_cache;
+               }
+
+               if (!PageUptodate(page)) {
+                       btrfs_readpage(NULL, page);
+                       lock_page(page);
+                       if (!PageUptodate(page)) {
+                               unlock_page(page);
+                               page_cache_release(page);
+                               printk(KERN_ERR "btrfs: error reading free "
+                                      "space cache: %llu\n",
+                                      (unsigned long long)
+                                      block_group->key.objectid);
+                               goto free_cache;
+                       }
+               }
+               addr = kmap(page);
+
+               if (index == 0) {
+                       u64 *gen;
+
+                       memcpy(disk_crcs, addr, first_page_offset);
+                       gen = addr + (sizeof(u32) * num_checksums);
+                       if (*gen != BTRFS_I(inode)->generation) {
+                               printk(KERN_ERR "btrfs: space cache generation"
+                                      " (%llu) does not match inode (%llu) "
+                                      "for block group %llu\n",
+                                      (unsigned long long)*gen,
+                                      (unsigned long long)
+                                      BTRFS_I(inode)->generation,
+                                      (unsigned long long)
+                                      block_group->key.objectid);
+                               kunmap(page);
+                               unlock_page(page);
+                               page_cache_release(page);
+                               goto free_cache;
+                       }
+                       crc = (u32 *)disk_crcs;
+               }
+               entry = addr + start_offset;
+
+               /* First lets check our crc before we do anything fun */
+               cur_crc = ~(u32)0;
+               cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+                                         PAGE_CACHE_SIZE - start_offset);
+               btrfs_csum_final(cur_crc, (char *)&cur_crc);
+               if (cur_crc != *crc) {
+                       printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+                              "block group %llu\n", index,
+                              (unsigned long long)block_group->key.objectid);
+                       kunmap(page);
+                       unlock_page(page);
+                       page_cache_release(page);
+                       goto free_cache;
+               }
+               crc++;
+
+               while (1) {
+                       if (!num_entries)
+                               break;
+
+                       need_loop = 1;
+                       e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+                       if (!e) {
+                               kunmap(page);
+                               unlock_page(page);
+                               page_cache_release(page);
+                               goto free_cache;
+                       }
+
+                       e->offset = le64_to_cpu(entry->offset);
+                       e->bytes = le64_to_cpu(entry->bytes);
+                       if (!e->bytes) {
+                               kunmap(page);
+                               kfree(e);
+                               unlock_page(page);
+                               page_cache_release(page);
+                               goto free_cache;
+                       }
+
+                       if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+                               spin_lock(&block_group->tree_lock);
+                               ret = link_free_space(block_group, e);
+                               spin_unlock(&block_group->tree_lock);
+                               BUG_ON(ret);
+                       } else {
+                               e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+                               if (!e->bitmap) {
+                                       kunmap(page);
+                                       kfree(e);
+                                       unlock_page(page);
+                                       page_cache_release(page);
+                                       goto free_cache;
+                               }
+                               spin_lock(&block_group->tree_lock);
+                               ret = link_free_space(block_group, e);
+                               block_group->total_bitmaps++;
+                               recalculate_thresholds(block_group);
+                               spin_unlock(&block_group->tree_lock);
+                               list_add_tail(&e->list, &bitmaps);
+                       }
+
+                       num_entries--;
+                       offset += sizeof(struct btrfs_free_space_entry);
+                       if (offset + sizeof(struct btrfs_free_space_entry) >=
+                           PAGE_CACHE_SIZE)
+                               break;
+                       entry++;
+               }
+
+               /*
+                * We read an entry out of this page, we need to move on to the
+                * next page.
+                */
+               if (need_loop) {
+                       kunmap(page);
+                       goto next;
+               }
+
+               /*
+                * We add the bitmaps at the end of the entries in order that
+                * the bitmap entries are added to the cache.
+                */
+               e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+               list_del_init(&e->list);
+               memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+               kunmap(page);
+               num_bitmaps--;
+next:
+               unlock_page(page);
+               page_cache_release(page);
+               index++;
+       }
+
+       ret = 1;
+out:
+       kfree(checksums);
+       kfree(disk_crcs);
+       iput(inode);
+       return ret;
+
+free_cache:
+       /* This cache is bogus, make sure it gets cleared */
+       spin_lock(&block_group->lock);
+       block_group->disk_cache_state = BTRFS_DC_CLEAR;
+       spin_unlock(&block_group->lock);
+       btrfs_remove_free_space_cache(block_group);
+       goto out;
+}
+
+int btrfs_write_out_cache(struct btrfs_root *root,
+                         struct btrfs_trans_handle *trans,
+                         struct btrfs_block_group_cache *block_group,
+                         struct btrfs_path *path)
+{
+       struct btrfs_free_space_header *header;
+       struct extent_buffer *leaf;
+       struct inode *inode;
+       struct rb_node *node;
+       struct list_head *pos, *n;
+       struct page *page;
+       struct extent_state *cached_state = NULL;
+       struct list_head bitmap_list;
+       struct btrfs_key key;
+       u64 bytes = 0;
+       u32 *crc, *checksums;
+       pgoff_t index = 0, last_index = 0;
+       unsigned long first_page_offset;
+       int num_checksums;
+       int entries = 0;
+       int bitmaps = 0;
+       int ret = 0;
+
+       root = root->fs_info->tree_root;
+
+       INIT_LIST_HEAD(&bitmap_list);
+
+       spin_lock(&block_group->lock);
+       if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+               spin_unlock(&block_group->lock);
+               return 0;
+       }
+       spin_unlock(&block_group->lock);
+
+       inode = lookup_free_space_inode(root, block_group, path);
+       if (IS_ERR(inode))
+               return 0;
+
+       if (!i_size_read(inode)) {
+               iput(inode);
+               return 0;
+       }
+
+       last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+       filemap_write_and_wait(inode->i_mapping);
+       btrfs_wait_ordered_range(inode, inode->i_size &
+                                ~(root->sectorsize - 1), (u64)-1);
+
+       /* We need a checksum per page. */
+       num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+       crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+       if (!crc) {
+               iput(inode);
+               return 0;
+       }
+
+       /* Since the first page has all of our checksums and our generation we
+        * need to calculate the offset into the page that we can start writing
+        * our entries.
+        */
+       first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+
+       node = rb_first(&block_group->free_space_offset);
+       if (!node)
+               goto out_free;
+
+       /*
+        * Lock all pages first so we can lock the extent safely.
+        *
+        * NOTE: Because we hold the ref the entire time we're going to write to
+        * the page find_get_page should never fail, so we don't do a check
+        * after find_get_page at this point.  Just putting this here so people
+        * know and don't freak out.
+        */
+       while (index <= last_index) {
+               page = grab_cache_page(inode->i_mapping, index);
+               if (!page) {
+                       pgoff_t i = 0;
+
+                       while (i < index) {
+                               page = find_get_page(inode->i_mapping, i);
+                               unlock_page(page);
+                               page_cache_release(page);
+                               page_cache_release(page);
+                               i++;
+                       }
+                       goto out_free;
+               }
+               index++;
+       }
+
+       index = 0;
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+                        0, &cached_state, GFP_NOFS);
+
+       /* Write out the extent entries */
+       do {
+               struct btrfs_free_space_entry *entry;
+               void *addr;
+               unsigned long offset = 0;
+               unsigned long start_offset = 0;
+
+               if (index == 0) {
+                       start_offset = first_page_offset;
+                       offset = start_offset;
+               }
+
+               page = find_get_page(inode->i_mapping, index);
+
+               addr = kmap(page);
+               entry = addr + start_offset;
+
+               memset(addr, 0, PAGE_CACHE_SIZE);
+               while (1) {
+                       struct btrfs_free_space *e;
+
+                       e = rb_entry(node, struct btrfs_free_space, offset_index);
+                       entries++;
+
+                       entry->offset = cpu_to_le64(e->offset);
+                       entry->bytes = cpu_to_le64(e->bytes);
+                       if (e->bitmap) {
+                               entry->type = BTRFS_FREE_SPACE_BITMAP;
+                               list_add_tail(&e->list, &bitmap_list);
+                               bitmaps++;
+                       } else {
+                               entry->type = BTRFS_FREE_SPACE_EXTENT;
+                       }
+                       node = rb_next(node);
+                       if (!node)
+                               break;
+                       offset += sizeof(struct btrfs_free_space_entry);
+                       if (offset + sizeof(struct btrfs_free_space_entry) >=
+                           PAGE_CACHE_SIZE)
+                               break;
+                       entry++;
+               }
+               *crc = ~(u32)0;
+               *crc = btrfs_csum_data(root, addr + start_offset, *crc,
+                                      PAGE_CACHE_SIZE - start_offset);
+               kunmap(page);
+
+               btrfs_csum_final(*crc, (char *)crc);
+               crc++;
+
+               bytes += PAGE_CACHE_SIZE;
+
+               ClearPageChecked(page);
+               set_page_extent_mapped(page);
+               SetPageUptodate(page);
+               set_page_dirty(page);
+
+               /*
+                * We need to release our reference we got for grab_cache_page,
+                * except for the first page which will hold our checksums, we
+                * do that below.
+                */
+               if (index != 0) {
+                       unlock_page(page);
+                       page_cache_release(page);
+               }
+
+               page_cache_release(page);
+
+               index++;
+       } while (node);
+
+       /* Write out the bitmaps */
+       list_for_each_safe(pos, n, &bitmap_list) {
+               void *addr;
+               struct btrfs_free_space *entry =
+                       list_entry(pos, struct btrfs_free_space, list);
+
+               page = find_get_page(inode->i_mapping, index);
+
+               addr = kmap(page);
+               memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+               *crc = ~(u32)0;
+               *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+               kunmap(page);
+               btrfs_csum_final(*crc, (char *)crc);
+               crc++;
+               bytes += PAGE_CACHE_SIZE;
+
+               ClearPageChecked(page);
+               set_page_extent_mapped(page);
+               SetPageUptodate(page);
+               set_page_dirty(page);
+               unlock_page(page);
+               page_cache_release(page);
+               page_cache_release(page);
+               list_del_init(&entry->list);
+               index++;
+       }
+
+       /* Zero out the rest of the pages just to make sure */
+       while (index <= last_index) {
+               void *addr;
+
+               page = find_get_page(inode->i_mapping, index);
+
+               addr = kmap(page);
+               memset(addr, 0, PAGE_CACHE_SIZE);
+               kunmap(page);
+               ClearPageChecked(page);
+               set_page_extent_mapped(page);
+               SetPageUptodate(page);
+               set_page_dirty(page);
+               unlock_page(page);
+               page_cache_release(page);
+               page_cache_release(page);
+               bytes += PAGE_CACHE_SIZE;
+               index++;
+       }
+
+       btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+
+       /* Write the checksums and trans id to the first page */
+       {
+               void *addr;
+               u64 *gen;
+
+               page = find_get_page(inode->i_mapping, 0);
+
+               addr = kmap(page);
+               memcpy(addr, checksums, sizeof(u32) * num_checksums);
+               gen = addr + (sizeof(u32) * num_checksums);
+               *gen = trans->transid;
+               kunmap(page);
+               ClearPageChecked(page);
+               set_page_extent_mapped(page);
+               SetPageUptodate(page);
+               set_page_dirty(page);
+               unlock_page(page);
+               page_cache_release(page);
+               page_cache_release(page);
+       }
+       BTRFS_I(inode)->generation = trans->transid;
+
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+                            i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+       filemap_write_and_wait(inode->i_mapping);
+
+       key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+       key.offset = block_group->key.objectid;
+       key.type = 0;
+
+       ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+       if (ret < 0) {
+               ret = 0;
+               clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+                                EXTENT_DIRTY | EXTENT_DELALLOC |
+                                EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+               goto out_free;
+       }
+       leaf = path->nodes[0];
+       if (ret > 0) {
+               struct btrfs_key found_key;
+               BUG_ON(!path->slots[0]);
+               path->slots[0]--;
+               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+               if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+                   found_key.offset != block_group->key.objectid) {
+                       ret = 0;
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+                                        EXTENT_DIRTY | EXTENT_DELALLOC |
+                                        EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+                                        GFP_NOFS);
+                       btrfs_release_path(root, path);
+                       goto out_free;
+               }
+       }
+       header = btrfs_item_ptr(leaf, path->slots[0],
+                               struct btrfs_free_space_header);
+       btrfs_set_free_space_entries(leaf, header, entries);
+       btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+       btrfs_set_free_space_generation(leaf, header, trans->transid);
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(root, path);
+
+       ret = 1;
+
+out_free:
+       if (ret == 0) {
+               invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+               spin_lock(&block_group->lock);
+               block_group->disk_cache_state = BTRFS_DC_ERROR;
+               spin_unlock(&block_group->lock);
+               BTRFS_I(inode)->generation = 0;
+       }
+       kfree(checksums);
+       btrfs_update_inode(trans, root, inode);
+       iput(inode);
+       return ret;
+}
+
 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
                                          u64 offset)
 {
index 890a8e79011b2cadc6adc545909289ed90707a27..e49ca5c321b571e5886c557e76d131441f1acc87 100644 (file)
@@ -27,6 +27,24 @@ struct btrfs_free_space {
        struct list_head list;
 };
 
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+                                     struct btrfs_block_group_cache
+                                     *block_group, struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_root *root,
+                           struct btrfs_trans_handle *trans,
+                           struct btrfs_block_group_cache *block_group,
+                           struct btrfs_path *path);
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+                                   struct btrfs_trans_handle *trans,
+                                   struct btrfs_path *path,
+                                   struct inode *inode);
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+                         struct btrfs_block_group_cache *block_group);
+int btrfs_write_out_cache(struct btrfs_root *root,
+                         struct btrfs_trans_handle *trans,
+                         struct btrfs_block_group_cache *block_group,
+                         struct btrfs_path *path);
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
                         u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
index 64f99cf69ce0c6b2da2cb62c4884fa44c150bdda..558cac2dfa547032637bd0a746153eba46d73f8d 100644 (file)
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        u64 num_bytes;
-       u64 orig_start;
-       u64 disk_num_bytes;
        u64 blocksize = root->sectorsize;
        u64 actual_end;
        u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
        int i;
        int will_compress;
 
-       orig_start = start;
-
        actual_end = min_t(u64, isize, end + 1);
 again:
        will_compress = 0;
@@ -371,7 +367,6 @@ again:
        total_compressed = min(total_compressed, max_uncompressed);
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
        num_bytes = max(blocksize,  num_bytes);
-       disk_num_bytes = num_bytes;
        total_in = 0;
        ret = 0;
 
@@ -467,7 +462,6 @@ again:
                if (total_compressed >= total_in) {
                        will_compress = 0;
                } else {
-                       disk_num_bytes = total_compressed;
                        num_bytes = total_in;
                }
        }
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
        u64 disk_num_bytes;
        u64 cur_alloc_size;
        u64 blocksize = root->sectorsize;
-       u64 actual_end;
-       u64 isize = i_size_read(inode);
        struct btrfs_key ins;
        struct extent_map *em;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
+       BUG_ON(root == root->fs_info->tree_root);
        trans = btrfs_join_transaction(root, 1);
        BUG_ON(!trans);
        btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
-       actual_end = min_t(u64, isize, end + 1);
-
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
        num_bytes = max(blocksize,  num_bytes);
        disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        int type;
        int nocow;
        int check_prev = 1;
+       bool nolock = false;
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
-       trans = btrfs_join_transaction(root, 1);
+       if (root == root->fs_info->tree_root) {
+               nolock = true;
+               trans = btrfs_join_transaction_nolock(root, 1);
+       } else {
+               trans = btrfs_join_transaction(root, 1);
+       }
        BUG_ON(!trans);
 
        cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
                BUG_ON(ret);
        }
 
-       ret = btrfs_end_transaction(trans, root);
-       BUG_ON(ret);
+       if (nolock) {
+               ret = btrfs_end_transaction_nolock(trans, root);
+               BUG_ON(ret);
+       } else {
+               ret = btrfs_end_transaction(trans, root);
+               BUG_ON(ret);
+       }
        btrfs_free_path(path);
        return 0;
 }
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
+               int do_list = (root->root_key.objectid !=
+                              BTRFS_ROOT_TREE_OBJECTID);
 
                if (*bits & EXTENT_FIRST_DELALLOC)
                        *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += len;
                root->fs_info->delalloc_bytes += len;
-               if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+               if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                        list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
                                      &root->fs_info->delalloc_inodes);
                }
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
+               int do_list = (root->root_key.objectid !=
+                              BTRFS_ROOT_TREE_OBJECTID);
 
                if (*bits & EXTENT_FIRST_DELALLOC)
                        *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
                if (*bits & EXTENT_DO_ACCOUNTING)
                        btrfs_delalloc_release_metadata(inode, len);
 
-               if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+               if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+                   && do_list)
                        btrfs_free_reserved_data_space(inode, len);
 
                spin_lock(&root->fs_info->delalloc_lock);
                root->fs_info->delalloc_bytes -= len;
                BTRFS_I(inode)->delalloc_bytes -= len;
 
-               if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+               if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
                    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                        list_del_init(&BTRFS_I(inode)->delalloc_inodes);
                }
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 
        if (map_length < length + size)
                return 1;
-       return 0;
+       return ret;
 }
 
 /*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-       ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+       if (root == root->fs_info->tree_root)
+               ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+       else
+               ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
        BUG_ON(ret);
 
        if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        struct extent_state *cached_state = NULL;
        int compressed = 0;
        int ret;
+       bool nolock = false;
 
        ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
                                             end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                return 0;
        BUG_ON(!ordered_extent);
 
+       nolock = (root == root->fs_info->tree_root);
+
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list));
                ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
                if (!ret) {
-                       trans = btrfs_join_transaction(root, 1);
+                       if (nolock)
+                               trans = btrfs_join_transaction_nolock(root, 1);
+                       else
+                               trans = btrfs_join_transaction(root, 1);
+                       BUG_ON(!trans);
                        btrfs_set_trans_block_group(trans, inode);
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                        ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                         ordered_extent->file_offset + ordered_extent->len - 1,
                         0, &cached_state, GFP_NOFS);
 
-       trans = btrfs_join_transaction(root, 1);
+       if (nolock)
+               trans = btrfs_join_transaction_nolock(root, 1);
+       else
+               trans = btrfs_join_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                                ordered_extent->len);
                BUG_ON(ret);
        } else {
+               BUG_ON(root == root->fs_info->tree_root);
                ret = insert_reserved_file_extent(trans, inode,
                                                ordered_extent->file_offset,
                                                ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        ret = btrfs_update_inode(trans, root, inode);
        BUG_ON(ret);
 out:
-       btrfs_delalloc_release_metadata(inode, ordered_extent->len);
-       if (trans)
-               btrfs_end_transaction(trans, root);
+       if (nolock) {
+               if (trans)
+                       btrfs_end_transaction_nolock(trans, root);
+       } else {
+               btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+               if (trans)
+                       btrfs_end_transaction(trans, root);
+       }
+
        /* once for us */
        btrfs_put_ordered_extent(ordered_extent);
        /* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 {
        struct btrfs_path *path;
        struct extent_buffer *leaf;
-       struct btrfs_item *item;
        struct btrfs_key key, found_key;
        struct btrfs_trans_handle *trans;
        struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
                /* pull out the item */
                leaf = path->nodes[0];
-               item = btrfs_item_nr(leaf, path->slots[0]);
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
                /* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 
        ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
                                           dir, index);
-       BUG_ON(ret);
+       if (ret == -ENOENT)
+               ret = 0;
 err:
        btrfs_free_path(path);
        if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
 {
        struct extent_buffer *eb;
        int level;
-       int ret;
        u64 refs = 1;
+       int uninitialized_var(ret);
 
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
                if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
                if (refs > 1)
                        return 1;
        }
-       return 0;
+       return ret; /* XXX callers? */
 }
 
 /*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
        BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
 
-       if (root->ref_cows)
+       if (root->ref_cows || root == root->fs_info->tree_root)
                btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
 
        path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
                } else {
                        break;
                }
-               if (found_extent && root->ref_cows) {
+               if (found_extent && (root->ref_cows ||
+                                    root == root->fs_info->tree_root)) {
                        btrfs_set_path_blocking(path);
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
        int ret;
 
        truncate_inode_pages(&inode->i_data, 0);
-       if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+       if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+                              root == root->fs_info->tree_root))
                goto no_delete;
 
        if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
        }
        spin_unlock(&root->inode_lock);
 
-       if (empty && btrfs_root_refs(&root->root_item) == 0) {
+       /*
+        * Free space cache has inodes in the tree root, but the tree root has a
+        * root_refs of 0, so this could end up dropping the tree root as a
+        * snapshot, so we need the extra !root->fs_info->tree_root check to
+        * make sure we don't drop it.
+        */
+       if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+           root != root->fs_info->tree_root) {
                synchronize_srcu(&root->fs_info->subvol_srcu);
                spin_lock(&root->inode_lock);
                empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        int ret = 0;
+       bool nolock = false;
 
        if (BTRFS_I(inode)->dummy_inode)
                return 0;
 
+       smp_mb();
+       nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+
        if (wbc->sync_mode == WB_SYNC_ALL) {
-               trans = btrfs_join_transaction(root, 1);
+               if (nolock)
+                       trans = btrfs_join_transaction_nolock(root, 1);
+               else
+                       trans = btrfs_join_transaction(root, 1);
                btrfs_set_trans_block_group(trans, inode);
-               ret = btrfs_commit_transaction(trans, root);
+               if (nolock)
+                       ret = btrfs_end_transaction_nolock(trans, root);
+               else
+                       ret = btrfs_commit_transaction(trans, root);
        }
        return ret;
 }
@@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_dio_private *dip;
        struct bio_vec *bvec = bio->bi_io_vec;
-       u64 start;
        int skip_sum;
        int write = rw & REQ_WRITE;
        int ret = 0;
@@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        dip->inode = inode;
        dip->logical_offset = file_offset;
 
-       start = dip->logical_offset;
        dip->bytes = 0;
        do {
                dip->bytes += bvec->bv_len;
@@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
 
+       if (root == root->fs_info->tree_root) {
+               struct btrfs_block_group_cache *block_group;
+
+               block_group = btrfs_lookup_block_group(root->fs_info,
+                                               BTRFS_I(inode)->block_group);
+               if (block_group && block_group->inode == inode) {
+                       spin_lock(&block_group->lock);
+                       block_group->inode = NULL;
+                       spin_unlock(&block_group->lock);
+                       btrfs_put_block_group(block_group);
+               } else if (block_group) {
+                       btrfs_put_block_group(block_group);
+               }
+       }
+
        spin_lock(&root->orphan_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
                printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
-       if (btrfs_root_refs(&root->root_item) == 0)
+       if (btrfs_root_refs(&root->root_item) == 0 &&
+           root != root->fs_info->tree_root)
                return 1;
        else
                return generic_drop_inode(inode);
@@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
        return 0;
 }
 
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                  int sync)
 {
        struct btrfs_inode *binode;
        struct inode *inode = NULL;
@@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
        spin_unlock(&root->fs_info->delalloc_lock);
 
        if (inode) {
-               write_inode_now(inode, 0);
+               if (sync) {
+                       filemap_write_and_wait(inode->i_mapping);
+                       /*
+                        * We have to do this because compression doesn't
+                        * actually set PG_writeback until it submits the pages
+                        * for IO, which happens in an async thread, so we could
+                        * race and not actually wait for any writeback pages
+                        * because they've not been submitted yet.  Technically
+                        * this could still be the case for the ordered stuff
+                        * since the async thread may not have started to do its
+                        * work yet.  If this becomes the case then we need to
+                        * figure out a way to make sure that in writepage we
+                        * wait for any async pages to be submitted before
+                        * returning so that fdatawait does what its supposed to
+                        * do.
+                        */
+                       btrfs_wait_ordered_range(inode, 0, (u64)-1);
+               } else {
+                       filemap_flush(inode->i_mapping);
+               }
                if (delay_iput)
                        btrfs_add_delayed_iput(inode);
                else
@@ -6757,27 +6836,33 @@ out_unlock:
        return err;
 }
 
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
-                             u64 start, u64 num_bytes, u64 min_size,
-                             loff_t actual_len, u64 *alloc_hint)
+static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
+                                      u64 start, u64 num_bytes, u64 min_size,
+                                      loff_t actual_len, u64 *alloc_hint,
+                                      struct btrfs_trans_handle *trans)
 {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key ins;
        u64 cur_offset = start;
        int ret = 0;
+       bool own_trans = true;
 
+       if (trans)
+               own_trans = false;
        while (num_bytes > 0) {
-               trans = btrfs_start_transaction(root, 3);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-                       break;
+               if (own_trans) {
+                       trans = btrfs_start_transaction(root, 3);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               break;
+                       }
                }
 
                ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
                                           0, *alloc_hint, (u64)-1, &ins, 1);
                if (ret) {
-                       btrfs_end_transaction(trans, root);
+                       if (own_trans)
+                               btrfs_end_transaction(trans, root);
                        break;
                }
 
@@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
                ret = btrfs_update_inode(trans, root, inode);
                BUG_ON(ret);
 
-               btrfs_end_transaction(trans, root);
+               if (own_trans)
+                       btrfs_end_transaction(trans, root);
        }
        return ret;
 }
 
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+                             u64 start, u64 num_bytes, u64 min_size,
+                             loff_t actual_len, u64 *alloc_hint)
+{
+       return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+                                          min_size, actual_len, alloc_hint,
+                                          NULL);
+}
+
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+                                   struct btrfs_trans_handle *trans, int mode,
+                                   u64 start, u64 num_bytes, u64 min_size,
+                                   loff_t actual_len, u64 *alloc_hint)
+{
+       return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+                                          min_size, actual_len, alloc_hint, trans);
+}
+
 static long btrfs_fallocate(struct inode *inode, int mode,
                            loff_t offset, loff_t len)
 {
index 9254b3d58dbef22974af3c7c61dbc9a4af7a33b6..463d91b4dd3a720ece2afb592b2edefca855aaba 100644 (file)
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 
 static noinline int create_subvol(struct btrfs_root *root,
                                  struct dentry *dentry,
-                                 char *name, int namelen)
+                                 char *name, int namelen,
+                                 u64 *async_transid)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_key key;
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
 
        d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
-       err = btrfs_commit_transaction(trans, root);
+       if (async_transid) {
+               *async_transid = trans->transid;
+               err = btrfs_commit_transaction_async(trans, root, 1);
+       } else {
+               err = btrfs_commit_transaction(trans, root);
+       }
        if (err && !ret)
                ret = err;
        return ret;
 }
 
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+                          char *name, int namelen, u64 *async_transid)
 {
        struct inode *inode;
        struct btrfs_pending_snapshot *pending_snapshot;
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
 
        list_add(&pending_snapshot->list,
                 &trans->transaction->pending_snapshots);
-       ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
+       if (async_transid) {
+               *async_transid = trans->transid;
+               ret = btrfs_commit_transaction_async(trans,
+                                    root->fs_info->extent_root, 1);
+       } else {
+               ret = btrfs_commit_transaction(trans,
+                                              root->fs_info->extent_root);
+       }
        BUG_ON(ret);
 
        ret = pending_snapshot->error;
@@ -395,6 +409,76 @@ fail:
        return ret;
 }
 
+/*  copy of check_sticky in fs/namei.c()
+* It's inline, so penalty for filesystems that don't use sticky bit is
+* minimal.
+*/
+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
+{
+       uid_t fsuid = current_fsuid();
+
+       if (!(dir->i_mode & S_ISVTX))
+               return 0;
+       if (inode->i_uid == fsuid)
+               return 0;
+       if (dir->i_uid == fsuid)
+               return 0;
+       return !capable(CAP_FOWNER);
+}
+
+/*  copy of may_delete in fs/namei.c()
+ *     Check whether we can remove a link victim from directory dir, check
+ *  whether the type of victim is right.
+ *  1. We can't do it if dir is read-only (done in permission())
+ *  2. We should have write and exec permissions on dir
+ *  3. We can't remove anything from append-only dir
+ *  4. We can't do anything with immutable dir (done in permission())
+ *  5. If the sticky bit on dir is set we should either
+ *     a. be owner of dir, or
+ *     b. be owner of victim, or
+ *     c. have CAP_FOWNER capability
+ *  6. If the victim is append-only or immutable we can't do antyhing with
+ *     links pointing to it.
+ *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ *  9. We can't remove a root or mountpoint.
+ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ *     nfs_async_unlink().
+ */
+
+static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+{
+       int error;
+
+       if (!victim->d_inode)
+               return -ENOENT;
+
+       BUG_ON(victim->d_parent->d_inode != dir);
+       audit_inode_child(victim, dir);
+
+       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       if (error)
+               return error;
+       if (IS_APPEND(dir))
+               return -EPERM;
+       if (btrfs_check_sticky(dir, victim->d_inode)||
+               IS_APPEND(victim->d_inode)||
+           IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+               return -EPERM;
+       if (isdir) {
+               if (!S_ISDIR(victim->d_inode->i_mode))
+                       return -ENOTDIR;
+               if (IS_ROOT(victim))
+                       return -EBUSY;
+       } else if (S_ISDIR(victim->d_inode->i_mode))
+               return -EISDIR;
+       if (IS_DEADDIR(dir))
+               return -ENOENT;
+       if (victim->d_flags & DCACHE_NFSFS_RENAMED)
+               return -EBUSY;
+       return 0;
+}
+
 /* copy of may_create in fs/namei.c() */
 static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 {
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
  */
 static noinline int btrfs_mksubvol(struct path *parent,
                                   char *name, int namelen,
-                                  struct btrfs_root *snap_src)
+                                  struct btrfs_root *snap_src,
+                                  u64 *async_transid)
 {
        struct inode *dir  = parent->dentry->d_inode;
        struct dentry *dentry;
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
                goto out_up_read;
 
        if (snap_src) {
-               error = create_snapshot(snap_src, dentry);
+               error = create_snapshot(snap_src, dentry,
+                                       name, namelen, async_transid);
        } else {
                error = create_subvol(BTRFS_I(dir)->root, dentry,
-                                     name, namelen);
+                                     name, namelen, async_transid);
        }
        if (!error)
                fsnotify_mkdir(dir, dentry);
@@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
        char *sizestr;
        char *devstr = NULL;
        int ret = 0;
-       int namelen;
        int mod = 0;
 
        if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
                return PTR_ERR(vol_args);
 
        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-       namelen = strlen(vol_args->name);
 
        mutex_lock(&root->fs_info->volume_mutex);
        sizestr = vol_args->name;
@@ -801,11 +885,13 @@ out_unlock:
        return ret;
 }
 
-static noinline int btrfs_ioctl_snap_create(struct file *file,
-                                           void __user *arg, int subvol)
+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+                                                   char *name,
+                                                   unsigned long fd,
+                                                   int subvol,
+                                                   u64 *transid)
 {
        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
-       struct btrfs_ioctl_vol_args *vol_args;
        struct file *src_file;
        int namelen;
        int ret = 0;
@@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
        if (root->fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
 
-       vol_args = memdup_user(arg, sizeof(*vol_args));
-       if (IS_ERR(vol_args))
-               return PTR_ERR(vol_args);
-
-       vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-       namelen = strlen(vol_args->name);
-       if (strchr(vol_args->name, '/')) {
+       namelen = strlen(name);
+       if (strchr(name, '/')) {
                ret = -EINVAL;
                goto out;
        }
 
        if (subvol) {
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
-                                    NULL);
+               ret = btrfs_mksubvol(&file->f_path, name, namelen,
+                                    NULL, transid);
        } else {
                struct inode *src_inode;
-               src_file = fget(vol_args->fd);
+               src_file = fget(fd);
                if (!src_file) {
                        ret = -EINVAL;
                        goto out;
@@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
                        fput(src_file);
                        goto out;
                }
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
-                                    BTRFS_I(src_inode)->root);
+               ret = btrfs_mksubvol(&file->f_path, name, namelen,
+                                    BTRFS_I(src_inode)->root,
+                                    transid);
                fput(src_file);
        }
 out:
+       return ret;
+}
+
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+                                           void __user *arg, int subvol,
+                                           int async)
+{
+       struct btrfs_ioctl_vol_args *vol_args = NULL;
+       struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
+       char *name;
+       u64 fd;
+       u64 transid = 0;
+       int ret;
+
+       if (async) {
+               async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
+               if (IS_ERR(async_vol_args))
+                       return PTR_ERR(async_vol_args);
+
+               name = async_vol_args->name;
+               fd = async_vol_args->fd;
+               async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
+       } else {
+               vol_args = memdup_user(arg, sizeof(*vol_args));
+               if (IS_ERR(vol_args))
+                       return PTR_ERR(vol_args);
+               name = vol_args->name;
+               fd = vol_args->fd;
+               vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+       }
+
+       ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+                                             subvol, &transid);
+
+       if (!ret && async) {
+               if (copy_to_user(arg +
+                               offsetof(struct btrfs_ioctl_async_vol_args,
+                               transid), &transid, sizeof(transid)))
+                       return -EFAULT;
+       }
+
        kfree(vol_args);
+       kfree(async_vol_args);
+
        return ret;
 }
 
@@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       args = kmalloc(sizeof(*args), GFP_KERNEL);
-       if (!args)
-               return -ENOMEM;
+       args = memdup_user(argp, sizeof(*args));
+       if (IS_ERR(args))
+               return PTR_ERR(args);
 
-       if (copy_from_user(args, argp, sizeof(*args))) {
-               kfree(args);
-               return -EFAULT;
-       }
        inode = fdentry(file)->d_inode;
        ret = search_ioctl(inode, args);
        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       args = kmalloc(sizeof(*args), GFP_KERNEL);
-       if (!args)
-               return -ENOMEM;
+       args = memdup_user(argp, sizeof(*args));
+       if (IS_ERR(args))
+               return PTR_ERR(args);
 
-       if (copy_from_user(args, argp, sizeof(*args))) {
-               kfree(args);
-               return -EFAULT;
-       }
        inode = fdentry(file)->d_inode;
 
        if (args->treeid == 0)
@@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        int ret;
        int err = 0;
 
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
        vol_args = memdup_user(arg, sizeof(*vol_args));
        if (IS_ERR(vol_args))
                return PTR_ERR(vol_args);
@@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        }
 
        inode = dentry->d_inode;
+       dest = BTRFS_I(inode)->root;
+       if (!capable(CAP_SYS_ADMIN)){
+               /*
+                * Regular user.  Only allow this with a special mount
+                * option, when the user has write+exec access to the
+                * subvol root, and when rmdir(2) would have been
+                * allowed.
+                *
+                * Note that this is _not_ check that the subvol is
+                * empty or doesn't contain data that we wouldn't
+                * otherwise be able to delete.
+                *
+                * Users who want to delete empty subvols should try
+                * rmdir(2).
+                */
+               err = -EPERM;
+               if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+                       goto out_dput;
+
+               /*
+                * Do not allow deletion if the parent dir is the same
+                * as the dir to be deleted.  That means the ioctl
+                * must be called on the dentry referencing the root
+                * of the subvol, not a random directory contained
+                * within it.
+                */
+               err = -EINVAL;
+               if (root == dest)
+                       goto out_dput;
+
+               err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+               if (err)
+                       goto out_dput;
+
+               /* check if subvolume may be deleted by a non-root user */
+               err = btrfs_may_delete(dir, dentry, 1);
+               if (err)
+                       goto out_dput;
+       }
+
        if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
                err = -EINVAL;
                goto out_dput;
        }
 
-       dest = BTRFS_I(inode)->root;
-
        mutex_lock(&inode->i_mutex);
        err = d_invalidate(dentry);
        if (err)
@@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                BUG_ON(ret);
        }
 
-       ret = btrfs_commit_transaction(trans, root);
+       ret = btrfs_end_transaction(trans, root);
        BUG_ON(ret);
        inode->i_flags |= S_DEAD;
 out_up_write:
@@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        path->reada = 2;
 
        if (inode < src) {
-               mutex_lock(&inode->i_mutex);
-               mutex_lock(&src->i_mutex);
+               mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+               mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
        } else {
-               mutex_lock(&src->i_mutex);
-               mutex_lock(&inode->i_mutex);
+               mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
        }
 
        /* determine range to clone */
@@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        while (1) {
                struct btrfs_ordered_extent *ordered;
                lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
-               ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
-               if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
+               ordered = btrfs_lookup_first_ordered_extent(src, off+len);
+               if (!ordered &&
+                   !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
+                                  EXTENT_DELALLOC, 0, NULL))
                        break;
                unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
                if (ordered)
                        btrfs_put_ordered_extent(ordered);
-               btrfs_wait_ordered_range(src, off, off+len);
+               btrfs_wait_ordered_range(src, off, len);
        }
 
        /* clone data */
@@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        }
                        btrfs_release_path(root, path);
 
-                       if (key.offset + datal < off ||
+                       if (key.offset + datal <= off ||
                            key.offset >= off+len)
                                goto next;
 
@@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
        return 0;
 }
 
+static void get_block_group_info(struct list_head *groups_list,
+                                struct btrfs_ioctl_space_info *space)
+{
+       struct btrfs_block_group_cache *block_group;
+
+       space->total_bytes = 0;
+       space->used_bytes = 0;
+       space->flags = 0;
+       list_for_each_entry(block_group, groups_list, list) {
+               space->flags = block_group->flags;
+               space->total_bytes += block_group->key.offset;
+               space->used_bytes +=
+                       btrfs_block_group_used(&block_group->item);
+       }
+}
+
 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 {
        struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        struct btrfs_ioctl_space_info *dest_orig;
        struct btrfs_ioctl_space_info *user_dest;
        struct btrfs_space_info *info;
+       u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+                      BTRFS_BLOCK_GROUP_SYSTEM,
+                      BTRFS_BLOCK_GROUP_METADATA,
+                      BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+       int num_types = 4;
        int alloc_size;
        int ret = 0;
        int slot_count = 0;
+       int i, c;
 
        if (copy_from_user(&space_args,
                           (struct btrfs_ioctl_space_args __user *)arg,
                           sizeof(space_args)))
                return -EFAULT;
 
-       /* first we count slots */
-       rcu_read_lock();
-       list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
-               slot_count++;
-       rcu_read_unlock();
+       for (i = 0; i < num_types; i++) {
+               struct btrfs_space_info *tmp;
+
+               info = NULL;
+               rcu_read_lock();
+               list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+                                       list) {
+                       if (tmp->flags == types[i]) {
+                               info = tmp;
+                               break;
+                       }
+               }
+               rcu_read_unlock();
+
+               if (!info)
+                       continue;
+
+               down_read(&info->groups_sem);
+               for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+                       if (!list_empty(&info->block_groups[c]))
+                               slot_count++;
+               }
+               up_read(&info->groups_sem);
+       }
 
        /* space_slots == 0 means they are asking for a count */
        if (space_args.space_slots == 0) {
                space_args.total_spaces = slot_count;
                goto out;
        }
+
+       slot_count = min_t(int, space_args.space_slots, slot_count);
+
        alloc_size = sizeof(*dest) * slot_count;
+
        /* we generally have at most 6 or so space infos, one for each raid
         * level.  So, a whole page should be more than enough for everyone
         */
@@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        dest_orig = dest;
 
        /* now we have a buffer to copy into */
-       rcu_read_lock();
-       list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
-               /* make sure we don't copy more than we allocated
-                * in our buffer
-                */
-               if (slot_count == 0)
-                       break;
-               slot_count--;
-
-               /* make sure userland has enough room in their buffer */
-               if (space_args.total_spaces >= space_args.space_slots)
-                       break;
+       for (i = 0; i < num_types; i++) {
+               struct btrfs_space_info *tmp;
+
+               info = NULL;
+               rcu_read_lock();
+               list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+                                       list) {
+                       if (tmp->flags == types[i]) {
+                               info = tmp;
+                               break;
+                       }
+               }
+               rcu_read_unlock();
 
-               space.flags = info->flags;
-               space.total_bytes = info->total_bytes;
-               space.used_bytes = info->bytes_used;
-               memcpy(dest, &space, sizeof(space));
-               dest++;
-               space_args.total_spaces++;
+               if (!info)
+                       continue;
+               down_read(&info->groups_sem);
+               for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+                       if (!list_empty(&info->block_groups[c])) {
+                               get_block_group_info(&info->block_groups[c],
+                                                    &space);
+                               memcpy(dest, &space, sizeof(space));
+                               dest++;
+                               space_args.total_spaces++;
+                       }
+               }
+               up_read(&info->groups_sem);
        }
-       rcu_read_unlock();
 
        user_dest = (struct btrfs_ioctl_space_info *)
                (arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
        return 0;
 }
 
+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+{
+       struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+       struct btrfs_trans_handle *trans;
+       u64 transid;
+
+       trans = btrfs_start_transaction(root, 0);
+       transid = trans->transid;
+       btrfs_commit_transaction_async(trans, root, 0);
+
+       if (argp)
+               if (copy_to_user(argp, &transid, sizeof(transid)))
+                       return -EFAULT;
+       return 0;
+}
+
+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+{
+       struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+       u64 transid;
+
+       if (argp) {
+               if (copy_from_user(&transid, argp, sizeof(transid)))
+                       return -EFAULT;
+       } else {
+               transid = 0;  /* current trans */
+       }
+       return btrfs_wait_for_commit(root, transid);
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
 {
@@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
        case FS_IOC_GETVERSION:
                return btrfs_ioctl_getversion(file, argp);
        case BTRFS_IOC_SNAP_CREATE:
-               return btrfs_ioctl_snap_create(file, argp, 0);
+               return btrfs_ioctl_snap_create(file, argp, 0, 0);
+       case BTRFS_IOC_SNAP_CREATE_ASYNC:
+               return btrfs_ioctl_snap_create(file, argp, 0, 1);
        case BTRFS_IOC_SUBVOL_CREATE:
-               return btrfs_ioctl_snap_create(file, argp, 1);
+               return btrfs_ioctl_snap_create(file, argp, 1, 0);
        case BTRFS_IOC_SNAP_DESTROY:
                return btrfs_ioctl_snap_destroy(file, argp);
        case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
        case BTRFS_IOC_SYNC:
                btrfs_sync_fs(file->f_dentry->d_sb, 1);
                return 0;
+       case BTRFS_IOC_START_SYNC:
+               return btrfs_ioctl_start_sync(file, argp);
+       case BTRFS_IOC_WAIT_SYNC:
+               return btrfs_ioctl_wait_sync(file, argp);
        }
 
        return -ENOTTY;
index 424694aa517f5b03fd5ee16e4052d4c6c7fd6777..17c99ebdf96049a8ad028cd6e138355df86f0c9e 100644 (file)
 
 #define BTRFS_IOCTL_MAGIC 0x94
 #define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_PATH_NAME_MAX 4087
 
 /* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
 struct btrfs_ioctl_vol_args {
        __s64 fd;
        char name[BTRFS_PATH_NAME_MAX + 1];
 };
 
+#define BTRFS_SNAPSHOT_NAME_MAX 4079
+struct btrfs_ioctl_async_vol_args {
+       __s64 fd;
+       __u64 transid;
+       char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
+};
+
 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
 struct btrfs_ioctl_ino_lookup_args {
        __u64 treeid;
@@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
                                    struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
+                                  struct btrfs_ioctl_async_vol_args)
 #endif
index e56c72bc5adddcdb4338b573a2f7cbc05e2b4a37..f4621f6deca1d51f91db57ea406c4c7351a7fd5d 100644 (file)
@@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
        u64 end;
        u64 orig_end;
-       u64 wait_end;
        struct btrfs_ordered_extent *ordered;
        int found;
 
@@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                if (orig_end > INT_LIMIT(loff_t))
                        orig_end = INT_LIMIT(loff_t);
        }
-       wait_end = orig_end;
 again:
        /* start IO across the range first to instantiate any delalloc
         * extents
index b37d723b9d4a8a137b05256a85788074b5f03c5e..045c9c2b2d7ef7399f00370e4137d9da0b370836 100644 (file)
@@ -29,6 +29,7 @@
 #include "locking.h"
 #include "btrfs_inode.h"
 #include "async-thread.h"
+#include "free-space-cache.h"
 
 /*
  * backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
        u64 search_start;
        u64 extents_found;
 
-       int block_rsv_retries;
-
        unsigned int stage:8;
        unsigned int create_reloc_tree:1;
        unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
        LIST_HEAD(reloc_roots);
        u64 num_bytes = 0;
        int ret;
-       int retries = 0;
 
        mutex_lock(&root->fs_info->trans_mutex);
        rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
        if (!err) {
                num_bytes = rc->merging_rsv_size;
                ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
-                                         num_bytes, &retries);
+                                         num_bytes);
                if (ret)
                        err = ret;
        }
@@ -2155,7 +2153,6 @@ again:
                        btrfs_end_transaction(trans, rc->extent_root);
                        btrfs_block_rsv_release(rc->extent_root,
                                                rc->block_rsv, num_bytes);
-                       retries = 0;
                        goto again;
                }
        }
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
        num_bytes = calcu_metadata_size(rc, node, 1) * 2;
 
        trans->block_rsv = rc->block_rsv;
-       ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
-                                 &rc->block_rsv_retries);
+       ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
        if (ret) {
                if (ret == -EAGAIN)
                        rc->commit_transaction = 1;
                return ret;
        }
 
-       rc->block_rsv_retries = 0;
        return 0;
 }
 
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
                BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
                ret = get_ref_objectid_v0(rc, path, extent_key,
                                          &ref_owner, NULL);
+               if (ret < 0)
+                       return ret;
                BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
                level = (int)ref_owner;
                /* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
        return ret;
 }
 
+static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
+                                   struct inode *inode, u64 ino)
+{
+       struct btrfs_key key;
+       struct btrfs_path *path;
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_trans_handle *trans;
+       unsigned long nr;
+       int ret = 0;
+
+       if (inode)
+               goto truncate;
+
+       key.objectid = ino;
+       key.type = BTRFS_INODE_ITEM_KEY;
+       key.offset = 0;
+
+       inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+       if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+               if (inode && !IS_ERR(inode))
+                       iput(inode);
+               return -ENOENT;
+       }
+
+truncate:
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       trans = btrfs_join_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               btrfs_free_path(path);
+               goto out;
+       }
+
+       ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+
+       btrfs_free_path(path);
+       nr = trans->blocks_used;
+       btrfs_end_transaction(trans, root);
+       btrfs_btree_balance_dirty(root, nr);
+out:
+       iput(inode);
+       return ret;
+}
+
 /*
  * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
  * this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
        int counted;
        int ret;
 
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
        ref_root = btrfs_extent_data_ref_root(leaf, ref);
        ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
        ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
        ref_count = btrfs_extent_data_ref_count(leaf, ref);
 
+       /*
+        * This is an extent belonging to the free space cache, lets just delete
+        * it and redo the search.
+        */
+       if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
+               ret = delete_block_group_cache(rc->extent_root->fs_info,
+                                              NULL, ref_objectid);
+               if (ret != -ENOENT)
+                       return ret;
+               ret = 0;
+       }
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
        root = read_fs_root(rc->extent_root->fs_info, ref_root);
        if (IS_ERR(root)) {
                err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
         * is no reservation in transaction handle.
         */
        ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
-                                 rc->extent_root->nodesize * 256,
-                                 &rc->block_rsv_retries);
+                                 rc->extent_root->nodesize * 256);
        if (ret)
                return ret;
 
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
        rc->extents_found = 0;
        rc->nodes_relocated = 0;
        rc->merging_rsv_size = 0;
-       rc->block_rsv_retries = 0;
 
        rc->create_reloc_tree = 1;
        set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 {
        struct btrfs_fs_info *fs_info = extent_root->fs_info;
        struct reloc_control *rc;
+       struct inode *inode;
+       struct btrfs_path *path;
        int ret;
        int rw = 0;
        int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                rw = 1;
        }
 
+       path = btrfs_alloc_path();
+       if (!path) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
+                                       path);
+       btrfs_free_path(path);
+
+       if (!IS_ERR(inode))
+               ret = delete_block_group_cache(fs_info, inode, 0);
+       else
+               ret = PTR_ERR(inode);
+
+       if (ret && ret != -ENOENT) {
+               err = ret;
+               goto out;
+       }
+
        rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
        if (IS_ERR(rc->data_inode)) {
                err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
                btrfs_add_ordered_sum(inode, ordered, sums);
        }
        btrfs_put_ordered_extent(ordered);
-       return 0;
+       return ret;
 }
 
 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
index 2d958be761c84556b39c60afa39999b0f3fd75d6..6a1086e83ffc898ff3ab7f7b33d91c2c442019d7 100644 (file)
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
 {
        struct btrfs_root *dead_root;
-       struct btrfs_item *item;
        struct btrfs_root_item *ri;
        struct btrfs_key key;
        struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
                        nritems = btrfs_header_nritems(leaf);
                        slot = path->slots[0];
                }
-               item = btrfs_item_nr(leaf, slot);
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
                        goto next;
index ebe46c628748336eddee1e70502f2a8e08709f38..8299a25ffc8f8313064cbd3e54278e9f5ebbc662 100644 (file)
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
 
        ret = close_ctree(root);
        sb->s_fs_info = NULL;
+
+       (void)ret; /* FIXME: need to fix VFS to return error? */
 }
 
 enum {
@@ -68,7 +70,8 @@ enum {
        Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
-       Opt_discard, Opt_err,
+       Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
+       Opt_user_subvol_rm_allowed,
 };
 
 static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
        {Opt_flushoncommit, "flushoncommit"},
        {Opt_ratio, "metadata_ratio=%d"},
        {Opt_discard, "discard"},
+       {Opt_space_cache, "space_cache"},
+       {Opt_clear_cache, "clear_cache"},
+       {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_err, NULL},
 };
 
@@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_discard:
                        btrfs_set_opt(info->mount_opt, DISCARD);
                        break;
+               case Opt_space_cache:
+                       printk(KERN_INFO "btrfs: enabling disk space caching\n");
+                       btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+               case Opt_clear_cache:
+                       printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+                       btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+                       break;
+               case Opt_user_subvol_rm_allowed:
+                       btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
+                       break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
 find_root:
        new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
        if (IS_ERR(new_root))
-               return ERR_PTR(PTR_ERR(new_root));
+               return ERR_CAST(new_root);
 
        if (btrfs_root_refs(&new_root->root_item) == 0)
                return ERR_PTR(-ENOENT);
@@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
 {
        struct inode *inode;
        struct dentry *root_dentry;
-       struct btrfs_super_block *disk_super;
        struct btrfs_root *tree_root;
        struct btrfs_key key;
        int err;
@@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
                return PTR_ERR(tree_root);
        }
        sb->s_fs_info = tree_root;
-       disk_super = &tree_root->fs_info->super_copy;
 
        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_INODE_ITEM_KEY;
@@ -571,7 +585,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        char *subvol_name = NULL;
        u64 subvol_objectid = 0;
        int error = 0;
-       int found = 0;
 
        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;
@@ -607,7 +620,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                        goto error_close_devices;
                }
 
-               found = 1;
                btrfs_close_devices(fs_devices);
        } else {
                char b[BDEVNAME_SIZE];
@@ -629,7 +641,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        if (IS_ERR(root)) {
                error = PTR_ERR(root);
                deactivate_locked_super(s);
-               goto error;
+               goto error_free_subvol_name;
        }
        /* if they gave us a subvolume name bind mount into that */
        if (strcmp(subvol_name, ".")) {
@@ -643,14 +655,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                        deactivate_locked_super(s);
                        error = PTR_ERR(new_root);
                        dput(root);
-                       goto error_close_devices;
+                       goto error_free_subvol_name;
                }
                if (!new_root->d_inode) {
                        dput(root);
                        dput(new_root);
                        deactivate_locked_super(s);
                        error = -ENXIO;
-                       goto error_close_devices;
+                       goto error_free_subvol_name;
                }
                dput(root);
                root = new_root;
@@ -665,7 +677,6 @@ error_close_devices:
        btrfs_close_devices(fs_devices);
 error_free_subvol_name:
        kfree(subvol_name);
-error:
        return ERR_PTR(error);
 }
 
@@ -713,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct list_head *head = &root->fs_info->space_info;
        struct btrfs_space_info *found;
        u64 total_used = 0;
+       u64 total_used_data = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)root->fs_info->fsid;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(found, head, list)
+       list_for_each_entry_rcu(found, head, list) {
+               if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
+                                   BTRFS_BLOCK_GROUP_SYSTEM))
+                       total_used_data += found->disk_total;
+               else
+                       total_used_data += found->disk_used;
                total_used += found->disk_used;
+       }
        rcu_read_unlock();
 
        buf->f_namelen = BTRFS_NAME_LEN;
        buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
        buf->f_bfree = buf->f_blocks - (total_used >> bits);
-       buf->f_bavail = buf->f_bfree;
+       buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
        buf->f_bsize = dentry->d_sb->s_blocksize;
        buf->f_type = BTRFS_SUPER_MAGIC;
 
index 66e4c66cc63bf114900c6da4462c32e6bd0e65e0..1fffbc017bdfe0451cc2e67ccf5fd85623a12aee 100644 (file)
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
        TRANS_START,
        TRANS_JOIN,
        TRANS_USERSPACE,
+       TRANS_JOIN_NOLOCK,
 };
 
 static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
-       int retries = 0;
        int ret;
 again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h)
                return ERR_PTR(-ENOMEM);
 
-       mutex_lock(&root->fs_info->trans_mutex);
+       if (type != TRANS_JOIN_NOLOCK)
+               mutex_lock(&root->fs_info->trans_mutex);
        if (may_wait_transaction(root, type))
                wait_current_trans(root);
 
@@ -195,7 +196,8 @@ again:
 
        cur_trans = root->fs_info->running_transaction;
        cur_trans->use_count++;
-       mutex_unlock(&root->fs_info->trans_mutex);
+       if (type != TRANS_JOIN_NOLOCK)
+               mutex_unlock(&root->fs_info->trans_mutex);
 
        h->transid = cur_trans->transid;
        h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
        }
 
        if (num_items > 0) {
-               ret = btrfs_trans_reserve_metadata(h, root, num_items,
-                                                  &retries);
+               ret = btrfs_trans_reserve_metadata(h, root, num_items);
                if (ret == -EAGAIN) {
                        btrfs_commit_transaction(h, root);
                        goto again;
@@ -224,9 +225,11 @@ again:
                }
        }
 
-       mutex_lock(&root->fs_info->trans_mutex);
+       if (type != TRANS_JOIN_NOLOCK)
+               mutex_lock(&root->fs_info->trans_mutex);
        record_root_in_trans(h, root);
-       mutex_unlock(&root->fs_info->trans_mutex);
+       if (type != TRANS_JOIN_NOLOCK)
+               mutex_unlock(&root->fs_info->trans_mutex);
 
        if (!current->journal_info && type != TRANS_USERSPACE)
                current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
        return start_transaction(root, 0, TRANS_JOIN);
 }
 
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+                                                         int num_blocks)
+{
+       return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+}
+
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
                                                         int num_blocks)
 {
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
        return 0;
 }
 
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
+{
+       struct btrfs_transaction *cur_trans = NULL, *t;
+       int ret;
+
+       mutex_lock(&root->fs_info->trans_mutex);
+
+       ret = 0;
+       if (transid) {
+               if (transid <= root->fs_info->last_trans_committed)
+                       goto out_unlock;
+
+               /* find specified transaction */
+               list_for_each_entry(t, &root->fs_info->trans_list, list) {
+                       if (t->transid == transid) {
+                               cur_trans = t;
+                               break;
+                       }
+                       if (t->transid > transid)
+                               break;
+               }
+               ret = -EINVAL;
+               if (!cur_trans)
+                       goto out_unlock;  /* bad transid */
+       } else {
+               /* find newest transaction that is committing | committed */
+               list_for_each_entry_reverse(t, &root->fs_info->trans_list,
+                                           list) {
+                       if (t->in_commit) {
+                               if (t->commit_done)
+                                       goto out_unlock;
+                               cur_trans = t;
+                               break;
+                       }
+               }
+               if (!cur_trans)
+                       goto out_unlock;  /* nothing committing|committed */
+       }
+
+       cur_trans->use_count++;
+       mutex_unlock(&root->fs_info->trans_mutex);
+
+       wait_for_commit(root, cur_trans);
+
+       mutex_lock(&root->fs_info->trans_mutex);
+       put_transaction(cur_trans);
+       ret = 0;
+out_unlock:
+       mutex_unlock(&root->fs_info->trans_mutex);
+       return ret;
+}
+
 #if 0
 /*
  * rate limit against the drop_snapshot code.  This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 }
 
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root, int throttle)
+                         struct btrfs_root *root, int throttle, int lock)
 {
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
        btrfs_trans_release_metadata(trans, root);
 
-       if (!root->fs_info->open_ioctl_trans &&
+       if (lock && !root->fs_info->open_ioctl_trans &&
            should_end_transaction(trans, root))
                trans->transaction->blocked = 1;
 
-       if (cur_trans->blocked && !cur_trans->in_commit) {
+       if (lock && cur_trans->blocked && !cur_trans->in_commit) {
                if (throttle)
                        return btrfs_commit_transaction(trans, root);
                else
                        wake_up_process(info->transaction_kthread);
        }
 
-       mutex_lock(&info->trans_mutex);
+       if (lock)
+               mutex_lock(&info->trans_mutex);
        WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(cur_trans->num_writers < 1);
        cur_trans->num_writers--;
 
+       smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);
-       mutex_unlock(&info->trans_mutex);
+       if (lock)
+               mutex_unlock(&info->trans_mutex);
 
        if (current->journal_info == trans)
                current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
 {
-       return __btrfs_end_transaction(trans, root, 0);
+       return __btrfs_end_transaction(trans, root, 0, 1);
 }
 
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
-       return __btrfs_end_transaction(trans, root, 1);
+       return __btrfs_end_transaction(trans, root, 1, 1);
+}
+
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root)
+{
+       return __btrfs_end_transaction(trans, root, 0, 0);
 }
 
 /*
@@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        int ret;
-       int retries = 0;
        u64 to_reserve = 0;
        u64 index = 0;
        u64 objectid;
@@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
        if (to_reserve > 0) {
                ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
-                                         to_reserve, &retries);
+                                         to_reserve);
                if (ret) {
                        pending->error = ret;
                        goto fail;
@@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
        super->root = root_item->bytenr;
        super->generation = root_item->generation;
        super->root_level = root_item->level;
+       if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
+               super->cache_generation = root_item->generation;
 }
 
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
        return ret;
 }
 
+/*
+ * wait for the current transaction commit to start and block subsequent
+ * transaction joins
+ */
+static void wait_current_trans_commit_start(struct btrfs_root *root,
+                                           struct btrfs_transaction *trans)
+{
+       DEFINE_WAIT(wait);
+
+       if (trans->in_commit)
+               return;
+
+       while (1) {
+               prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               if (trans->in_commit) {
+                       finish_wait(&root->fs_info->transaction_blocked_wait,
+                                   &wait);
+                       break;
+               }
+               mutex_unlock(&root->fs_info->trans_mutex);
+               schedule();
+               mutex_lock(&root->fs_info->trans_mutex);
+               finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
+       }
+}
+
+/*
+ * wait for the current transaction to start and then become unblocked.
+ * caller holds ref.
+ */
+static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
+                                        struct btrfs_transaction *trans)
+{
+       DEFINE_WAIT(wait);
+
+       if (trans->commit_done || (trans->in_commit && !trans->blocked))
+               return;
+
+       while (1) {
+               prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               if (trans->commit_done ||
+                   (trans->in_commit && !trans->blocked)) {
+                       finish_wait(&root->fs_info->transaction_wait,
+                                   &wait);
+                       break;
+               }
+               mutex_unlock(&root->fs_info->trans_mutex);
+               schedule();
+               mutex_lock(&root->fs_info->trans_mutex);
+               finish_wait(&root->fs_info->transaction_wait,
+                           &wait);
+       }
+}
+
+/*
+ * commit transactions asynchronously. once btrfs_commit_transaction_async
+ * returns, any subsequent transaction will not be allowed to join.
+ */
+struct btrfs_async_commit {
+       struct btrfs_trans_handle *newtrans;
+       struct btrfs_root *root;
+       struct delayed_work work;
+};
+
+static void do_async_commit(struct work_struct *work)
+{
+       struct btrfs_async_commit *ac =
+               container_of(work, struct btrfs_async_commit, work.work);
+
+       btrfs_commit_transaction(ac->newtrans, ac->root);
+       kfree(ac);
+}
+
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  int wait_for_unblock)
+{
+       struct btrfs_async_commit *ac;
+       struct btrfs_transaction *cur_trans;
+
+       ac = kmalloc(sizeof(*ac), GFP_NOFS);
+       BUG_ON(!ac);
+
+       INIT_DELAYED_WORK(&ac->work, do_async_commit);
+       ac->root = root;
+       ac->newtrans = btrfs_join_transaction(root, 0);
+
+       /* take transaction reference */
+       mutex_lock(&root->fs_info->trans_mutex);
+       cur_trans = trans->transaction;
+       cur_trans->use_count++;
+       mutex_unlock(&root->fs_info->trans_mutex);
+
+       btrfs_end_transaction(trans, root);
+       schedule_delayed_work(&ac->work, 0);
+
+       /* wait for transaction to start and unblock */
+       mutex_lock(&root->fs_info->trans_mutex);
+       if (wait_for_unblock)
+               wait_current_trans_commit_start_and_unblock(root, cur_trans);
+       else
+               wait_current_trans_commit_start(root, cur_trans);
+       put_transaction(cur_trans);
+       mutex_unlock(&root->fs_info->trans_mutex);
+
+       return 0;
+}
+
+/*
+ * btrfs_transaction state sequence:
+ *    in_commit = 0, blocked = 0  (initial)
+ *    in_commit = 1, blocked = 1
+ *    blocked = 0
+ *    commit_done = 1
+ */
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
        unsigned long joined = 0;
-       unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
        DEFINE_WAIT(wait);
@@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        trans->transaction->in_commit = 1;
        trans->transaction->blocked = 1;
+       wake_up(&root->fs_info->transaction_blocked_wait);
+
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
@@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                        snap_pending = 1;
 
                WARN_ON(cur_trans != trans->transaction);
-               if (cur_trans->num_writers > 1)
-                       timeout = MAX_SCHEDULE_TIMEOUT;
-               else if (should_grow)
-                       timeout = 1;
-
                mutex_unlock(&root->fs_info->trans_mutex);
 
                if (flush_on_commit || snap_pending) {
@@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                                TASK_UNINTERRUPTIBLE);
 
                smp_mb();
-               if (cur_trans->num_writers > 1 || should_grow)
-                       schedule_timeout(timeout);
+               if (cur_trans->num_writers > 1)
+                       schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+               else if (should_grow)
+                       schedule_timeout(1);
 
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
index e104986d0bfd682e036ec67d236d65d062422164..f104b57ad4efad42ce6f98d257cd62f3297c3cea 100644 (file)
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
 
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root);
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   int num_items);
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
                                                  int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+                                                         int num_blocks);
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
                                                         int num_blocks);
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root);
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  int wait_for_unblock);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
index f7ac8e013ed73459139ae07b00189ffa952bba08..992ab425599d10bfe0b32e866b9c4bd366a1a9b9 100644 (file)
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
        int ret = 0;
        int wret;
        int level;
-       int orig_level;
        int is_extent = 0;
        int next_key_ret = 0;
        u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        level = btrfs_header_level(root->node);
-       orig_level = level;
 
        if (level == 0)
                goto out;
index fb102a9aee9cc1ee3213d04bc1bae6fec6644f2d..a29f19384a27bc91526d46fb3cbefdbe2bd8a5ce 100644 (file)
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 {
        struct inode *dir;
        int ret;
-       struct btrfs_key location;
        struct btrfs_inode_ref *ref;
        struct btrfs_dir_item *di;
        struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
        unsigned long ref_ptr;
        unsigned long ref_end;
 
-       location.objectid = key->objectid;
-       location.type = BTRFS_INODE_ITEM_KEY;
-       location.offset = 0;
-
        /*
         * it is possible that we didn't log all the parent directories
         * for a given inode.  If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
        struct btrfs_path *path;
        struct btrfs_root *root = wc->replay_dest;
        struct btrfs_key key;
-       u32 item_size;
        int level;
        int i;
        int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
        nritems = btrfs_header_nritems(eb);
        for (i = 0; i < nritems; i++) {
                btrfs_item_key_to_cpu(eb, &key, i);
-               item_size = btrfs_item_size_nr(eb, i);
 
                /* inode keys are done during the first stage */
                if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                   struct walk_control *wc)
 {
        u64 root_owner;
-       u64 root_gen;
        u64 bytenr;
        u64 ptr_gen;
        struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
                parent = path->nodes[*level];
                root_owner = btrfs_header_owner(parent);
-               root_gen = btrfs_header_generation(parent);
 
                next = btrfs_find_create_tree_block(root, bytenr, blocksize);
 
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                 struct walk_control *wc)
 {
        u64 root_owner;
-       u64 root_gen;
        int i;
        int slot;
        int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
        for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
                slot = path->slots[i];
                if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
-                       struct extent_buffer *node;
-                       node = path->nodes[i];
                        path->slots[i]++;
                        *level = i;
                        WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                parent = path->nodes[*level + 1];
 
                        root_owner = btrfs_header_owner(parent);
-                       root_gen = btrfs_header_generation(parent);
                        wc->process_func(root, path->nodes[*level], wc,
                                 btrfs_header_generation(path->nodes[*level]));
                        if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
        }
        btrfs_end_log_trans(root);
 
-       return 0;
+       return err;
 }
 
 /* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct btrfs_key max_key;
        struct btrfs_root *log = root->log_root;
        struct extent_buffer *src = NULL;
-       u32 size;
        int err = 0;
        int ret;
        int nritems;
@@ -2793,7 +2779,6 @@ again:
                        break;
 
                src = path->nodes[0];
-               size = btrfs_item_size_nr(src, path->slots[0]);
                if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
                        ins_nr++;
                        goto next_slot;
index e25e46a8b4e2540639f5d1dbcfbbb71c955f01e6..cc04dc1445d64da8694401c2cbc2fde27105b8f9 100644 (file)
@@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
        u64 size_to_free;
        struct btrfs_path *path;
        struct btrfs_key key;
-       struct btrfs_chunk *chunk;
        struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
        struct btrfs_trans_handle *trans;
        struct btrfs_key found_key;
@@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
                if (found_key.objectid != key.objectid)
                        break;
 
-               chunk = btrfs_item_ptr(path->nodes[0],
-                                      path->slots[0],
-                                      struct btrfs_chunk);
                /* chunk zero is special */
                if (found_key.offset == 0)
                        break;
@@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                }
                bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
                dev = multi->stripes[dev_nr].dev;
-               BUG_ON(rw == WRITE && !dev->writeable);
-               if (dev && dev->bdev) {
+               if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
                        bio->bi_bdev = dev->bdev;
                        if (async_submit)
                                schedule_bio(root, dev, rw, bio);
index 88ecbb215878ae3573a5bbedfc4afadc6ef0a73d..698fdd2c739c9286e549c290983ed1f08138ef39 100644 (file)
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        struct inode *inode = dentry->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_path *path;
-       struct btrfs_item *item;
        struct extent_buffer *leaf;
        struct btrfs_dir_item *di;
        int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
                }
                advance = 1;
 
-               item = btrfs_item_nr(leaf, slot);
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
                /* check to make sure this item is what we want */
index 3e2b90eaa23945415d56cbd0edfec6848e5dd2d6..b9cd5445f71cac0c6b2d565f80d065091d65fa4b 100644 (file)
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
        int nr_pages = 0;
        struct page *in_page = NULL;
        struct page *out_page = NULL;
-       int out_written = 0;
-       int in_read = 0;
        unsigned long bytes_left;
 
        *out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
        workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
        workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
 
-       out_written = 0;
-       in_read = 0;
-
        while (workspace->def_strm.total_in < len) {
                ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
                if (ret != Z_OK) {
index f027382b54bef3b7c125c090faaa9e7b87f2bb9c..3d06ccc953aafaa8f590528015953e2e53a2c0c9 100644 (file)
@@ -1081,30 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
 }
 
 /**
- * writeback_inodes_sb -       writeback dirty inodes from given super_block
+ * writeback_inodes_sb_nr -    writeback dirty inodes from given super_block
  * @sb: the superblock
+ * @nr: the number of pages to write
  *
  * Start writeback on some inodes on this super_block. No guarantees are made
  * on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO. The number of pages submitted is
- * returned.
+ * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
 {
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
                .sb             = sb,
                .sync_mode      = WB_SYNC_NONE,
                .done           = &done,
+               .nr_pages       = nr,
        };
 
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
-       work.nr_pages = get_nr_dirty_pages();
-
        bdi_queue_work(sb->s_bdi, &work);
        wait_for_completion(&done);
 }
+EXPORT_SYMBOL(writeback_inodes_sb_nr);
+
+/**
+ * writeback_inodes_sb -       writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ */
+void writeback_inodes_sb(struct super_block *sb)
+{
+       return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+}
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
@@ -1126,6 +1138,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
 }
 EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
 
+/**
+ * writeback_inodes_sb_if_idle -       start writeback if none underway
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
+                                  unsigned long nr)
+{
+       if (!writeback_in_progress(sb->s_bdi)) {
+               down_read(&sb->s_umount);
+               writeback_inodes_sb_nr(sb, nr);
+               up_read(&sb->s_umount);
+               return 1;
+       } else
+               return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+
 /**
  * sync_inodes_sb      -       sync sb inode pages
  * @sb: the superblock
index 09eec350054d0259fb36f2e1b1447f6b5a0bde72..0ead399e08b5cfc549d1debf26f8e7915c7c9904 100644 (file)
@@ -58,7 +58,9 @@ struct writeback_control {
 struct bdi_writeback;
 int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *);
+void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
 int writeback_inodes_sb_if_idle(struct super_block *);
+int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
 void sync_inodes_sb(struct super_block *);
 void writeback_inodes_wb(struct bdi_writeback *wb,
                struct writeback_control *wbc);