Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs...
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Jul 2011 23:43:52 +0000 (16:43 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Jul 2011 23:43:52 +0000 (16:43 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: make sure reserve_metadata_bytes doesn't leak out strange errors
  Btrfs: use the commit_root for reading free_space_inode crcs
  Btrfs: reduce extent_state lock contention for metadata
  Btrfs: remove lockdep magic from btrfs_next_leaf
  Btrfs: make a lockdep class for each root
  Btrfs: switch the btrfs tree locks to reader/writer
  Btrfs: fix deadlock when throttling transactions
  Btrfs: stop using highmem for extent_buffers
  Btrfs: fix BUG_ON() caused by ENOSPC when relocating space
  Btrfs: tag pages for writeback in sync
  Btrfs: fix enospc problems with delalloc
  Btrfs: don't flush delalloc arbitrarily
  Btrfs: use find_or_create_page instead of grab_cache_page
  Btrfs: use a worker thread to do caching
  Btrfs: fix how we merge extent states and deal with cached states
  Btrfs: use the normal checksumming infrastructure for free space cache
  Btrfs: serialize flushers in reserve_metadata_bytes
  Btrfs: do transaction space reservation before joining the transaction
  Btrfs: try to only do one btrfs_search_slot in do_setxattr

23 files changed:
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/locking.c
fs/btrfs/locking.h
fs/btrfs/relocation.c
fs/btrfs/struct-funcs.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c

index 52d7eca8c7bfe9d599e60cd0918b36c7f5fefe42..502b9e98867949736b03d5bbf55b14b7d67205e7 100644 (file)
@@ -34,6 +34,9 @@ struct btrfs_inode {
         */
        struct btrfs_key location;
 
+       /* Lock for counters */
+       spinlock_t lock;
+
        /* the extent_tree has caches of all the extent mappings to disk */
        struct extent_map_tree extent_tree;
 
@@ -134,8 +137,8 @@ struct btrfs_inode {
         * items we think we'll end up using, and reserved_extents is the number
         * of extent items we've reserved metadata for.
         */
-       atomic_t outstanding_extents;
-       atomic_t reserved_extents;
+       unsigned outstanding_extents;
+       unsigned reserved_extents;
 
        /*
         * ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
        BTRFS_I(inode)->disk_i_size = size;
 }
 
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+                                      struct inode *inode)
+{
+       if (root == root->fs_info->tree_root ||
+           BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+               return true;
+       return false;
+}
+
 #endif
index 2e667868e0d2b8b75649572d10602ac56679a0d0..011cab3aca8d9ffeba2690f642badec7e36471c9 100644 (file)
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
 {
        int i;
        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-               if (p->nodes[i] && p->locks[i])
-                       btrfs_set_lock_blocking(p->nodes[i]);
+               if (!p->nodes[i] || !p->locks[i])
+                       continue;
+               btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+               if (p->locks[i] == BTRFS_READ_LOCK)
+                       p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+               else if (p->locks[i] == BTRFS_WRITE_LOCK)
+                       p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
        }
 }
 
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  * for held
  */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-                                       struct extent_buffer *held)
+                                       struct extent_buffer *held, int held_rw)
 {
        int i;
 
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
         * really sure by forcing the path to blocking before we clear
         * the path blocking.
         */
-       if (held)
-               btrfs_set_lock_blocking(held);
+       if (held) {
+               btrfs_set_lock_blocking_rw(held, held_rw);
+               if (held_rw == BTRFS_WRITE_LOCK)
+                       held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+               else if (held_rw == BTRFS_READ_LOCK)
+                       held_rw = BTRFS_READ_LOCK_BLOCKING;
+       }
        btrfs_set_path_blocking(p);
 #endif
 
        for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-               if (p->nodes[i] && p->locks[i])
-                       btrfs_clear_lock_blocking(p->nodes[i]);
+               if (p->nodes[i] && p->locks[i]) {
+                       btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+                       if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+                               p->locks[i] = BTRFS_WRITE_LOCK;
+                       else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+                               p->locks[i] = BTRFS_READ_LOCK;
+               }
        }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (held)
-               btrfs_clear_lock_blocking(held);
+               btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
 
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
                if (!p->nodes[i])
                        continue;
                if (p->locks[i]) {
-                       btrfs_tree_unlock(p->nodes[i]);
+                       btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
                        p->locks[i] = 0;
                }
                free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
        return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+       struct extent_buffer *eb;
+
+       while (1) {
+               eb = btrfs_root_node(root);
+               btrfs_tree_read_lock(eb);
+               if (eb == root->node)
+                       break;
+               btrfs_tree_read_unlock(eb);
+               free_extent_buffer(eb);
+       }
+       return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
        for (i = start_slot; i < end_slot; i++) {
                int close = 1;
 
-               if (!parent->map_token) {
-                       map_extent_buffer(parent,
-                                       btrfs_node_key_ptr_offset(i),
-                                       sizeof(struct btrfs_key_ptr),
-                                       &parent->map_token, &parent->kaddr,
-                                       &parent->map_start, &parent->map_len,
-                                       KM_USER1);
-               }
                btrfs_node_key(parent, &disk_key, i);
                if (!progress_passed && comp_keys(&disk_key, progress) < 0)
                        continue;
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        last_block = blocknr;
                        continue;
                }
-               if (parent->map_token) {
-                       unmap_extent_buffer(parent, parent->map_token,
-                                           KM_USER1);
-                       parent->map_token = NULL;
-               }
 
                cur = btrfs_find_tree_block(root, blocknr, blocksize);
                if (cur)
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                btrfs_tree_unlock(cur);
                free_extent_buffer(cur);
        }
-       if (parent->map_token) {
-               unmap_extent_buffer(parent, parent->map_token,
-                                   KM_USER1);
-               parent->map_token = NULL;
-       }
        return err;
 }
 
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
        struct btrfs_disk_key *tmp = NULL;
        struct btrfs_disk_key unaligned;
        unsigned long offset;
-       char *map_token = NULL;
        char *kaddr = NULL;
        unsigned long map_start = 0;
        unsigned long map_len = 0;
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
                mid = (low + high) / 2;
                offset = p + mid * item_size;
 
-               if (!map_token || offset < map_start ||
+               if (!kaddr || offset < map_start ||
                    (offset + sizeof(struct btrfs_disk_key)) >
                    map_start + map_len) {
-                       if (map_token) {
-                               unmap_extent_buffer(eb, map_token, KM_USER0);
-                               map_token = NULL;
-                       }
 
                        err = map_private_extent_buffer(eb, offset,
                                                sizeof(struct btrfs_disk_key),
-                                               &map_token, &kaddr,
-                                               &map_start, &map_len, KM_USER0);
+                                               &kaddr, &map_start, &map_len);
 
                        if (!err) {
                                tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
                        high = mid;
                else {
                        *slot = mid;
-                       if (map_token)
-                               unmap_extent_buffer(eb, map_token, KM_USER0);
                        return 0;
                }
        }
        *slot = low;
-       if (map_token)
-               unmap_extent_buffer(eb, map_token, KM_USER0);
        return 1;
 }
 
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
        mid = path->nodes[level];
 
-       WARN_ON(!path->locks[level]);
+       WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+               path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
        WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
        orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root,
        u32 nr;
        u32 blocksize;
        u32 nscan = 0;
-       bool map = true;
 
        if (level != 1)
                return;
@@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root,
 
        nritems = btrfs_header_nritems(node);
        nr = slot;
-       if (node->map_token || path->skip_locking)
-               map = false;
 
        while (1) {
-               if (map && !node->map_token) {
-                       unsigned long offset = btrfs_node_key_ptr_offset(nr);
-                       map_private_extent_buffer(node, offset,
-                                                 sizeof(struct btrfs_key_ptr),
-                                                 &node->map_token,
-                                                 &node->kaddr,
-                                                 &node->map_start,
-                                                 &node->map_len, KM_USER1);
-               }
                if (direction < 0) {
                        if (nr == 0)
                                break;
@@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root,
                if ((search <= target && target - search <= 65536) ||
                    (search > target && search - target <= 65536)) {
                        gen = btrfs_node_ptr_generation(node, nr);
-                       if (map && node->map_token) {
-                               unmap_extent_buffer(node, node->map_token,
-                                                   KM_USER1);
-                               node->map_token = NULL;
-                       }
                        readahead_tree_block(root, search, blocksize, gen);
                        nread += blocksize;
                }
@@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root,
                if ((nread > 65536 || nscan > 32))
                        break;
        }
-       if (map && node->map_token) {
-               unmap_extent_buffer(node, node->map_token, KM_USER1);
-               node->map_token = NULL;
-       }
 }
 
 /*
@@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 
                t = path->nodes[i];
                if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-                       btrfs_tree_unlock(t);
+                       btrfs_tree_unlock_rw(t, path->locks[i]);
                        path->locks[i] = 0;
                }
        }
@@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
                        continue;
                if (!path->locks[i])
                        continue;
-               btrfs_tree_unlock(path->nodes[i]);
+               btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
                path->locks[i] = 0;
        }
 }
@@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
                         * we can trust our generation number
                         */
                        free_extent_buffer(tmp);
+                       btrfs_set_path_blocking(p);
+
                        tmp = read_tree_block(root, blocknr, blocksize, gen);
                        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
                                *eb_ret = tmp;
@@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct btrfs_path *p,
-                      struct extent_buffer *b, int level, int ins_len)
+                      struct extent_buffer *b, int level, int ins_len,
+                      int *write_lock_level)
 {
        int ret;
        if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
            BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
                int sret;
 
+               if (*write_lock_level < level + 1) {
+                       *write_lock_level = level + 1;
+                       btrfs_release_path(p);
+                       goto again;
+               }
+
                sret = reada_for_balance(root, p, level);
                if (sret)
                        goto again;
 
                btrfs_set_path_blocking(p);
                sret = split_node(trans, root, p, level);
-               btrfs_clear_path_blocking(p, NULL);
+               btrfs_clear_path_blocking(p, NULL, 0);
 
                BUG_ON(sret > 0);
                if (sret) {
@@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
                   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
                int sret;
 
+               if (*write_lock_level < level + 1) {
+                       *write_lock_level = level + 1;
+                       btrfs_release_path(p);
+                       goto again;
+               }
+
                sret = reada_for_balance(root, p, level);
                if (sret)
                        goto again;
 
                btrfs_set_path_blocking(p);
                sret = balance_level(trans, root, p, level);
-               btrfs_clear_path_blocking(p, NULL);
+               btrfs_clear_path_blocking(p, NULL, 0);
 
                if (sret) {
                        ret = sret;
@@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        int err;
        int level;
        int lowest_unlock = 1;
+       int root_lock;
+       /* everything at write_lock_level or lower must be write locked */
+       int write_lock_level = 0;
        u8 lowest_level = 0;
 
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
        WARN_ON(p->nodes[0] != NULL);
 
-       if (ins_len < 0)
+       if (ins_len < 0) {
                lowest_unlock = 2;
 
+               /* when we are removing items, we might have to go up to level
+                * two as we update tree pointers  Make sure we keep write
+                * for those levels as well
+                */
+               write_lock_level = 2;
+       } else if (ins_len > 0) {
+               /*
+                * for inserting items, make sure we have a write lock on
+                * level 1 so we can update keys
+                */
+               write_lock_level = 1;
+       }
+
+       if (!cow)
+               write_lock_level = -1;
+
+       if (cow && (p->keep_locks || p->lowest_level))
+               write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+       /*
+        * we try very hard to do read locks on the root
+        */
+       root_lock = BTRFS_READ_LOCK;
+       level = 0;
        if (p->search_commit_root) {
+               /*
+                * the commit roots are read only
+                * so we always do read locks
+                */
                b = root->commit_root;
                extent_buffer_get(b);
+               level = btrfs_header_level(b);
                if (!p->skip_locking)
-                       btrfs_tree_lock(b);
+                       btrfs_tree_read_lock(b);
        } else {
-               if (p->skip_locking)
+               if (p->skip_locking) {
                        b = btrfs_root_node(root);
-               else
-                       b = btrfs_lock_root_node(root);
+                       level = btrfs_header_level(b);
+               } else {
+                       /* we don't know the level of the root node
+                        * until we actually have it read locked
+                        */
+                       b = btrfs_read_lock_root_node(root);
+                       level = btrfs_header_level(b);
+                       if (level <= write_lock_level) {
+                               /* whoops, must trade for write lock */
+                               btrfs_tree_read_unlock(b);
+                               free_extent_buffer(b);
+                               b = btrfs_lock_root_node(root);
+                               root_lock = BTRFS_WRITE_LOCK;
+
+                               /* the level might have changed, check again */
+                               level = btrfs_header_level(b);
+                       }
+               }
        }
+       p->nodes[level] = b;
+       if (!p->skip_locking)
+               p->locks[level] = root_lock;
 
        while (b) {
                level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@ again:
                 * setup the path here so we can release it under lock
                 * contention with the cow code
                 */
-               p->nodes[level] = b;
-               if (!p->skip_locking)
-                       p->locks[level] = 1;
-
                if (cow) {
                        /*
                         * if we don't really need to cow this block
@@ -1659,6 +1707,16 @@ again:
 
                        btrfs_set_path_blocking(p);
 
+                       /*
+                        * must have write locks on this node and the
+                        * parent
+                        */
+                       if (level + 1 > write_lock_level) {
+                               write_lock_level = level + 1;
+                               btrfs_release_path(p);
+                               goto again;
+                       }
+
                        err = btrfs_cow_block(trans, root, b,
                                              p->nodes[level + 1],
                                              p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@ cow_done:
                BUG_ON(!cow && ins_len);
 
                p->nodes[level] = b;
-               if (!p->skip_locking)
-                       p->locks[level] = 1;
-
-               btrfs_clear_path_blocking(p, NULL);
+               btrfs_clear_path_blocking(p, NULL, 0);
 
                /*
                 * we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@ cow_done:
                        }
                        p->slots[level] = slot;
                        err = setup_nodes_for_search(trans, root, p, b, level,
-                                                    ins_len);
+                                            ins_len, &write_lock_level);
                        if (err == -EAGAIN)
                                goto again;
                        if (err) {
@@ -1710,6 +1765,19 @@ cow_done:
                        b = p->nodes[level];
                        slot = p->slots[level];
 
+                       /*
+                        * slot 0 is special, if we change the key
+                        * we have to update the parent pointer
+                        * which means we must have a write lock
+                        * on the parent
+                        */
+                       if (slot == 0 && cow &&
+                           write_lock_level < level + 1) {
+                               write_lock_level = level + 1;
+                               btrfs_release_path(p);
+                               goto again;
+                       }
+
                        unlock_up(p, level, lowest_unlock);
 
                        if (level == lowest_level) {
@@ -1728,23 +1796,42 @@ cow_done:
                        }
 
                        if (!p->skip_locking) {
-                               btrfs_clear_path_blocking(p, NULL);
-                               err = btrfs_try_spin_lock(b);
-
-                               if (!err) {
-                                       btrfs_set_path_blocking(p);
-                                       btrfs_tree_lock(b);
-                                       btrfs_clear_path_blocking(p, b);
+                               level = btrfs_header_level(b);
+                               if (level <= write_lock_level) {
+                                       err = btrfs_try_tree_write_lock(b);
+                                       if (!err) {
+                                               btrfs_set_path_blocking(p);
+                                               btrfs_tree_lock(b);
+                                               btrfs_clear_path_blocking(p, b,
+                                                                 BTRFS_WRITE_LOCK);
+                                       }
+                                       p->locks[level] = BTRFS_WRITE_LOCK;
+                               } else {
+                                       err = btrfs_try_tree_read_lock(b);
+                                       if (!err) {
+                                               btrfs_set_path_blocking(p);
+                                               btrfs_tree_read_lock(b);
+                                               btrfs_clear_path_blocking(p, b,
+                                                                 BTRFS_READ_LOCK);
+                                       }
+                                       p->locks[level] = BTRFS_READ_LOCK;
                                }
+                               p->nodes[level] = b;
                        }
                } else {
                        p->slots[level] = slot;
                        if (ins_len > 0 &&
                            btrfs_leaf_free_space(root, b) < ins_len) {
+                               if (write_lock_level < 1) {
+                                       write_lock_level = 1;
+                                       btrfs_release_path(p);
+                                       goto again;
+                               }
+
                                btrfs_set_path_blocking(p);
                                err = split_leaf(trans, root, key,
                                                 p, ins_len, ret == 0);
-                               btrfs_clear_path_blocking(p, NULL);
+                               btrfs_clear_path_blocking(p, NULL, 0);
 
                                BUG_ON(err > 0);
                                if (err) {
@@ -2025,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        add_root_to_dirty_list(root);
        extent_buffer_get(c);
        path->nodes[level] = c;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK;
        path->slots[level] = 0;
        return 0;
 }
@@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
                if (path->slots[0] == i)
                        push_space += data_size;
 
-               if (!left->map_token) {
-                       map_extent_buffer(left, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &left->map_token, &left->kaddr,
-                                       &left->map_start, &left->map_len,
-                                       KM_USER1);
-               }
-
                this_item_size = btrfs_item_size(left, item);
                if (this_item_size + sizeof(*item) + push_space > free_space)
                        break;
@@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
                        break;
                i--;
        }
-       if (left->map_token) {
-               unmap_extent_buffer(left, left->map_token, KM_USER1);
-               left->map_token = NULL;
-       }
 
        if (push_items == 0)
                goto out_unlock;
@@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        push_space = BTRFS_LEAF_DATA_SIZE(root);
        for (i = 0; i < right_nritems; i++) {
                item = btrfs_item_nr(right, i);
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
                push_space -= btrfs_item_size(right, item);
                btrfs_set_item_offset(right, item, push_space);
        }
 
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
        left_nritems -= push_items;
        btrfs_set_header_nritems(left, left_nritems);
 
@@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
        for (i = 0; i < nr; i++) {
                item = btrfs_item_nr(right, i);
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
 
                if (!empty && push_items > 0) {
                        if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
                push_space += this_item_size + sizeof(*item);
        }
 
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
-
        if (push_items == 0) {
                ret = 1;
                goto out;
@@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
                u32 ioff;
 
                item = btrfs_item_nr(left, i);
-               if (!left->map_token) {
-                       map_extent_buffer(left, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &left->map_token, &left->kaddr,
-                                       &left->map_start, &left->map_len,
-                                       KM_USER1);
-               }
 
                ioff = btrfs_item_offset(left, item);
                btrfs_set_item_offset(left, item,
                      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
        }
        btrfs_set_header_nritems(left, old_left_nritems + push_items);
-       if (left->map_token) {
-               unmap_extent_buffer(left, left->map_token, KM_USER1);
-               left->map_token = NULL;
-       }
 
        /* fixup right node */
        if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        for (i = 0; i < right_nritems; i++) {
                item = btrfs_item_nr(right, i);
 
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
-
                push_space = push_space - btrfs_item_size(right, item);
                btrfs_set_item_offset(right, item, push_space);
        }
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
 
        btrfs_mark_buffer_dirty(left);
        if (right_nritems)
@@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
                struct btrfs_item *item = btrfs_item_nr(right, i);
                u32 ioff;
 
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
-
                ioff = btrfs_item_offset(right, item);
                btrfs_set_item_offset(right, item, ioff + rt_data_off);
        }
 
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
-
        btrfs_set_header_nritems(l, mid);
        ret = 0;
        btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
                u32 ioff;
                item = btrfs_item_nr(leaf, i);
 
-               if (!leaf->map_token) {
-                       map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-               }
-
                ioff = btrfs_item_offset(leaf, item);
                btrfs_set_item_offset(leaf, item, ioff + size_diff);
        }
 
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
-
        /* shift the data */
        if (from_end) {
                memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
                u32 ioff;
                item = btrfs_item_nr(leaf, i);
 
-               if (!leaf->map_token) {
-                       map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-               }
                ioff = btrfs_item_offset(leaf, item);
                btrfs_set_item_offset(leaf, item, ioff - data_size);
        }
 
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
-
        /* shift the data */
        memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
                      data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
                 * item0..itemN ... dataN.offset..dataN.size .. data0.size
                 */
                /* first correct the data pointers */
-               WARN_ON(leaf->map_token);
                for (i = slot; i < nritems; i++) {
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       if (!leaf->map_token) {
-                               map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-                       }
-
                        ioff = btrfs_item_offset(leaf, item);
                        btrfs_set_item_offset(leaf, item, ioff - total_data);
                }
-               if (leaf->map_token) {
-                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-                       leaf->map_token = NULL;
-               }
-
                /* shift the items */
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
                              btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
                 * item0..itemN ... dataN.offset..dataN.size .. data0.size
                 */
                /* first correct the data pointers */
-               WARN_ON(leaf->map_token);
                for (i = slot; i < nritems; i++) {
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       if (!leaf->map_token) {
-                               map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-                       }
-
                        ioff = btrfs_item_offset(leaf, item);
                        btrfs_set_item_offset(leaf, item, ioff - total_data);
                }
-               if (leaf->map_token) {
-                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-                       leaf->map_token = NULL;
-               }
-
                /* shift the items */
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
                              btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        u32 ioff;
 
                        item = btrfs_item_nr(leaf, i);
-                       if (!leaf->map_token) {
-                               map_extent_buffer(leaf, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &leaf->map_token, &leaf->kaddr,
-                                       &leaf->map_start, &leaf->map_len,
-                                       KM_USER1);
-                       }
                        ioff = btrfs_item_offset(leaf, item);
                        btrfs_set_item_offset(leaf, item, ioff + dsize);
                }
 
-               if (leaf->map_token) {
-                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-                       leaf->map_token = NULL;
-               }
-
                memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
                              btrfs_item_nr_offset(slot + nr),
                              sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 
        WARN_ON(!path->keep_locks);
 again:
-       cur = btrfs_lock_root_node(root);
+       cur = btrfs_read_lock_root_node(root);
        level = btrfs_header_level(cur);
        WARN_ON(path->nodes[level]);
        path->nodes[level] = cur;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_READ_LOCK;
 
        if (btrfs_header_generation(cur) < min_trans) {
                ret = 1;
@@ -4098,12 +4049,12 @@ find_next_key:
                cur = read_node_slot(root, cur, slot);
                BUG_ON(!cur);
 
-               btrfs_tree_lock(cur);
+               btrfs_tree_read_lock(cur);
 
-               path->locks[level - 1] = 1;
+               path->locks[level - 1] = BTRFS_READ_LOCK;
                path->nodes[level - 1] = cur;
                unlock_up(path, level, 1);
-               btrfs_clear_path_blocking(path, NULL);
+               btrfs_clear_path_blocking(path, NULL, 0);
        }
 out:
        if (ret == 0)
@@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
        u32 nritems;
        int ret;
        int old_spinning = path->leave_spinning;
-       int force_blocking = 0;
+       int next_rw_lock = 0;
 
        nritems = btrfs_header_nritems(path->nodes[0]);
        if (nritems == 0)
                return 1;
 
-       /*
-        * we take the blocks in an order that upsets lockdep.  Using
-        * blocking mode is the only way around it.
-        */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       force_blocking = 1;
-#endif
-
        btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 again:
        level = 1;
        next = NULL;
+       next_rw_lock = 0;
        btrfs_release_path(path);
 
        path->keep_locks = 1;
-
-       if (!force_blocking)
-               path->leave_spinning = 1;
+       path->leave_spinning = 1;
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        path->keep_locks = 0;
@@ -4281,11 +4223,12 @@ again:
                }
 
                if (next) {
-                       btrfs_tree_unlock(next);
+                       btrfs_tree_unlock_rw(next, next_rw_lock);
                        free_extent_buffer(next);
                }
 
                next = c;
+               next_rw_lock = path->locks[level];
                ret = read_block_for_search(NULL, root, path, &next, level,
                                            slot, &key);
                if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@ again:
                }
 
                if (!path->skip_locking) {
-                       ret = btrfs_try_spin_lock(next);
+                       ret = btrfs_try_tree_read_lock(next);
                        if (!ret) {
                                btrfs_set_path_blocking(path);
-                               btrfs_tree_lock(next);
-                               if (!force_blocking)
-                                       btrfs_clear_path_blocking(path, next);
+                               btrfs_tree_read_lock(next);
+                               btrfs_clear_path_blocking(path, next,
+                                                         BTRFS_READ_LOCK);
                        }
-                       if (force_blocking)
-                               btrfs_set_lock_blocking(next);
+                       next_rw_lock = BTRFS_READ_LOCK;
                }
                break;
        }
@@ -4314,14 +4256,13 @@ again:
                level--;
                c = path->nodes[level];
                if (path->locks[level])
-                       btrfs_tree_unlock(c);
+                       btrfs_tree_unlock_rw(c, path->locks[level]);
 
                free_extent_buffer(c);
                path->nodes[level] = next;
                path->slots[level] = 0;
                if (!path->skip_locking)
-                       path->locks[level] = 1;
-
+                       path->locks[level] = next_rw_lock;
                if (!level)
                        break;
 
@@ -4336,16 +4277,14 @@ again:
                }
 
                if (!path->skip_locking) {
-                       btrfs_assert_tree_locked(path->nodes[level]);
-                       ret = btrfs_try_spin_lock(next);
+                       ret = btrfs_try_tree_read_lock(next);
                        if (!ret) {
                                btrfs_set_path_blocking(path);
-                               btrfs_tree_lock(next);
-                               if (!force_blocking)
-                                       btrfs_clear_path_blocking(path, next);
+                               btrfs_tree_read_lock(next);
+                               btrfs_clear_path_blocking(path, next,
+                                                         BTRFS_READ_LOCK);
                        }
-                       if (force_blocking)
-                               btrfs_set_lock_blocking(next);
+                       next_rw_lock = BTRFS_READ_LOCK;
                }
        }
        ret = 0;
index fe9287b064969d503e0e2c44f8c7446a8767ce9b..365c4e1dde04968379b86568dbb129bd6f3f0dbc 100644 (file)
@@ -755,6 +755,8 @@ struct btrfs_space_info {
                                   chunks for this space */
        unsigned int chunk_alloc:1;     /* set if we are allocating a chunk */
 
+       unsigned int flush:1;           /* set if we are trying to make space */
+
        unsigned int force_alloc;       /* set if we need to force a chunk
                                           alloc for this space */
 
@@ -764,7 +766,7 @@ struct btrfs_space_info {
        struct list_head block_groups[BTRFS_NR_RAID_TYPES];
        spinlock_t lock;
        struct rw_semaphore groups_sem;
-       atomic_t caching_threads;
+       wait_queue_head_t wait;
 };
 
 struct btrfs_block_rsv {
@@ -824,6 +826,7 @@ struct btrfs_caching_control {
        struct list_head list;
        struct mutex mutex;
        wait_queue_head_t wait;
+       struct btrfs_work work;
        struct btrfs_block_group_cache *block_group;
        u64 progress;
        atomic_t count;
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info {
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers endio_freespace_worker;
        struct btrfs_workers submit_workers;
+       struct btrfs_workers caching_workers;
+
        /*
         * fixup workers take dirty pages that didn't properly go through
         * the cow mechanism and make them safe to write.  It happens
@@ -2128,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 
 /* extent-tree.c */
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
-                                                int num_items)
+                                                unsigned num_items)
 {
        return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
                3 * num_items;
@@ -2222,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2330,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
 void btrfs_clear_path_blocking(struct btrfs_path *p,
-                              struct extent_buffer *held);
+                              struct extent_buffer *held, int held_rw);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
index 98c68e658a9b2eb08a8ba99d0b5691f16fc8a67d..b52c672f4c180beae28f60302071731bd343dcba 100644 (file)
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
        }
 
        /* reset all the locked nodes in the patch to spinning locks. */
-       btrfs_clear_path_blocking(path, NULL);
+       btrfs_clear_path_blocking(path, NULL, 0);
 
        /* insert the keys of the items */
        ret = setup_items_for_insert(trans, root, path, keys, data_size,
index 685f2593c4f049559a087cec8b88daa04a0d357d..c360a848d97fb6115c58a41bb45aeef611c4c86f 100644 (file)
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
        data_size = sizeof(*dir_item) + name_len + data_len;
        dir_item = insert_with_overflow(trans, root, path, &key, data_size,
                                        name, name_len);
-       /*
-        * FIXME: at some point we should handle xattr's that are larger than
-        * what we can fit in our leaf.  We set location to NULL b/c we arent
-        * pointing at anything else, that will change if we store the xattr
-        * data in a separate inode.
-        */
-       BUG_ON(IS_ERR(dir_item));
+       if (IS_ERR(dir_item))
+               return PTR_ERR(dir_item);
        memset(&location, 0, sizeof(location));
 
        leaf = path->nodes[0];
index b231ae13b2697ebcd5b88d848a81ddc83e01c990..07b3ac662e193e6b3a19180c3c3c9bfccbeb8028 100644 (file)
@@ -100,38 +100,83 @@ struct async_submit_bio {
        struct btrfs_work work;
 };
 
-/* These are used to set the lockdep class on the extent buffer locks.
- * The class is set by the readpage_end_io_hook after the buffer has
- * passed csum validation but before the pages are unlocked.
+/*
+ * Lockdep class keys for extent_buffer->lock's in this root.  For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
+ *
+ * Different roots are used for different purposes and may nest inside each
+ * other and they require separate keysets.  As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->objectid.  This ensures that all special purpose roots
+ * have separate keysets.
  *
- * The lockdep class is also set by btrfs_init_new_buffer on freshly
- * allocated blocks.
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked thus preventing deadlock.  As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
  *
- * The class is based on the level in the tree block, which allows lockdep
- * to know that lower nodes nest inside the locks of higher nodes.
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked.  It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
  *
- * We also add a check to make sure the highest level of the tree is
- * the same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this
- * code needs update as well.
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # if BTRFS_MAX_LEVEL != 8
 #  error
 # endif
-static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
-static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
-       /* leaf */
-       "btrfs-extent-00",
-       "btrfs-extent-01",
-       "btrfs-extent-02",
-       "btrfs-extent-03",
-       "btrfs-extent-04",
-       "btrfs-extent-05",
-       "btrfs-extent-06",
-       "btrfs-extent-07",
-       /* highest possible level */
-       "btrfs-extent-08",
+
+static struct btrfs_lockdep_keyset {
+       u64                     id;             /* root objectid */
+       const char              *name_stem;     /* lock name stem */
+       char                    names[BTRFS_MAX_LEVEL + 1][20];
+       struct lock_class_key   keys[BTRFS_MAX_LEVEL + 1];
+} btrfs_lockdep_keysets[] = {
+       { .id = BTRFS_ROOT_TREE_OBJECTID,       .name_stem = "root"     },
+       { .id = BTRFS_EXTENT_TREE_OBJECTID,     .name_stem = "extent"   },
+       { .id = BTRFS_CHUNK_TREE_OBJECTID,      .name_stem = "chunk"    },
+       { .id = BTRFS_DEV_TREE_OBJECTID,        .name_stem = "dev"      },
+       { .id = BTRFS_FS_TREE_OBJECTID,         .name_stem = "fs"       },
+       { .id = BTRFS_CSUM_TREE_OBJECTID,       .name_stem = "csum"     },
+       { .id = BTRFS_ORPHAN_OBJECTID,          .name_stem = "orphan"   },
+       { .id = BTRFS_TREE_LOG_OBJECTID,        .name_stem = "log"      },
+       { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
+       { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
+       { .id = 0,                              .name_stem = "tree"     },
 };
+
+void __init btrfs_init_lockdep(void)
+{
+       int i, j;
+
+       /* initialize lockdep class names */
+       for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+               struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+
+               for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+                       snprintf(ks->names[j], sizeof(ks->names[j]),
+                                "btrfs-%s-%02d", ks->name_stem, j);
+       }
+}
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+                                   int level)
+{
+       struct btrfs_lockdep_keyset *ks;
+
+       BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+       /* find the matching keyset, id 0 is the default entry */
+       for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+               if (ks->id == objectid)
+                       break;
+
+       lockdep_set_class_and_name(&eb->lock,
+                                  &ks->keys[level], ks->names[level]);
+}
+
 #endif
 
 /*
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
        unsigned long len;
        unsigned long cur_len;
        unsigned long offset = BTRFS_CSUM_SIZE;
-       char *map_token = NULL;
        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
        len = buf->len - offset;
        while (len > 0) {
                err = map_private_extent_buffer(buf, offset, 32,
-                                       &map_token, &kaddr,
-                                       &map_start, &map_len, KM_USER0);
+                                       &kaddr, &map_start, &map_len);
                if (err)
                        return 1;
                cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                                      crc, cur_len);
                len -= cur_len;
                offset += cur_len;
-               unmap_extent_buffer(buf, map_token, KM_USER0);
        }
        if (csum_size > sizeof(inline_result)) {
                result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
-{
-       lockdep_set_class_and_name(&eb->lock,
-                          &btrfs_eb_class[level],
-                          btrfs_eb_name[level]);
-}
-#endif
-
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state)
 {
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        }
        found_level = btrfs_header_level(eb);
 
-       btrfs_set_buffer_lockdep_class(eb, found_level);
+       btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+                                      eb, found_level);
 
        ret = csum_tree_block(root, eb, 1);
        if (ret) {
@@ -1598,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                goto fail_bdi;
        }
 
-       fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+       mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1802,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                           fs_info->thread_pool_size),
                           &fs_info->generic_worker);
 
+       btrfs_init_workers(&fs_info->caching_workers, "cache",
+                          2, &fs_info->generic_worker);
+
        /* a higher idle thresh on the submit workers makes it much more
         * likely that bios will be send down in a sane order to the
         * devices
@@ -1855,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
        btrfs_start_workers(&fs_info->delayed_workers, 1);
+       btrfs_start_workers(&fs_info->caching_workers, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2112,6 +2150,7 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
+       btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
        kfree(fs_info->delayed_root);
 fail_iput:
@@ -2577,6 +2616,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
+       btrfs_stop_workers(&fs_info->caching_workers);
 
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
index a0b610a67aaeadf1f564d03e218305997c021a85..bec3ea4bd67fd465a8a5e008fcae1e7353f573ad 100644 (file)
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page);
 
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+void btrfs_init_lockdep(void);
+void btrfs_set_buffer_lockdep_class(u64 objectid,
+                                   struct extent_buffer *eb, int level);
 #else
-static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
-                                                int level)
+static inline void btrfs_init_lockdep(void)
+{ }
+static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+                                       struct extent_buffer *eb, int level)
 {
 }
 #endif
index 71cd456fdb60360d44edd113ab7c593bdacfa658..4d08ed79405d2c5729fd78d45c75a02f4e556687 100644 (file)
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
        return total_added;
 }
 
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
 {
-       struct btrfs_block_group_cache *block_group = data;
-       struct btrfs_fs_info *fs_info = block_group->fs_info;
-       struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
-       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_caching_control *caching_ctl;
+       struct btrfs_root *extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
        u32 nritems;
        int ret = 0;
 
+       caching_ctl = container_of(work, struct btrfs_caching_control, work);
+       block_group = caching_ctl->block_group;
+       fs_info = block_group->fs_info;
+       extent_root = fs_info->extent_root;
+
        path = btrfs_alloc_path();
        if (!path)
-               return -ENOMEM;
+               goto out;
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
@@ -433,13 +438,11 @@ err:
        free_excluded_extents(extent_root, block_group);
 
        mutex_unlock(&caching_ctl->mutex);
+out:
        wake_up(&caching_ctl->wait);
 
        put_caching_control(caching_ctl);
-       atomic_dec(&block_group->space_info->caching_threads);
        btrfs_put_block_group(block_group);
-
-       return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
-       struct task_struct *tsk;
        int ret = 0;
 
        smp_mb();
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        caching_ctl->progress = cache->key.objectid;
        /* one for caching kthread, one for caching block group list */
        atomic_set(&caching_ctl->count, 2);
+       caching_ctl->work.func = caching_thread;
 
        spin_lock(&cache->lock);
        if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
        up_write(&fs_info->extent_commit_sem);
 
-       atomic_inc(&cache->space_info->caching_threads);
        btrfs_get_block_group(cache);
 
-       tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
-                         cache->key.objectid);
-       if (IS_ERR(tsk)) {
-               ret = PTR_ERR(tsk);
-               printk(KERN_ERR "error running thread %d\n", ret);
-               BUG();
-       }
+       btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
 
        return ret;
 }
@@ -2932,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->full = 0;
        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
        found->chunk_alloc = 0;
+       found->flush = 0;
+       init_waitqueue_head(&found->wait);
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
-       atomic_set(&found->caching_threads, 0);
        return 0;
 }
 
@@ -3314,6 +3311,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        if (reserved == 0)
                return 0;
 
+       smp_mb();
+       if (root->fs_info->delalloc_bytes == 0) {
+               if (trans)
+                       return 0;
+               btrfs_wait_ordered_extents(root, 0, 0);
+               return 0;
+       }
+
        max_reclaim = min(reserved, to_reclaim);
 
        while (loops < 1024) {
@@ -3356,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                }
 
        }
+       if (reclaimed >= to_reclaim && !trans)
+               btrfs_wait_ordered_extents(root, 0, 0);
        return reclaimed >= to_reclaim;
 }
 
@@ -3380,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
        u64 num_bytes = orig_bytes;
        int retries = 0;
        int ret = 0;
-       bool reserved = false;
        bool committed = false;
+       bool flushing = false;
 
 again:
-       ret = -ENOSPC;
-       if (reserved)
-               num_bytes = 0;
-
+       ret = 0;
        spin_lock(&space_info->lock);
+       /*
+        * We only want to wait if somebody other than us is flushing and we are
+        * actually alloed to flush.
+        */
+       while (flush && !flushing && space_info->flush) {
+               spin_unlock(&space_info->lock);
+               /*
+                * If we have a trans handle we can't wait because the flusher
+                * may have to commit the transaction, which would mean we would
+                * deadlock since we are waiting for the flusher to finish, but
+                * hold the current transaction open.
+                */
+               if (trans)
+                       return -EAGAIN;
+               ret = wait_event_interruptible(space_info->wait,
+                                              !space_info->flush);
+               /* Must have been interrupted, return */
+               if (ret)
+                       return -EINTR;
+
+               spin_lock(&space_info->lock);
+       }
+
+       ret = -ENOSPC;
        unused = space_info->bytes_used + space_info->bytes_reserved +
                 space_info->bytes_pinned + space_info->bytes_readonly +
                 space_info->bytes_may_use;
@@ -3403,8 +3431,7 @@ again:
        if (unused <= space_info->total_bytes) {
                unused = space_info->total_bytes - unused;
                if (unused >= num_bytes) {
-                       if (!reserved)
-                               space_info->bytes_reserved += orig_bytes;
+                       space_info->bytes_reserved += orig_bytes;
                        ret = 0;
                } else {
                        /*
@@ -3429,17 +3456,14 @@ again:
         * to reclaim space we can actually use it instead of somebody else
         * stealing it from us.
         */
-       if (ret && !reserved) {
-               space_info->bytes_reserved += orig_bytes;
-               reserved = true;
+       if (ret && flush) {
+               flushing = true;
+               space_info->flush = 1;
        }
 
        spin_unlock(&space_info->lock);
 
-       if (!ret)
-               return 0;
-
-       if (!flush)
+       if (!ret || !flush)
                goto out;
 
        /*
@@ -3447,11 +3471,11 @@ again:
         * metadata until after the IO is completed.
         */
        ret = shrink_delalloc(trans, root, num_bytes, 1);
-       if (ret > 0)
-               return 0;
-       else if (ret < 0)
+       if (ret < 0)
                goto out;
 
+       ret = 0;
+
        /*
         * So if we were overcommitted it's possible that somebody else flushed
         * out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3486,11 @@ again:
                goto again;
        }
 
-       spin_lock(&space_info->lock);
        /*
         * Not enough space to be reclaimed, don't bother committing the
         * transaction.
         */
+       spin_lock(&space_info->lock);
        if (space_info->bytes_pinned < orig_bytes)
                ret = -ENOSPC;
        spin_unlock(&space_info->lock);
@@ -3474,10 +3498,13 @@ again:
                goto out;
 
        ret = -EAGAIN;
-       if (trans || committed)
+       if (trans)
                goto out;
 
        ret = -ENOSPC;
+       if (committed)
+               goto out;
+
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                goto out;
@@ -3489,12 +3516,12 @@ again:
        }
 
 out:
-       if (reserved) {
+       if (flushing) {
                spin_lock(&space_info->lock);
-               space_info->bytes_reserved -= orig_bytes;
+               space_info->flush = 0;
+               wake_up_all(&space_info->wait);
                spin_unlock(&space_info->lock);
        }
-
        return ret;
 }
 
@@ -3704,7 +3731,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
        if (commit_trans) {
                if (trans)
                        return -EAGAIN;
-
                trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
                ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3900,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                int num_items)
-{
-       u64 num_bytes;
-       int ret;
-
-       if (num_items == 0 || root->fs_info->chunk_root == root)
-               return 0;
-
-       num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-       ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-                                 num_bytes);
-       if (!ret) {
-               trans->bytes_reserved += num_bytes;
-               trans->block_rsv = &root->fs_info->trans_block_rsv;
-       }
-       return ret;
-}
-
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
 {
@@ -3944,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
        return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+       unsigned dropped_extents = 0;
+
+       spin_lock(&BTRFS_I(inode)->lock);
+       BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+       BTRFS_I(inode)->outstanding_extents--;
+
+       /*
+        * If we have more or the same amount of outsanding extents than we have
+        * reserved then we need to leave the reserved extents count alone.
+        */
+       if (BTRFS_I(inode)->outstanding_extents >=
+           BTRFS_I(inode)->reserved_extents)
+               goto out;
+
+       dropped_extents = BTRFS_I(inode)->reserved_extents -
+               BTRFS_I(inode)->outstanding_extents;
+       BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+       spin_unlock(&BTRFS_I(inode)->lock);
+       return dropped_extents;
+}
+
 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
        return num_bytes >>= 3;
@@ -3953,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
-       u64 to_reserve;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_reserve = 0;
+       unsigned nr_extents = 0;
        int ret;
 
        if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
 
-       nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+
+       if (BTRFS_I(inode)->outstanding_extents >
+           BTRFS_I(inode)->reserved_extents) {
+               nr_extents = BTRFS_I(inode)->outstanding_extents -
+                       BTRFS_I(inode)->reserved_extents;
+               BTRFS_I(inode)->reserved_extents += nr_extents;
 
-       if (nr_extents > reserved_extents) {
-               nr_extents -= reserved_extents;
                to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-       } else {
-               nr_extents = 0;
-               to_reserve = 0;
        }
+       spin_unlock(&BTRFS_I(inode)->lock);
 
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-       if (ret)
+       if (ret) {
+               unsigned dropped;
+               /*
+                * We don't need the return value since our reservation failed,
+                * we just need to clean up our counter.
+                */
+               dropped = drop_outstanding_extent(inode);
+               WARN_ON(dropped > 1);
                return ret;
-
-       atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       }
 
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
-       if (block_rsv->size > 512 * 1024 * 1024)
-               shrink_delalloc(NULL, root, to_reserve, 0);
-
        return 0;
 }
 
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 to_free;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_free = 0;
+       unsigned dropped;
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
-       do {
-               int old, new;
-
-               nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-               if (nr_extents >= reserved_extents) {
-                       nr_extents = 0;
-                       break;
-               }
-               old = reserved_extents;
-               nr_extents = reserved_extents - nr_extents;
-               new = reserved_extents - nr_extents;
-               old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
-                                    reserved_extents, new);
-               if (likely(old == reserved_extents))
-                       break;
-               reserved_extents = old;
-       } while (1);
+       dropped = drop_outstanding_extent(inode);
 
        to_free = calc_csum_metadata_size(inode, num_bytes);
-       if (nr_extents > 0)
-               to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+       if (dropped > 0)
+               to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
        btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
                                to_free);
@@ -4990,14 +5002,10 @@ have_block_group:
                        }
 
                        /*
-                        * We only want to start kthread caching if we are at
-                        * the point where we will wait for caching to make
-                        * progress, or if our ideal search is over and we've
-                        * found somebody to start caching.
+                        * The caching workers are limited to 2 threads, so we
+                        * can queue as much work as we care to.
                         */
-                       if (loop > LOOP_CACHING_NOWAIT ||
-                           (loop > LOOP_FIND_IDEAL &&
-                            atomic_read(&space_info->caching_threads) < 2)) {
+                       if (loop > LOOP_FIND_IDEAL) {
                                ret = cache_block_group(block_group, trans,
                                                        orig_root, 0);
                                BUG_ON(ret);
@@ -5219,8 +5227,7 @@ loop:
                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                        found_uncached_bg = false;
                        loop++;
-                       if (!ideal_cache_percent &&
-                           atomic_read(&space_info->caching_threads))
+                       if (!ideal_cache_percent)
                                goto search;
 
                        /*
@@ -5623,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
        if (!buf)
                return ERR_PTR(-ENOMEM);
        btrfs_set_header_generation(buf, trans->transid);
-       btrfs_set_buffer_lockdep_class(buf, level);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
        btrfs_tree_lock(buf);
        clean_tree_block(trans, root, buf);
 
@@ -5910,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                        return 1;
 
                if (path->locks[level] && !wc->keep_locks) {
-                       btrfs_tree_unlock(eb);
+                       btrfs_tree_unlock_rw(eb, path->locks[level]);
                        path->locks[level] = 0;
                }
                return 0;
@@ -5934,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
         * keep the tree lock
         */
        if (path->locks[level] && level > 0) {
-               btrfs_tree_unlock(eb);
+               btrfs_tree_unlock_rw(eb, path->locks[level]);
                path->locks[level] = 0;
        }
        return 0;
@@ -6047,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        BUG_ON(level != btrfs_header_level(next));
        path->nodes[level] = next;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
        wc->level = level;
        if (wc->level == 1)
                wc->reada_slot = 0;
@@ -6118,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(level == 0);
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
                        ret = btrfs_lookup_extent_info(trans, root,
                                                       eb->start, eb->len,
@@ -6127,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(ret);
                        BUG_ON(wc->refs[level] == 0);
                        if (wc->refs[level] == 1) {
-                               btrfs_tree_unlock(eb);
-                               path->locks[level] = 0;
+                               btrfs_tree_unlock_rw(eb, path->locks[level]);
                                return 1;
                        }
                }
@@ -6150,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                    btrfs_header_generation(eb) == trans->transid) {
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                }
                clean_tree_block(trans, root, eb);
        }
@@ -6229,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
                                return 0;
 
                        if (path->locks[level]) {
-                               btrfs_tree_unlock(path->nodes[level]);
+                               btrfs_tree_unlock_rw(path->nodes[level],
+                                                    path->locks[level]);
                                path->locks[level] = 0;
                        }
                        free_extent_buffer(path->nodes[level]);
@@ -6281,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                path->nodes[level] = btrfs_lock_root_node(root);
                btrfs_set_lock_blocking(path->nodes[level]);
                path->slots[level] = 0;
-               path->locks[level] = 1;
+               path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                memset(&wc->update_progress, 0,
                       sizeof(wc->update_progress));
        } else {
@@ -6449,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        level = btrfs_header_level(node);
        path->nodes[level] = node;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
        wc->refs[parent_level] = 1;
        wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6524,15 +6531,28 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        return flags;
 }
 
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
        struct btrfs_space_info *sinfo = cache->space_info;
        u64 num_bytes;
+       u64 min_allocable_bytes;
        int ret = -ENOSPC;
 
        if (cache->ro)
                return 0;
 
+       /*
+        * We need some metadata space and system metadata space for
+        * allocating chunks in some corner cases until we force to set
+        * it to be readonly.
+        */
+       if ((sinfo->flags &
+            (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+           !force)
+               min_allocable_bytes = 1 * 1024 * 1024;
+       else
+               min_allocable_bytes = 0;
+
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
        num_bytes = cache->key.offset - cache->reserved - cache->pinned -
@@ -6540,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
 
        if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
            sinfo->bytes_may_use + sinfo->bytes_readonly +
-           cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+           cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+           sinfo->total_bytes) {
                sinfo->bytes_readonly += num_bytes;
                sinfo->bytes_reserved += cache->reserved_pinned;
                cache->reserved_pinned = 0;
@@ -6571,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
                               CHUNK_ALLOC_FORCE);
 
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                             CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
 out:
        btrfs_end_transaction(trans, root);
        return ret;
@@ -7016,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
                set_avail_alloc_bits(root->fs_info, cache->flags);
                if (btrfs_chunk_readonly(root, cache->key.objectid))
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * mirrored block groups.
                 */
                list_for_each_entry(cache, &space_info->block_groups[3], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
                list_for_each_entry(cache, &space_info->block_groups[4], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        init_global_block_rsv(info);
index 561262d35689f463875e6bf4214f12ff5725b775..067b1747421bfd655b2d0ddbd228f195cca65553 100644 (file)
@@ -281,11 +281,10 @@ static int merge_state(struct extent_io_tree *tree,
                if (other->start == state->end + 1 &&
                    other->state == state->state) {
                        merge_cb(tree, state, other);
-                       other->start = state->start;
-                       state->tree = NULL;
-                       rb_erase(&state->rb_node, &tree->state);
-                       free_extent_state(state);
-                       state = NULL;
+                       state->end = other->end;
+                       other->tree = NULL;
+                       rb_erase(&other->rb_node, &tree->state);
+                       free_extent_state(other);
                }
        }
 
@@ -351,7 +350,6 @@ static int insert_state(struct extent_io_tree *tree,
                       "%llu %llu\n", (unsigned long long)found->start,
                       (unsigned long long)found->end,
                       (unsigned long long)start, (unsigned long long)end);
-               free_extent_state(state);
                return -EEXIST;
        }
        state->tree = tree;
@@ -500,7 +498,8 @@ again:
                        cached_state = NULL;
                }
 
-               if (cached && cached->tree && cached->start == start) {
+               if (cached && cached->tree && cached->start <= start &&
+                   cached->end > start) {
                        if (clear)
                                atomic_dec(&cached->refs);
                        state = cached;
@@ -742,7 +741,8 @@ again:
        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
                state = *cached_state;
-               if (state->start == start && state->tree) {
+               if (state->start <= start && state->end > start &&
+                   state->tree) {
                        node = &state->rb_node;
                        goto hit_next;
                }
@@ -783,13 +783,13 @@ hit_next:
                if (err)
                        goto out;
 
-               next_node = rb_next(node);
                cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
 
                start = last_end + 1;
+               next_node = rb_next(&state->rb_node);
                if (next_node && start < end && prealloc && !need_resched()) {
                        state = rb_entry(next_node, struct extent_state,
                                         rb_node);
@@ -862,7 +862,6 @@ hit_next:
                 * Avoid to free 'prealloc' if it can be merged with
                 * the later extent.
                 */
-               atomic_inc(&prealloc->refs);
                err = insert_state(tree, prealloc, start, this_end,
                                   &bits);
                BUG_ON(err == -EEXIST);
@@ -872,7 +871,6 @@ hit_next:
                        goto out;
                }
                cache_state(prealloc, cached_state);
-               free_extent_state(prealloc);
                prealloc = NULL;
                start = this_end + 1;
                goto search_again;
@@ -1564,7 +1562,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
        int bitset = 0;
 
        spin_lock(&tree->lock);
-       if (cached && cached->tree && cached->start == start)
+       if (cached && cached->tree && cached->start <= start &&
+           cached->end > start)
                node = &cached->rb_node;
        else
                node = tree_search(tree, start);
@@ -2432,6 +2431,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        int scanned = 0;
+       int tag;
 
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
@@ -2442,11 +2442,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                scanned = 1;
        }
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag = PAGECACHE_TAG_TOWRITE;
+       else
+               tag = PAGECACHE_TAG_DIRTY;
 retry:
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag_pages_for_writeback(mapping, index, end);
        while (!done && !nr_to_write_done && (index <= end) &&
-              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                             PAGECACHE_TAG_DIRTY, min(end - index,
-                                 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
                unsigned i;
 
                scanned = 1;
@@ -3020,8 +3025,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                return NULL;
        eb->start = start;
        eb->len = len;
-       spin_lock_init(&eb->lock);
-       init_waitqueue_head(&eb->lock_wq);
+       rwlock_init(&eb->lock);
+       atomic_set(&eb->write_locks, 0);
+       atomic_set(&eb->read_locks, 0);
+       atomic_set(&eb->blocking_readers, 0);
+       atomic_set(&eb->blocking_writers, 0);
+       atomic_set(&eb->spinning_readers, 0);
+       atomic_set(&eb->spinning_writers, 0);
+       init_waitqueue_head(&eb->write_lock_wq);
+       init_waitqueue_head(&eb->read_lock_wq);
 
 #if LEAK_DEBUG
        spin_lock_irqsave(&leak_lock, flags);
@@ -3117,7 +3129,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                i = 0;
        }
        for (; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
+               p = find_or_create_page(mapping, index, GFP_NOFS);
                if (!p) {
                        WARN_ON(1);
                        goto free_eb;
@@ -3264,6 +3276,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
        return was_dirty;
 }
 
+static int __eb_straddles_pages(u64 start, u64 len)
+{
+       if (len < PAGE_CACHE_SIZE)
+               return 1;
+       if (start & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       if ((start + len) & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       return 0;
+}
+
+static int eb_straddles_pages(struct extent_buffer *eb)
+{
+       return __eb_straddles_pages(eb->start, eb->len);
+}
+
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
                                struct extent_buffer *eb,
                                struct extent_state **cached_state)
@@ -3275,8 +3303,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
        clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
-       clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                             cached_state, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                     cached_state, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if (page)
@@ -3294,8 +3324,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 
        num_pages = num_extent_pages(eb->start, eb->len);
 
-       set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                           NULL, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                   NULL, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3318,9 +3350,12 @@ int extent_range_uptodate(struct extent_io_tree *tree,
        int uptodate;
        unsigned long index;
 
-       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
-       if (ret)
-               return 1;
+       if (__eb_straddles_pages(start, end - start + 1)) {
+               ret = test_range_bit(tree, start, end,
+                                    EXTENT_UPTODATE, 1, NULL);
+               if (ret)
+                       return 1;
+       }
        while (start <= end) {
                index = start >> PAGE_CACHE_SHIFT;
                page = find_get_page(tree->mapping, index);
@@ -3348,10 +3383,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 1;
 
-       ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, cached_state);
-       if (ret)
-               return ret;
+       if (eb_straddles_pages(eb)) {
+               ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, cached_state);
+               if (ret)
+                       return ret;
+       }
 
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = 0; i < num_pages; i++) {
@@ -3384,9 +3421,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 0;
 
-       if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, NULL)) {
-               return 0;
+       if (eb_straddles_pages(eb)) {
+               if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, NULL)) {
+                       return 0;
+               }
        }
 
        if (start) {
@@ -3490,9 +3529,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
                page = extent_buffer_page(eb, i);
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(dst, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER1);
 
                dst += cur;
                len -= cur;
@@ -3502,9 +3540,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 }
 
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                              unsigned long min_len, char **token, char **map,
+                              unsigned long min_len, char **map,
                               unsigned long *map_start,
-                              unsigned long *map_len, int km)
+                              unsigned long *map_len)
 {
        size_t offset = start & (PAGE_CACHE_SIZE - 1);
        char *kaddr;
@@ -3534,42 +3572,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
        }
 
        p = extent_buffer_page(eb, i);
-       kaddr = kmap_atomic(p, km);
-       *token = kaddr;
+       kaddr = page_address(p);
        *map = kaddr + offset;
        *map_len = PAGE_CACHE_SIZE - offset;
        return 0;
 }
 
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                     unsigned long min_len,
-                     char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km)
-{
-       int err;
-       int save = 0;
-       if (eb->map_token) {
-               unmap_extent_buffer(eb, eb->map_token, km);
-               eb->map_token = NULL;
-               save = 1;
-       }
-       err = map_private_extent_buffer(eb, start, min_len, token, map,
-                                      map_start, map_len, km);
-       if (!err && save) {
-               eb->map_token = *token;
-               eb->kaddr = *map;
-               eb->map_start = *map_start;
-               eb->map_len = *map_len;
-       }
-       return err;
-}
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
-       kunmap_atomic(token, km);
-}
-
 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
                          unsigned long start,
                          unsigned long len)
@@ -3593,9 +3601,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                ret = memcmp(ptr, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
                if (ret)
                        break;
 
@@ -3628,9 +3635,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(kaddr + offset, src, cur);
-               kunmap_atomic(kaddr, KM_USER1);
 
                src += cur;
                len -= cur;
@@ -3659,9 +3665,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                memset(kaddr + offset, c, cur);
-               kunmap_atomic(kaddr, KM_USER0);
 
                len -= cur;
                offset = 0;
@@ -3692,9 +3697,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                read_extent_buffer(src, kaddr + offset, src_offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
 
                src_offset += cur;
                len -= cur;
@@ -3707,20 +3711,17 @@ static void move_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
 {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        if (dst_page == src_page) {
                memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
        } else {
-               char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+               char *src_kaddr = page_address(src_page);
                char *p = dst_kaddr + dst_off + len;
                char *s = src_kaddr + src_off + len;
 
                while (len--)
                        *--p = *--s;
-
-               kunmap_atomic(src_kaddr, KM_USER1);
        }
-       kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -3733,20 +3734,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
 {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        char *src_kaddr;
 
        if (dst_page != src_page) {
-               src_kaddr = kmap_atomic(src_page, KM_USER1);
+               src_kaddr = page_address(src_page);
        } else {
                src_kaddr = dst_kaddr;
                BUG_ON(areas_overlap(src_off, dst_off, len));
        }
 
        memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-       kunmap_atomic(dst_kaddr, KM_USER0);
-       if (dst_page != src_page)
-               kunmap_atomic(src_kaddr, KM_USER1);
 }
 
 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
index a11a92ee2d30a84ccf52284e704c5cc971f7126e..21a7ca9e72825752a3b910cdfb9fac444094b96b 100644 (file)
@@ -120,8 +120,6 @@ struct extent_state {
 struct extent_buffer {
        u64 start;
        unsigned long len;
-       char *map_token;
-       char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
        struct page *first_page;
@@ -130,14 +128,26 @@ struct extent_buffer {
        struct rcu_head rcu_head;
        atomic_t refs;
 
-       /* the spinlock is used to protect most operations */
-       spinlock_t lock;
+       /* count of read lock holders on the extent buffer */
+       atomic_t write_locks;
+       atomic_t read_locks;
+       atomic_t blocking_writers;
+       atomic_t blocking_readers;
+       atomic_t spinning_readers;
+       atomic_t spinning_writers;
+
+       /* protects write locks */
+       rwlock_t lock;
 
-       /*
-        * when we keep the lock held while blocking, waiters go onto
-        * the wq
+       /* readers use lock_wq while they wait for the write
+        * lock holders to unlock
         */
-       wait_queue_head_t lock_wq;
+       wait_queue_head_t write_lock_wq;
+
+       /* writers use read_lock_wq while they wait for readers
+        * to unlock
+        */
+       wait_queue_head_t read_lock_wq;
 };
 
 static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -279,15 +289,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 int extent_buffer_uptodate(struct extent_io_tree *tree,
                           struct extent_buffer *eb,
                           struct extent_state *cached_state);
-int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-                     unsigned long min_len, char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km);
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
-                     unsigned long min_len, char **token, char **map,
+                     unsigned long min_len, char **map,
                      unsigned long *map_start,
-                     unsigned long *map_len, int km);
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+                     unsigned long *map_len);
 int extent_range_uptodate(struct extent_io_tree *tree,
                          u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode,
index 90d4ee52cd458ac9f7bf87dfe4a34c99be27bc30..08bcfa92a2226bcfd3ba0edf6fe8eb944454612b 100644 (file)
@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 
        WARN_ON(bio->bi_vcnt <= 0);
 
+       /*
+        * the free space stuff is only read when it hasn't been
+        * updated in the current transaction.  So, we can safely
+        * read from the commit root and sidestep a nasty deadlock
+        * between reading the free space cache and updating the csum tree.
+        */
+       if (btrfs_is_free_space_inode(root, inode))
+               path->search_commit_root = 1;
+
        disk_bytenr = (u64)bio->bi_sector << 9;
        if (dio)
                offset = logical_offset;
@@ -664,10 +673,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
        struct btrfs_sector_sum *sector_sum;
        u32 nritems;
        u32 ins_size;
-       char *eb_map;
-       char *eb_token;
-       unsigned long map_len;
-       unsigned long map_start;
        u16 csum_size =
                btrfs_super_csum_size(&root->fs_info->super_copy);
 
@@ -814,30 +819,9 @@ found:
        item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
        item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
                                      btrfs_item_size_nr(leaf, path->slots[0]));
-       eb_token = NULL;
 next_sector:
 
-       if (!eb_token ||
-          (unsigned long)item + csum_size >= map_start + map_len) {
-               int err;
-
-               if (eb_token)
-                       unmap_extent_buffer(leaf, eb_token, KM_USER1);
-               eb_token = NULL;
-               err = map_private_extent_buffer(leaf, (unsigned long)item,
-                                               csum_size,
-                                               &eb_token, &eb_map,
-                                               &map_start, &map_len, KM_USER1);
-               if (err)
-                       eb_token = NULL;
-       }
-       if (eb_token) {
-               memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
-                      &sector_sum->sum, csum_size);
-       } else {
-               write_extent_buffer(leaf, &sector_sum->sum,
-                                   (unsigned long)item, csum_size);
-       }
+       write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
 
        total_bytes += root->sectorsize;
        sector_sum++;
@@ -850,10 +834,7 @@ next_sector:
                        goto next_sector;
                }
        }
-       if (eb_token) {
-               unmap_extent_buffer(leaf, eb_token, KM_USER1);
-               eb_token = NULL;
-       }
+
        btrfs_mark_buffer_dirty(path->nodes[0]);
        if (total_bytes < sums->len) {
                btrfs_release_path(path);
index 59cbdb120ad0917a001dd9c97d5a17999e6ce59e..a35e51c9f235559c5c9a0f5f5ddea267e6c14a40 100644 (file)
@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 
 again:
        for (i = 0; i < num_pages; i++) {
-               pages[i] = grab_cache_page(inode->i_mapping, index + i);
+               pages[i] = find_or_create_page(inode->i_mapping, index + i,
+                                              GFP_NOFS);
                if (!pages[i]) {
                        faili = i - 1;
                        err = -ENOMEM;
@@ -1238,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                 * managed to copy.
                 */
                if (num_pages > dirty_pages) {
-                       if (copied > 0)
-                               atomic_inc(
-                                       &BTRFS_I(inode)->outstanding_extents);
+                       if (copied > 0) {
+                               spin_lock(&BTRFS_I(inode)->lock);
+                               BTRFS_I(inode)->outstanding_extents++;
+                               spin_unlock(&BTRFS_I(inode)->lock);
+                       }
                        btrfs_delalloc_release_space(inode,
                                        (num_pages - dirty_pages) <<
                                        PAGE_CACHE_SHIFT);
index bf0d61567f3d65a24e9d11802acdd0c8c5abf32b..6377713f639c978db84f64dec2dc9541e8c8287e 100644 (file)
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
                return inode;
 
        spin_lock(&block_group->lock);
+       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
+               printk(KERN_INFO "Old style space inode found, converting.\n");
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
+               block_group->disk_cache_state = BTRFS_DC_CLEAR;
+       }
+
        if (!btrfs_fs_closing(root->fs_info)) {
                block_group->inode = igrab(inode);
                block_group->iref = 1;
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root,
        btrfs_set_inode_gid(leaf, inode_item, 0);
        btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
        btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
-                             BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+                             BTRFS_INODE_PREALLOC);
        btrfs_set_inode_nlink(leaf, inode_item, 1);
        btrfs_set_inode_transid(leaf, inode_item, trans->transid);
        btrfs_set_inode_block_group(leaf, inode_item, offset);
@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
        struct btrfs_free_space_header *header;
        struct extent_buffer *leaf;
        struct page *page;
-       u32 *checksums = NULL, *crc;
-       char *disk_crcs = NULL;
        struct btrfs_key key;
        struct list_head bitmaps;
        u64 num_entries;
        u64 num_bitmaps;
        u64 generation;
-       u32 cur_crc = ~(u32)0;
        pgoff_t index = 0;
-       unsigned long first_page_offset;
-       int num_checksums;
        int ret = 0;
 
        INIT_LIST_HEAD(&bitmaps);
@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
        if (!num_entries)
                goto out;
 
-       /* Setup everything for doing checksumming */
-       num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-       checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
-       if (!checksums)
-               goto out;
-       first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
-       disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
-       if (!disk_crcs)
-               goto out;
-
        ret = readahead_cache(inode);
        if (ret)
                goto out;
@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                struct btrfs_free_space *e;
                void *addr;
                unsigned long offset = 0;
-               unsigned long start_offset = 0;
                int need_loop = 0;
 
                if (!num_entries && !num_bitmaps)
                        break;
 
-               if (index == 0) {
-                       start_offset = first_page_offset;
-                       offset = start_offset;
-               }
-
-               page = grab_cache_page(inode->i_mapping, index);
+               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
                if (!page)
                        goto free_cache;
 
@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                if (index == 0) {
                        u64 *gen;
 
-                       memcpy(disk_crcs, addr, first_page_offset);
-                       gen = addr + (sizeof(u32) * num_checksums);
+                       /*
+                        * We put a bogus crc in the front of the first page in
+                        * case old kernels try to mount a fs with the new
+                        * format to make sure they discard the cache.
+                        */
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
+
+                       gen = addr;
                        if (*gen != BTRFS_I(inode)->generation) {
                                printk(KERN_ERR "btrfs: space cache generation"
                                       " (%llu) does not match inode (%llu)\n",
@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                                page_cache_release(page);
                                goto free_cache;
                        }
-                       crc = (u32 *)disk_crcs;
-               }
-               entry = addr + start_offset;
-
-               /* First lets check our crc before we do anything fun */
-               cur_crc = ~(u32)0;
-               cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
-                                         PAGE_CACHE_SIZE - start_offset);
-               btrfs_csum_final(cur_crc, (char *)&cur_crc);
-               if (cur_crc != *crc) {
-                       printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
-                              index);
-                       kunmap(page);
-                       unlock_page(page);
-                       page_cache_release(page);
-                       goto free_cache;
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
                }
-               crc++;
+               entry = addr;
 
                while (1) {
                        if (!num_entries)
@@ -470,8 +448,6 @@ next:
 
        ret = 1;
 out:
-       kfree(checksums);
-       kfree(disk_crcs);
        return ret;
 free_cache:
        __btrfs_remove_free_space_cache(ctl);
@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        struct btrfs_key key;
        u64 start, end, len;
        u64 bytes = 0;
-       u32 *crc, *checksums;
-       unsigned long first_page_offset;
+       u32 crc = ~(u32)0;
        int index = 0, num_pages = 0;
        int entries = 0;
        int bitmaps = 0;
@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
                PAGE_CACHE_SHIFT;
 
-       /* Since the first page has all of our checksums and our generation we
-        * need to calculate the offset into the page that we can start writing
-        * our entries.
-        */
-       first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
-
        filemap_write_and_wait(inode->i_mapping);
        btrfs_wait_ordered_range(inode, inode->i_size &
                                 ~(root->sectorsize - 1), (u64)-1);
 
-       /* make sure we don't overflow that first page */
-       if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
-               /* this is really the same as running out of space, where we also return 0 */
-               printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
-               ret = 0;
-               goto out_update;
-       }
-
-       /* We need a checksum per page. */
-       crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
-       if (!crc)
-               return -1;
-
        pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
-       if (!pages) {
-               kfree(crc);
+       if (!pages)
                return -1;
-       }
 
        /* Get the cluster for this block_group if it exists */
        if (block_group && !list_empty(&block_group->cluster_list))
@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
         * know and don't freak out.
         */
        while (index < num_pages) {
-               page = grab_cache_page(inode->i_mapping, index);
+               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
                if (!page) {
                        int i;
 
@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                unlock_page(pages[i]);
                                page_cache_release(pages[i]);
                        }
-                       goto out_free;
+                       goto out;
                }
                pages[index] = page;
                index++;
@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        /* Write out the extent entries */
        do {
                struct btrfs_free_space_entry *entry;
-               void *addr;
+               void *addr, *orig;
                unsigned long offset = 0;
-               unsigned long start_offset = 0;
 
                next_page = false;
 
-               if (index == 0) {
-                       start_offset = first_page_offset;
-                       offset = start_offset;
-               }
-
                if (index >= num_pages) {
                        out_of_space = true;
                        break;
@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
                page = pages[index];
 
-               addr = kmap(page);
-               entry = addr + start_offset;
+               orig = addr = kmap(page);
+               if (index == 0) {
+                       u64 *gen;
 
-               memset(addr, 0, PAGE_CACHE_SIZE);
+                       /*
+                        * We're going to put in a bogus crc for this page to
+                        * make sure that old kernels who aren't aware of this
+                        * format will be sure to discard the cache.
+                        */
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
+
+                       gen = addr;
+                       *gen = trans->transid;
+                       addr += sizeof(u64);
+                       offset += sizeof(u64);
+               }
+               entry = addr;
+
+               memset(addr, 0, PAGE_CACHE_SIZE - offset);
                while (node && !next_page) {
                        struct btrfs_free_space *e;
 
@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                next_page = true;
                        entry++;
                }
-               *crc = ~(u32)0;
-               *crc = btrfs_csum_data(root, addr + start_offset, *crc,
-                                      PAGE_CACHE_SIZE - start_offset);
-               kunmap(page);
 
-               btrfs_csum_final(*crc, (char *)crc);
-               crc++;
+               /* Generate bogus crc value */
+               if (index == 0) {
+                       u32 *tmp;
+                       crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
+                                             PAGE_CACHE_SIZE - sizeof(u64));
+                       btrfs_csum_final(crc, (char *)&crc);
+                       crc++;
+                       tmp = orig;
+                       *tmp = crc;
+               }
+
+               kunmap(page);
 
                bytes += PAGE_CACHE_SIZE;
 
@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
                addr = kmap(page);
                memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
-               *crc = ~(u32)0;
-               *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
                kunmap(page);
-               btrfs_csum_final(*crc, (char *)crc);
-               crc++;
                bytes += PAGE_CACHE_SIZE;
 
                list_del_init(&entry->list);
@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                     i_size_read(inode) - 1, &cached_state,
                                     GFP_NOFS);
                ret = 0;
-               goto out_free;
+               goto out;
        }
 
        /* Zero out the rest of the pages just to make sure */
@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                index++;
        }
 
-       /* Write the checksums and trans id to the first page */
-       {
-               void *addr;
-               u64 *gen;
-
-               page = pages[0];
-
-               addr = kmap(page);
-               memcpy(addr, checksums, sizeof(u32) * num_pages);
-               gen = addr + (sizeof(u32) * num_pages);
-               *gen = trans->transid;
-               kunmap(page);
-       }
-
        ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
                                            bytes, &cached_state);
        btrfs_drop_pages(pages, num_pages);
@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
        if (ret) {
                ret = 0;
-               goto out_free;
+               goto out;
        }
 
        BTRFS_I(inode)->generation = trans->transid;
@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
-               goto out_free;
+               goto out;
        }
        leaf = path->nodes[0];
        if (ret > 0) {
@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                         EXTENT_DO_ACCOUNTING, 0, 0, NULL,
                                         GFP_NOFS);
                        btrfs_release_path(path);
-                       goto out_free;
+                       goto out;
                }
        }
        header = btrfs_item_ptr(leaf, path->slots[0],
@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
        ret = 1;
 
-out_free:
-       kfree(checksums);
+out:
        kfree(pages);
-
-out_update:
        if (ret != 1) {
                invalidate_inode_pages2_range(inode->i_mapping, 0, index);
                BTRFS_I(inode)->generation = 0;
index caa26ab5ed6833094a3334ed388edca18d2fd15a..13e6255182e3dad59af5463537dbc21abdff4bef 100644 (file)
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
        return alloc_hint;
 }
 
-static inline bool is_free_space_inode(struct btrfs_root *root,
-                                      struct inode *inode)
-{
-       if (root == root->fs_info->tree_root ||
-           BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
-               return true;
-       return false;
-}
-
 /*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code.  The basic idea is to
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode,
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
-       BUG_ON(is_free_space_inode(root, inode));
+       BUG_ON(btrfs_is_free_space_inode(root, inode));
        trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1072,7 +1063,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
-       nolock = is_free_space_inode(root, inode);
+       nolock = btrfs_is_free_space_inode(root, inode);
 
        if (nolock)
                trans = btrfs_join_transaction_nolock(root);
@@ -1298,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode,
        if (!(orig->state & EXTENT_DELALLOC))
                return 0;
 
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
 }
 
@@ -1316,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
        if (!(other->state & EXTENT_DELALLOC))
                return 0;
 
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
 }
 
@@ -1337,12 +1332,15 @@ static int btrfs_set_bit_hook(struct inode *inode,
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               bool do_list = !is_free_space_inode(root, inode);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else
-                       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+               } else {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
 
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += len;
@@ -1370,12 +1368,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               bool do_list = !is_free_space_inode(root, inode);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else if (!(*bits & EXTENT_DO_ACCOUNTING))
-                       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+               } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents--;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
 
                if (*bits & EXTENT_DO_ACCOUNTING)
                        btrfs_delalloc_release_metadata(inode, len);
@@ -1477,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-       if (is_free_space_inode(root, inode))
+       if (btrfs_is_free_space_inode(root, inode))
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
        else
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1726,7 +1727,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                return 0;
        BUG_ON(!ordered_extent);
 
-       nolock = is_free_space_inode(root, inode);
+       nolock = btrfs_is_free_space_inode(root, inode);
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list));
@@ -2531,13 +2532,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
-       if (!leaf->map_token)
-               map_private_extent_buffer(leaf, (unsigned long)inode_item,
-                                         sizeof(struct btrfs_inode_item),
-                                         &leaf->map_token, &leaf->kaddr,
-                                         &leaf->map_start, &leaf->map_len,
-                                         KM_USER1);
-
        inode->i_mode = btrfs_inode_mode(leaf, inode_item);
        inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
        inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@ -2575,11 +2569,6 @@ cache_acl:
        if (!maybe_acls)
                cache_no_acl(inode);
 
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
-
        btrfs_free_path(path);
 
        switch (inode->i_mode & S_IFMT) {
@@ -2624,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
-       if (!leaf->map_token)
-               map_private_extent_buffer(leaf, (unsigned long)item,
-                                         sizeof(struct btrfs_inode_item),
-                                         &leaf->map_token, &leaf->kaddr,
-                                         &leaf->map_start, &leaf->map_len,
-                                         KM_USER1);
-
        btrfs_set_inode_uid(leaf, item, inode->i_uid);
        btrfs_set_inode_gid(leaf, item, inode->i_gid);
        btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2659,11 +2641,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
        btrfs_set_inode_block_group(leaf, item, 0);
-
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
 }
 
 /*
@@ -2684,7 +2661,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
         * The data relocation inode should also be directly updated
         * without delay
         */
-       if (!is_free_space_inode(root, inode)
+       if (!btrfs_is_free_space_inode(root, inode)
            && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
                ret = btrfs_delayed_update_inode(trans, root, inode);
                if (!ret)
@@ -3398,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
        ret = -ENOMEM;
 again:
-       page = grab_cache_page(mapping, index);
+       page = find_or_create_page(mapping, index, GFP_NOFS);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                goto out;
@@ -3634,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode)
 
        truncate_inode_pages(&inode->i_data, 0);
        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
-                              is_free_space_inode(root, inode)))
+                              btrfs_is_free_space_inode(root, inode)))
                goto no_delete;
 
        if (is_bad_inode(inode)) {
@@ -4271,7 +4248,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        if (BTRFS_I(inode)->dummy_inode)
                return 0;
 
-       if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
+       if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
                nolock = true;
 
        if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -6728,8 +6705,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->index_cnt = (u64)-1;
        ei->last_unlink_trans = 0;
 
-       atomic_set(&ei->outstanding_extents, 0);
-       atomic_set(&ei->reserved_extents, 0);
+       spin_lock_init(&ei->lock);
+       ei->outstanding_extents = 0;
+       ei->reserved_extents = 0;
 
        ei->ordered_data_close = 0;
        ei->orphan_meta_reserved = 0;
@@ -6767,8 +6745,8 @@ void btrfs_destroy_inode(struct inode *inode)
 
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
-       WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+       WARN_ON(BTRFS_I(inode)->outstanding_extents);
+       WARN_ON(BTRFS_I(inode)->reserved_extents);
 
        /*
         * This can happen where we create an inode, but somebody else also
@@ -6823,7 +6801,7 @@ int btrfs_drop_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
        if (btrfs_root_refs(&root->root_item) == 0 &&
-           !is_free_space_inode(root, inode))
+           !btrfs_is_free_space_inode(root, inode))
                return 1;
        else
                return generic_drop_inode(inode);
index 622543309eb25e86fb39308ce077b84b4a883c70..0b980afc5eddfd4aeec80162e2eebecadd4015de 100644 (file)
@@ -859,8 +859,8 @@ again:
        /* step one, lock all the pages */
        for (i = 0; i < num_pages; i++) {
                struct page *page;
-               page = grab_cache_page(inode->i_mapping,
-                                           start_index + i);
+               page = find_or_create_page(inode->i_mapping,
+                                           start_index + i, GFP_NOFS);
                if (!page)
                        break;
 
@@ -930,7 +930,9 @@ again:
                          GFP_NOFS);
 
        if (i_done != num_pages) {
-               atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+               spin_lock(&BTRFS_I(inode)->lock);
+               BTRFS_I(inode)->outstanding_extents++;
+               spin_unlock(&BTRFS_I(inode)->lock);
                btrfs_delalloc_release_space(inode,
                                     (num_pages - i_done) << PAGE_CACHE_SHIFT);
        }
index 66fa43dc3f0f9ff8b5c67e5330120fd663bf4054..d77b67c4b275731417c11e04ad38b3dc9c4d456b 100644 (file)
 #include "extent_io.h"
 #include "locking.h"
 
-static inline void spin_nested(struct extent_buffer *eb)
-{
-       spin_lock(&eb->lock);
-}
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
 
 /*
- * Setting a lock to blocking will drop the spinlock and set the
- * flag that forces other procs who want the lock to wait.  After
- * this you can safely schedule with the lock held.
+ * if we currently have a spinning reader or writer lock
+ * (indicated by the rw flag) this will bump the count
+ * of blocking holders and drop the spinlock.
  */
-void btrfs_set_lock_blocking(struct extent_buffer *eb)
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-       if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
-               set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
-               spin_unlock(&eb->lock);
+       if (rw == BTRFS_WRITE_LOCK) {
+               if (atomic_read(&eb->blocking_writers) == 0) {
+                       WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+                       atomic_dec(&eb->spinning_writers);
+                       btrfs_assert_tree_locked(eb);
+                       atomic_inc(&eb->blocking_writers);
+                       write_unlock(&eb->lock);
+               }
+       } else if (rw == BTRFS_READ_LOCK) {
+               btrfs_assert_tree_read_locked(eb);
+               atomic_inc(&eb->blocking_readers);
+               WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+               atomic_dec(&eb->spinning_readers);
+               read_unlock(&eb->lock);
        }
-       /* exit with the spin lock released and the bit set */
+       return;
 }
 
 /*
- * clearing the blocking flag will take the spinlock again.
- * After this you can't safely schedule
+ * if we currently have a blocking lock, take the spinlock
+ * and drop our blocking count
  */
-void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-       if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
-               spin_nested(eb);
-               clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
-               smp_mb__after_clear_bit();
+       if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
+               BUG_ON(atomic_read(&eb->blocking_writers) != 1);
+               write_lock(&eb->lock);
+               WARN_ON(atomic_read(&eb->spinning_writers));
+               atomic_inc(&eb->spinning_writers);
+               if (atomic_dec_and_test(&eb->blocking_writers))
+                       wake_up(&eb->write_lock_wq);
+       } else if (rw == BTRFS_READ_LOCK_BLOCKING) {
+               BUG_ON(atomic_read(&eb->blocking_readers) == 0);
+               read_lock(&eb->lock);
+               atomic_inc(&eb->spinning_readers);
+               if (atomic_dec_and_test(&eb->blocking_readers))
+                       wake_up(&eb->read_lock_wq);
        }
-       /* exit with the spin lock held */
+       return;
 }
 
 /*
- * unfortunately, many of the places that currently set a lock to blocking
- * don't end up blocking for very long, and often they don't block
- * at all.  For a dbench 50 run, if we don't spin on the blocking bit
- * at all, the context switch rate can jump up to 400,000/sec or more.
- *
- * So, we're still stuck with this crummy spin on the blocking bit,
- * at least until the most common causes of the short blocks
- * can be dealt with.
+ * take a spinning read lock.  This will wait for any blocking
+ * writers
  */
-static int btrfs_spin_on_block(struct extent_buffer *eb)
+void btrfs_tree_read_lock(struct extent_buffer *eb)
 {
-       int i;
-
-       for (i = 0; i < 512; i++) {
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 1;
-               if (need_resched())
-                       break;
-               cpu_relax();
+again:
+       wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+       read_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers)) {
+               read_unlock(&eb->lock);
+               wait_event(eb->write_lock_wq,
+                          atomic_read(&eb->blocking_writers) == 0);
+               goto again;
        }
-       return 0;
+       atomic_inc(&eb->read_locks);
+       atomic_inc(&eb->spinning_readers);
 }
 
 /*
- * This is somewhat different from trylock.  It will take the
- * spinlock but if it finds the lock is set to blocking, it will
- * return without the lock held.
- *
- * returns 1 if it was able to take the lock and zero otherwise
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers
  */
-int btrfs_try_spin_lock(struct extent_buffer *eb)
+int btrfs_try_tree_read_lock(struct extent_buffer *eb)
 {
-       int i;
+       if (atomic_read(&eb->blocking_writers))
+               return 0;
 
-       if (btrfs_spin_on_block(eb)) {
-               spin_nested(eb);
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 1;
-               spin_unlock(&eb->lock);
+       read_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers)) {
+               read_unlock(&eb->lock);
+               return 0;
        }
-       /* spin for a bit on the BLOCKING flag */
-       for (i = 0; i < 2; i++) {
-               cpu_relax();
-               if (!btrfs_spin_on_block(eb))
-                       break;
-
-               spin_nested(eb);
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 1;
-               spin_unlock(&eb->lock);
-       }
-       return 0;
+       atomic_inc(&eb->read_locks);
+       atomic_inc(&eb->spinning_readers);
+       return 1;
 }
 
 /*
- * the autoremove wake function will return 0 if it tried to wake up
- * a process that was already awake, which means that process won't
- * count as an exclusive wakeup.  The waitq code will continue waking
- * procs until it finds one that was actually sleeping.
- *
- * For btrfs, this isn't quite what we want.  We want a single proc
- * to be notified that the lock is ready for taking.  If that proc
- * already happen to be awake, great, it will loop around and try for
- * the lock.
- *
- * So, btrfs_wake_function always returns 1, even when the proc that we
- * tried to wake up was already awake.
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers or readers
  */
-static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
-                              int sync, void *key)
+int btrfs_try_tree_write_lock(struct extent_buffer *eb)
 {
-       autoremove_wake_function(wait, mode, sync, key);
+       if (atomic_read(&eb->blocking_writers) ||
+           atomic_read(&eb->blocking_readers))
+               return 0;
+       write_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers) ||
+           atomic_read(&eb->blocking_readers)) {
+               write_unlock(&eb->lock);
+               return 0;
+       }
+       atomic_inc(&eb->write_locks);
+       atomic_inc(&eb->spinning_writers);
        return 1;
 }
 
 /*
- * returns with the extent buffer spinlocked.
- *
- * This will spin and/or wait as required to take the lock, and then
- * return with the spinlock held.
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * drop a spinning read lock
+ */
+void btrfs_tree_read_unlock(struct extent_buffer *eb)
+{
+       btrfs_assert_tree_read_locked(eb);
+       WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+       atomic_dec(&eb->spinning_readers);
+       atomic_dec(&eb->read_locks);
+       read_unlock(&eb->lock);
+}
+
+/*
+ * drop a blocking read lock
+ */
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
+{
+       btrfs_assert_tree_read_locked(eb);
+       WARN_ON(atomic_read(&eb->blocking_readers) == 0);
+       if (atomic_dec_and_test(&eb->blocking_readers))
+               wake_up(&eb->read_lock_wq);
+       atomic_dec(&eb->read_locks);
+}
+
+/*
+ * take a spinning write lock.  This will wait for both
+ * blocking readers or writers
  */
 int btrfs_tree_lock(struct extent_buffer *eb)
 {
-       DEFINE_WAIT(wait);
-       wait.func = btrfs_wake_function;
-
-       if (!btrfs_spin_on_block(eb))
-               goto sleep;
-
-       while(1) {
-               spin_nested(eb);
-
-               /* nobody is blocking, exit with the spinlock held */
-               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       return 0;
-
-               /*
-                * we have the spinlock, but the real owner is blocking.
-                * wait for them
-                */
-               spin_unlock(&eb->lock);
-
-               /*
-                * spin for a bit, and if the blocking flag goes away,
-                * loop around
-                */
-               cpu_relax();
-               if (btrfs_spin_on_block(eb))
-                       continue;
-sleep:
-               prepare_to_wait_exclusive(&eb->lock_wq, &wait,
-                                         TASK_UNINTERRUPTIBLE);
-
-               if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-                       schedule();
-
-               finish_wait(&eb->lock_wq, &wait);
+again:
+       wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
+       wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+       write_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_readers)) {
+               write_unlock(&eb->lock);
+               wait_event(eb->read_lock_wq,
+                          atomic_read(&eb->blocking_readers) == 0);
+               goto again;
        }
+       if (atomic_read(&eb->blocking_writers)) {
+               write_unlock(&eb->lock);
+               wait_event(eb->write_lock_wq,
+                          atomic_read(&eb->blocking_writers) == 0);
+               goto again;
+       }
+       WARN_ON(atomic_read(&eb->spinning_writers));
+       atomic_inc(&eb->spinning_writers);
+       atomic_inc(&eb->write_locks);
        return 0;
 }
 
+/*
+ * drop a spinning or a blocking write lock.
+ */
 int btrfs_tree_unlock(struct extent_buffer *eb)
 {
-       /*
-        * if we were a blocking owner, we don't have the spinlock held
-        * just clear the bit and look for waiters
-        */
-       if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-               smp_mb__after_clear_bit();
-       else
-               spin_unlock(&eb->lock);
-
-       if (waitqueue_active(&eb->lock_wq))
-               wake_up(&eb->lock_wq);
+       int blockers = atomic_read(&eb->blocking_writers);
+
+       BUG_ON(blockers > 1);
+
+       btrfs_assert_tree_locked(eb);
+       atomic_dec(&eb->write_locks);
+
+       if (blockers) {
+               WARN_ON(atomic_read(&eb->spinning_writers));
+               atomic_dec(&eb->blocking_writers);
+               smp_wmb();
+               wake_up(&eb->write_lock_wq);
+       } else {
+               WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+               atomic_dec(&eb->spinning_writers);
+               write_unlock(&eb->lock);
+       }
        return 0;
 }
 
 void btrfs_assert_tree_locked(struct extent_buffer *eb)
 {
-       if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-               assert_spin_locked(&eb->lock);
+       BUG_ON(!atomic_read(&eb->write_locks));
+}
+
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+{
+       BUG_ON(!atomic_read(&eb->read_locks));
 }
index 5c33a560a2f100c2454797d122875a76921403ab..17247ddb81a00f80e2e3e0a82c5c87e36cc564d1 100644 (file)
 #ifndef __BTRFS_LOCKING_
 #define __BTRFS_LOCKING_
 
+#define BTRFS_WRITE_LOCK 1
+#define BTRFS_READ_LOCK 2
+#define BTRFS_WRITE_LOCK_BLOCKING 3
+#define BTRFS_READ_LOCK_BLOCKING 4
+
 int btrfs_tree_lock(struct extent_buffer *eb);
 int btrfs_tree_unlock(struct extent_buffer *eb);
 int btrfs_try_spin_lock(struct extent_buffer *eb);
 
-void btrfs_set_lock_blocking(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_tree_read_lock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+
+static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
+{
+       if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+               btrfs_tree_unlock(eb);
+       else if (rw == BTRFS_READ_LOCK_BLOCKING)
+               btrfs_tree_read_unlock_blocking(eb);
+       else if (rw == BTRFS_READ_LOCK)
+               btrfs_tree_read_unlock(eb);
+       else
+               BUG();
+}
+
+static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
+{
+       btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
+}
+
+static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+       btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
+}
 #endif
index 5e0a3dc79a453f3930e9c749c1cf08c63e5c7c6a..59bb1764273d476b7efe01a8d0948f4f473de6f2 100644 (file)
@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        page_cache_sync_readahead(inode->i_mapping,
                                                  ra, NULL, index,
                                                  last_index + 1 - index);
-                       page = grab_cache_page(inode->i_mapping, index);
+                       page = find_or_create_page(inode->i_mapping, index,
+                                                  GFP_NOFS);
                        if (!page) {
                                btrfs_delalloc_release_metadata(inode,
                                                        PAGE_CACHE_SIZE);
index c0f7ecaf1e79a931bbb2be480c97c3d848ec4654..bc1f6ad18442bc728a10e9919b91162af1b60b4d 100644 (file)
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb,                              \
        unsigned long part_offset = (unsigned long)s;                   \
        unsigned long offset = part_offset + offsetof(type, member);    \
        type *p;                                                        \
-       /* ugly, but we want the fast path here */                      \
-       if (eb->map_token && offset >= eb->map_start &&                 \
-           offset + sizeof(((type *)0)->member) <= eb->map_start +     \
-           eb->map_len) {                                              \
-               p = (type *)(eb->kaddr + part_offset - eb->map_start);  \
-               return le##bits##_to_cpu(p->member);                    \
-       }                                                               \
-       {                                                               \
-               int err;                                                \
-               char *map_token;                                        \
-               char *kaddr;                                            \
-               int unmap_on_exit = (eb->map_token == NULL);            \
-               unsigned long map_start;                                \
-               unsigned long map_len;                                  \
-               u##bits res;                                            \
-               err = map_extent_buffer(eb, offset,                     \
-                               sizeof(((type *)0)->member),            \
-                               &map_token, &kaddr,                     \
-                               &map_start, &map_len, KM_USER1);        \
-               if (err) {                                              \
-                       __le##bits leres;                               \
-                       read_eb_member(eb, s, type, member, &leres);    \
-                       return le##bits##_to_cpu(leres);                \
-               }                                                       \
-               p = (type *)(kaddr + part_offset - map_start);          \
-               res = le##bits##_to_cpu(p->member);                     \
-               if (unmap_on_exit)                                      \
-                       unmap_extent_buffer(eb, map_token, KM_USER1);   \
-               return res;                                             \
-       }                                                               \
+       int err;                                                \
+       char *kaddr;                                            \
+       unsigned long map_start;                                \
+       unsigned long map_len;                                  \
+       u##bits res;                                            \
+       err = map_private_extent_buffer(eb, offset,             \
+                       sizeof(((type *)0)->member),            \
+                       &kaddr, &map_start, &map_len);          \
+       if (err) {                                              \
+               __le##bits leres;                               \
+               read_eb_member(eb, s, type, member, &leres);    \
+               return le##bits##_to_cpu(leres);                \
+       }                                                       \
+       p = (type *)(kaddr + part_offset - map_start);          \
+       res = le##bits##_to_cpu(p->member);                     \
+       return res;                                             \
 }                                                                      \
 void btrfs_set_##name(struct extent_buffer *eb,                                \
                                    type *s, u##bits val)               \
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb,                             \
        unsigned long part_offset = (unsigned long)s;                   \
        unsigned long offset = part_offset + offsetof(type, member);    \
        type *p;                                                        \
-       /* ugly, but we want the fast path here */                      \
-       if (eb->map_token && offset >= eb->map_start &&                 \
-           offset + sizeof(((type *)0)->member) <= eb->map_start +     \
-           eb->map_len) {                                              \
-               p = (type *)(eb->kaddr + part_offset - eb->map_start);  \
-               p->member = cpu_to_le##bits(val);                       \
-               return;                                                 \
-       }                                                               \
-       {                                                               \
-               int err;                                                \
-               char *map_token;                                        \
-               char *kaddr;                                            \
-               int unmap_on_exit = (eb->map_token == NULL);            \
-               unsigned long map_start;                                \
-               unsigned long map_len;                                  \
-               err = map_extent_buffer(eb, offset,                     \
-                               sizeof(((type *)0)->member),            \
-                               &map_token, &kaddr,                     \
-                               &map_start, &map_len, KM_USER1);        \
-               if (err) {                                              \
-                       __le##bits val2;                                \
-                       val2 = cpu_to_le##bits(val);                    \
-                       write_eb_member(eb, s, type, member, &val2);    \
-                       return;                                         \
-               }                                                       \
-               p = (type *)(kaddr + part_offset - map_start);          \
-               p->member = cpu_to_le##bits(val);                       \
-               if (unmap_on_exit)                                      \
-                       unmap_extent_buffer(eb, map_token, KM_USER1);   \
-       }                                                               \
+       int err;                                                \
+       char *kaddr;                                            \
+       unsigned long map_start;                                \
+       unsigned long map_len;                                  \
+       err = map_private_extent_buffer(eb, offset,             \
+                       sizeof(((type *)0)->member),            \
+                       &kaddr, &map_start, &map_len);          \
+       if (err) {                                              \
+               __le##bits val2;                                \
+               val2 = cpu_to_le##bits(val);                    \
+               write_eb_member(eb, s, type, member, &val2);    \
+               return;                                         \
+       }                                                       \
+       p = (type *)(kaddr + part_offset - map_start);          \
+       p->member = cpu_to_le##bits(val);                       \
 }
 
 #include "ctree.h"
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb,
                    struct btrfs_disk_key *disk_key, int nr)
 {
        unsigned long ptr = btrfs_node_key_ptr_offset(nr);
-       if (eb->map_token && ptr >= eb->map_start &&
-           ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
-               memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
-                       sizeof(*disk_key));
-               return;
-       } else if (eb->map_token) {
-               unmap_extent_buffer(eb, eb->map_token, KM_USER1);
-               eb->map_token = NULL;
-       }
        read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
                       struct btrfs_key_ptr, key, disk_key);
 }
index 51dcec86757f071654bc3866123e65157ef0286b..eb55863bb4aee8a323783aa24536d17ec166f26d 100644 (file)
@@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
-       int retries = 0;
+       u64 num_bytes = 0;
        int ret;
 
        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
                h->block_rsv = NULL;
                goto got_it;
        }
+
+       /*
+        * Do the reservation before we join the transaction so we can do all
+        * the appropriate flushing if need be.
+        */
+       if (num_items > 0 && root != root->fs_info->chunk_root) {
+               num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+               ret = btrfs_block_rsv_add(NULL, root,
+                                         &root->fs_info->trans_block_rsv,
+                                         num_bytes);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
 again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h)
@@ -310,24 +323,9 @@ again:
                goto again;
        }
 
-       if (num_items > 0) {
-               ret = btrfs_trans_reserve_metadata(h, root, num_items);
-               if (ret == -EAGAIN && !retries) {
-                       retries++;
-                       btrfs_commit_transaction(h, root);
-                       goto again;
-               } else if (ret == -EAGAIN) {
-                       /*
-                        * We have already retried and got EAGAIN, so really we
-                        * don't have space, so set ret to -ENOSPC.
-                        */
-                       ret = -ENOSPC;
-               }
-
-               if (ret < 0) {
-                       btrfs_end_transaction(h, root);
-                       return ERR_PTR(ret);
-               }
+       if (num_bytes) {
+               h->block_rsv = &root->fs_info->trans_block_rsv;
+               h->bytes_reserved = num_bytes;
        }
 
 got_it:
@@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        }
 
        if (lock && cur_trans->blocked && !cur_trans->in_commit) {
-               if (throttle)
+               if (throttle) {
+                       /*
+                        * We may race with somebody else here so end up having
+                        * to call end_transaction on ourselves again, so inc
+                        * our use_count.
+                        */
+                       trans->use_count++;
                        return btrfs_commit_transaction(trans, root);
-               else
+               } else {
                        wake_up_process(info->transaction_kthread);
+               }
        }
 
        WARN_ON(cur_trans != info->running_transaction);
index 4ce8a9f41d1ec3916753bd610183a82b4dc6aa21..ac278dd83175e3a64a573f9a16e154a21226ecda 100644 (file)
@@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                btrfs_read_buffer(next, ptr_gen);
 
                                btrfs_tree_lock(next);
-                               clean_tree_block(trans, root, next);
                                btrfs_set_lock_blocking(next);
+                               clean_tree_block(trans, root, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
 
@@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                next = path->nodes[*level];
 
                                btrfs_tree_lock(next);
-                               clean_tree_block(trans, root, next);
                                btrfs_set_lock_blocking(next);
+                               clean_tree_block(trans, root, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
 
@@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                        next = path->nodes[orig_level];
 
                        btrfs_tree_lock(next);
-                       clean_tree_block(trans, log, next);
                        btrfs_set_lock_blocking(next);
+                       clean_tree_block(trans, log, next);
                        btrfs_wait_tree_block_writeback(next);
                        btrfs_tree_unlock(next);
 
index 19450bc536327c77f7add37e723b668da410eedd..b89e372c75449bf228f7ca6eb5904d5b00474b90 100644 (file)
@@ -3595,7 +3595,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
        if (!sb)
                return -ENOMEM;
        btrfs_set_buffer_uptodate(sb);
-       btrfs_set_buffer_lockdep_class(sb, 0);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 
        write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
        array_size = btrfs_super_sys_array_size(super_copy);
index 5366fe452ab07db7402402e96351c1b956809e20..d733b9cfea343207e71bdb6d8d8b51323717c800 100644 (file)
@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       /* first lets see if we already have this xattr */
-       di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-                               strlen(name), -1);
-       if (IS_ERR(di)) {
-               ret = PTR_ERR(di);
-               goto out;
-       }
-
-       /* ok we already have this xattr, lets remove it */
-       if (di) {
-               /* if we want create only exit */
-               if (flags & XATTR_CREATE) {
-                       ret = -EEXIST;
+       if (flags & XATTR_REPLACE) {
+               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
+                                       name_len, -1);
+               if (IS_ERR(di)) {
+                       ret = PTR_ERR(di);
+                       goto out;
+               } else if (!di) {
+                       ret = -ENODATA;
                        goto out;
                }
-
                ret = btrfs_delete_one_dir_name(trans, root, path, di);
-               BUG_ON(ret);
+               if (ret)
+                       goto out;
                btrfs_release_path(path);
+       }
 
-               /* if we don't have a value then we are removing the xattr */
-               if (!value)
+again:
+       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+                                     name, name_len, value, size);
+       if (ret == -EEXIST) {
+               if (flags & XATTR_CREATE)
                        goto out;
-       } else {
+               /*
+                * We can't use the path we already have since we won't have the
+                * proper locking for a delete, so release the path and
+                * re-lookup to delete the thing.
+                */
                btrfs_release_path(path);
+               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+                                       name, name_len, -1);
+               if (IS_ERR(di)) {
+                       ret = PTR_ERR(di);
+                       goto out;
+               } else if (!di) {
+                       /* Shouldn't happen but just in case... */
+                       btrfs_release_path(path);
+                       goto again;
+               }
 
-               if (flags & XATTR_REPLACE) {
-                       /* we couldn't find the attr to replace */
-                       ret = -ENODATA;
+               ret = btrfs_delete_one_dir_name(trans, root, path, di);
+               if (ret)
                        goto out;
+
+               /*
+                * We have a value to set, so go back and try to insert it now.
+                */
+               if (value) {
+                       btrfs_release_path(path);
+                       goto again;
                }
        }
-
-       /* ok we have to create a completely new xattr */
-       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-                                     name, name_len, value, size);
-       BUG_ON(ret);
 out:
        btrfs_free_path(path);
        return ret;