Merge Btrfs into fs/btrfs
authorChris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 19:32:36 +0000 (15:32 -0400)
committerChris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 19:33:18 +0000 (15:33 -0400)
56 files changed:
fs/btrfs/COPYING [new file with mode: 0644]
fs/btrfs/INSTALL [new file with mode: 0644]
fs/btrfs/Makefile [new file with mode: 0644]
fs/btrfs/TODO [new file with mode: 0644]
fs/btrfs/acl.c [new file with mode: 0644]
fs/btrfs/async-thread.c [new file with mode: 0644]
fs/btrfs/async-thread.h [new file with mode: 0644]
fs/btrfs/bit-radix.c [new file with mode: 0644]
fs/btrfs/bit-radix.h [new file with mode: 0644]
fs/btrfs/btrfs_inode.h [new file with mode: 0644]
fs/btrfs/compat.h [new file with mode: 0644]
fs/btrfs/crc32c.h [new file with mode: 0644]
fs/btrfs/ctree.c [new file with mode: 0644]
fs/btrfs/ctree.h [new file with mode: 0644]
fs/btrfs/dir-item.c [new file with mode: 0644]
fs/btrfs/disk-io.c [new file with mode: 0644]
fs/btrfs/disk-io.h [new file with mode: 0644]
fs/btrfs/export.c [new file with mode: 0644]
fs/btrfs/export.h [new file with mode: 0644]
fs/btrfs/extent-tree.c [new file with mode: 0644]
fs/btrfs/extent_io.c [new file with mode: 0644]
fs/btrfs/extent_io.h [new file with mode: 0644]
fs/btrfs/extent_map.c [new file with mode: 0644]
fs/btrfs/extent_map.h [new file with mode: 0644]
fs/btrfs/file-item.c [new file with mode: 0644]
fs/btrfs/file.c [new file with mode: 0644]
fs/btrfs/free-space-cache.c [new file with mode: 0644]
fs/btrfs/hash.h [new file with mode: 0644]
fs/btrfs/inode-item.c [new file with mode: 0644]
fs/btrfs/inode-map.c [new file with mode: 0644]
fs/btrfs/inode.c [new file with mode: 0644]
fs/btrfs/ioctl.c [new file with mode: 0644]
fs/btrfs/ioctl.h [new file with mode: 0644]
fs/btrfs/locking.c [new file with mode: 0644]
fs/btrfs/locking.h [new file with mode: 0644]
fs/btrfs/ordered-data.c [new file with mode: 0644]
fs/btrfs/ordered-data.h [new file with mode: 0644]
fs/btrfs/orphan.c [new file with mode: 0644]
fs/btrfs/print-tree.c [new file with mode: 0644]
fs/btrfs/print-tree.h [new file with mode: 0644]
fs/btrfs/ref-cache.c [new file with mode: 0644]
fs/btrfs/ref-cache.h [new file with mode: 0644]
fs/btrfs/root-tree.c [new file with mode: 0644]
fs/btrfs/struct-funcs.c [new file with mode: 0644]
fs/btrfs/super.c [new file with mode: 0644]
fs/btrfs/sysfs.c [new file with mode: 0644]
fs/btrfs/transaction.c [new file with mode: 0644]
fs/btrfs/transaction.h [new file with mode: 0644]
fs/btrfs/tree-defrag.c [new file with mode: 0644]
fs/btrfs/tree-log.c [new file with mode: 0644]
fs/btrfs/tree-log.h [new file with mode: 0644]
fs/btrfs/version.sh [new file with mode: 0644]
fs/btrfs/volumes.c [new file with mode: 0644]
fs/btrfs/volumes.h [new file with mode: 0644]
fs/btrfs/xattr.c [new file with mode: 0644]
fs/btrfs/xattr.h [new file with mode: 0644]

diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING
new file mode 100644 (file)
index 0000000..ca442d3
--- /dev/null
@@ -0,0 +1,356 @@
+
+   NOTE! This copyright does *not* cover user programs that use kernel
+ services by normal system calls - this is merely considered normal use
+ of the kernel, and does *not* fall under the heading of "derived work".
+ Also note that the GPL below is copyrighted by the Free Software
+ Foundation, but the instance of code that it refers to (the Linux
+ kernel) is copyrighted by me and others who actually wrote it.
+
+ Also note that the only valid version of the GPL as far as the kernel
+ is concerned is _this_ particular version of the license (ie v2, not
+ v2.2 or v3.x or whatever), unless explicitly otherwise stated.
+
+                       Linus Torvalds
+
+----------------------------------------
+
+                   GNU GENERAL PUBLIC LICENSE
+                      Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+                   GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                           NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+\f
+           How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/fs/btrfs/INSTALL b/fs/btrfs/INSTALL
new file mode 100644 (file)
index 0000000..16b45a5
--- /dev/null
@@ -0,0 +1,48 @@
+Install Instructions
+
+Btrfs puts snapshots and subvolumes into the root directory of the FS.  This
+directory can only be changed by btrfsctl right now, and normal filesystem
+operations do not work on it.  The default subvolume is called 'default',
+and you can create files and directories in mount_point/default
+
+Btrfs uses libcrc32c in the kernel for file and metadata checksums.  You need
+to compile the kernel with:
+
+CONFIG_LIBCRC32C=m
+
+libcrc32c can be static as well.  Once your kernel is setup, typing make in the
+btrfs module sources will build against the running kernel.  When the build is
+complete:
+
+modprobe libcrc32c
+insmod btrfs.ko
+
+The Btrfs utility programs require libuuid to build.  This can be found
+in the e2fsprogs sources, and is usually available as libuuid or
+e2fsprogs-devel from various distros.
+
+Building the utilities is just make ; make install.  The programs go
+into /usr/local/bin.  The commands available are:
+
+mkfs.btrfs: create a filesystem
+
+btrfsctl: control program to create snapshots and subvolumes:
+
+       mount /dev/sda2 /mnt
+       btrfsctl -s new_subvol_name /mnt
+       btrfsctl -s snapshot_of_default /mnt/default
+       btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
+       btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
+       ls /mnt
+       default snapshot_of_a_snapshot snapshot_of_new_subvol
+       new_subvol_name snapshot_of_default
+
+       Snapshots and subvolumes cannot be deleted right now, but you can
+       rm -rf all the files and directories inside them.
+
+btrfsck: do a limited check of the FS extent trees.</li>
+
+debug-tree: print all of the FS metadata in text form.  Example:
+
+       debug-tree /dev/sda2 >& big_output_file
+
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
new file mode 100644 (file)
index 0000000..eb36ae9
--- /dev/null
@@ -0,0 +1,29 @@
+ifneq ($(KERNELRELEASE),)
+# kbuild part of makefile
+
+obj-m  := btrfs.o
+btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+          file-item.o inode-item.o inode-map.o disk-io.o \
+          transaction.o bit-radix.o inode.o file.o tree-defrag.o \
+          extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+          extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
+          ref-cache.o export.o tree-log.o acl.o free-space-cache.o
+else
+
+# Normal Makefile
+
+KERNELDIR := /lib/modules/`uname -r`/build
+all: version
+       $(MAKE) -C $(KERNELDIR) M=`pwd` modules
+
+version:
+       bash version.sh
+
+modules_install:
+       $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
+clean:
+       $(MAKE) -C $(KERNELDIR) M=`pwd` clean
+
+tester:
+       $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o
+endif
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO
new file mode 100644 (file)
index 0000000..d9b6d38
--- /dev/null
@@ -0,0 +1,20 @@
+* cleanup, add more error checking, get rid of BUG_ONs
+* Fix ENOSPC handling
+* Make allocator smarter
+* add a block group to struct inode
+* Do actual block accounting
+* Check compat and incompat flags on the inode
+* Get rid of struct ctree_path, limiting tree levels held at one time
+* Add generation number to key pointer in nodes
+* Add generation number to inode
+* forbid cross subvolume renames and hardlinks
+* Release
+* Do real tree locking
+* Add extent mirroring (backup copies of blocks)
+* Add fancy interface to get access to incremental backups
+* Add fancy striped extents to make big reads faster
+* Use relocation to try and fix write errors
+* Make allocator much smarter
+* xattrs (directory streams for regular files)
+* Scrub & defrag
+
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
new file mode 100644 (file)
index 0000000..867eaf1
--- /dev/null
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2007 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "xattr.h"
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+static void btrfs_update_cached_acl(struct inode *inode,
+                                   struct posix_acl **p_acl,
+                                   struct posix_acl *acl)
+{
+       spin_lock(&inode->i_lock);
+       if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
+               posix_acl_release(*p_acl);
+       *p_acl = posix_acl_dup(acl);
+       spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+{
+       int size;
+       const char *name;
+       char *value = NULL;
+       struct posix_acl *acl = NULL, **p_acl;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               name = POSIX_ACL_XATTR_ACCESS;
+               p_acl = &BTRFS_I(inode)->i_acl;
+               break;
+       case ACL_TYPE_DEFAULT:
+               name = POSIX_ACL_XATTR_DEFAULT;
+               p_acl = &BTRFS_I(inode)->i_default_acl;
+               break;
+       default:
+               return ERR_PTR(-EINVAL);
+       }
+
+       spin_lock(&inode->i_lock);
+       if (*p_acl != BTRFS_ACL_NOT_CACHED)
+               acl = posix_acl_dup(*p_acl);
+       spin_unlock(&inode->i_lock);
+
+       if (acl)
+               return acl;
+
+
+       size = __btrfs_getxattr(inode, name, "", 0);
+       if (size > 0) {
+               value = kzalloc(size, GFP_NOFS);
+               if (!value)
+                       return ERR_PTR(-ENOMEM);
+               size = __btrfs_getxattr(inode, name, value, size);
+               if (size > 0) {
+                       acl = posix_acl_from_xattr(value, size);
+                       btrfs_update_cached_acl(inode, p_acl, acl);
+               }
+               kfree(value);
+       } else if (size == -ENOENT) {
+               acl = NULL;
+               btrfs_update_cached_acl(inode, p_acl, acl);
+       }
+
+       return acl;
+}
+
+static int btrfs_xattr_get_acl(struct inode *inode, int type,
+                              void *value, size_t size)
+{
+       struct posix_acl *acl;
+       int ret = 0;
+
+       acl = btrfs_get_acl(inode, type);
+
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl == NULL)
+               return -ENODATA;
+       ret = posix_acl_to_xattr(acl, value, size);
+       posix_acl_release(acl);
+
+       return ret;
+}
+
+/*
+ * Needs to be called with fs_mutex held
+ */
+static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+       int ret, size = 0;
+       const char *name;
+       struct posix_acl **p_acl;
+       char *value = NULL;
+       mode_t mode;
+
+       if (acl) {
+               ret = posix_acl_valid(acl);
+               if (ret < 0)
+                       return ret;
+               ret = 0;
+       }
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               mode = inode->i_mode;
+               ret = posix_acl_equiv_mode(acl, &mode);
+               if (ret < 0)
+                       return ret;
+               ret = 0;
+               inode->i_mode = mode;
+               name = POSIX_ACL_XATTR_ACCESS;
+               p_acl = &BTRFS_I(inode)->i_acl;
+               break;
+       case ACL_TYPE_DEFAULT:
+               if (!S_ISDIR(inode->i_mode))
+                       return acl ? -EINVAL : 0;
+               name = POSIX_ACL_XATTR_DEFAULT;
+               p_acl = &BTRFS_I(inode)->i_default_acl;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (acl) {
+               size = posix_acl_xattr_size(acl->a_count);
+               value = kmalloc(size, GFP_NOFS);
+               if (!value) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               ret = posix_acl_to_xattr(acl, value, size);
+               if (ret < 0)
+                       goto out;
+       }
+
+       ret = __btrfs_setxattr(inode, name, value, size, 0);
+
+out:
+       if (value)
+               kfree(value);
+
+       if (!ret)
+               btrfs_update_cached_acl(inode, p_acl, acl);
+
+       return ret;
+}
+
+static int btrfs_xattr_set_acl(struct inode *inode, int type,
+                              const void *value, size_t size)
+{
+       int ret = 0;
+       struct posix_acl *acl = NULL;
+
+       if (value) {
+               acl = posix_acl_from_xattr(value, size);
+               if (acl == NULL) {
+                       value = NULL;
+                       size = 0;
+               } else if (IS_ERR(acl)) {
+                       return PTR_ERR(acl);
+               }
+       }
+
+       ret = btrfs_set_acl(inode, acl, type);
+
+       posix_acl_release(acl);
+
+       return ret;
+}
+
+
+static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
+                                     void *value, size_t size)
+{
+       return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
+                                     const void *value, size_t size, int flags)
+{
+       return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
+                                      void *value, size_t size)
+{
+       return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
+                                      const void *value, size_t size, int flags)
+{
+       return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+       struct posix_acl *acl;
+       int error = -EAGAIN;
+
+       acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl) {
+               error = posix_acl_permission(inode, acl, mask);
+               posix_acl_release(acl);
+       }
+
+       return error;
+}
+
+/*
+ * btrfs_init_acl is already generally called under fs_mutex, so the locking
+ * stuff has been fixed to work with that.  If the locking stuff changes, we
+ * need to re-evaluate the acl locking stuff.
+ */
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+       struct posix_acl *acl = NULL;
+       int ret = 0;
+
+       /* this happens with subvols */
+       if (!dir)
+               return 0;
+
+       if (!S_ISLNK(inode->i_mode)) {
+               if (IS_POSIXACL(dir)) {
+                       acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
+                       if (IS_ERR(acl))
+                               return PTR_ERR(acl);
+               }
+
+               if (!acl)
+                       inode->i_mode &= ~current->fs->umask;
+       }
+
+       if (IS_POSIXACL(dir) && acl) {
+               struct posix_acl *clone;
+               mode_t mode;
+
+               if (S_ISDIR(inode->i_mode)) {
+                       ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+                       if (ret)
+                               goto failed;
+               }
+               clone = posix_acl_clone(acl, GFP_NOFS);
+               ret = -ENOMEM;
+               if (!clone)
+                       goto failed;
+
+               mode = inode->i_mode;
+               ret = posix_acl_create_masq(clone, &mode);
+               if (ret >= 0) {
+                       inode->i_mode = mode;
+                       if (ret > 0) {
+                               /* we need an acl */
+                               ret = btrfs_set_acl(inode, clone,
+                                                   ACL_TYPE_ACCESS);
+                       }
+               }
+       }
+failed:
+       posix_acl_release(acl);
+
+       return ret;
+}
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+       struct posix_acl *acl, *clone;
+       int ret = 0;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       if (!IS_POSIXACL(inode))
+               return 0;
+
+       acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+       if (IS_ERR(acl) || !acl)
+               return PTR_ERR(acl);
+
+       clone = posix_acl_clone(acl, GFP_KERNEL);
+       posix_acl_release(acl);
+       if (!clone)
+               return -ENOMEM;
+
+       ret = posix_acl_chmod_masq(clone, inode->i_mode);
+       if (!ret)
+               ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+
+       posix_acl_release(clone);
+
+       return ret;
+}
+
+struct xattr_handler btrfs_xattr_acl_default_handler = {
+       .prefix = POSIX_ACL_XATTR_DEFAULT,
+       .get    = btrfs_xattr_acl_default_get,
+       .set    = btrfs_xattr_acl_default_set,
+};
+
+struct xattr_handler btrfs_xattr_acl_access_handler = {
+       .prefix = POSIX_ACL_XATTR_ACCESS,
+       .get    = btrfs_xattr_acl_access_get,
+       .set    = btrfs_xattr_acl_access_set,
+};
+
+#else /* CONFIG_FS_POSIX_ACL */
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+       return 0;
+}
+
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+       return 0;
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+       return 0;
+}
+
+#endif /* CONFIG_FS_POSIX_ACL */
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
new file mode 100644 (file)
index 0000000..2ee3017
--- /dev/null
@@ -0,0 +1,343 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+# include <linux/freezer.h>
+#else
+# include <linux/sched.h>
+#endif
+
+#include "async-thread.h"
+
+/*
+ * container for the kthread task pointer and the list of pending work
+ * One of these is allocated per thread.
+ */
+struct btrfs_worker_thread {
+       /* pool we belong to */
+       struct btrfs_workers *workers;
+
+       /* list of struct btrfs_work that are waiting for service */
+       struct list_head pending;
+
+       /* list of worker threads from struct btrfs_workers */
+       struct list_head worker_list;
+
+       /* kthread */
+       struct task_struct *task;
+
+       /* number of things on the pending list */
+       atomic_t num_pending;
+
+       unsigned long sequence;
+
+       /* protects the pending list. */
+       spinlock_t lock;
+
+       /* set to non-zero when this thread is already awake and kicking */
+       int working;
+
+       /* are we currently idle */
+       int idle;
+};
+
+/*
+ * helper function to move a thread onto the idle list after it
+ * has finished some requests.
+ */
+static void check_idle_worker(struct btrfs_worker_thread *worker)
+{
+       if (!worker->idle && atomic_read(&worker->num_pending) <
+           worker->workers->idle_thresh / 2) {
+               unsigned long flags;
+               spin_lock_irqsave(&worker->workers->lock, flags);
+               worker->idle = 1;
+               list_move(&worker->worker_list, &worker->workers->idle_list);
+               spin_unlock_irqrestore(&worker->workers->lock, flags);
+       }
+}
+
+/*
+ * helper function to move a thread off the idle list after new
+ * pending work is added.
+ */
+static void check_busy_worker(struct btrfs_worker_thread *worker)
+{
+       if (worker->idle && atomic_read(&worker->num_pending) >=
+           worker->workers->idle_thresh) {
+               unsigned long flags;
+               spin_lock_irqsave(&worker->workers->lock, flags);
+               worker->idle = 0;
+               list_move_tail(&worker->worker_list,
+                              &worker->workers->worker_list);
+               spin_unlock_irqrestore(&worker->workers->lock, flags);
+       }
+}
+
+/*
+ * main loop for servicing work items
+ */
+static int worker_loop(void *arg)
+{
+       struct btrfs_worker_thread *worker = arg;
+       struct list_head *cur;
+       struct btrfs_work *work;
+       do {
+               spin_lock_irq(&worker->lock);
+               while(!list_empty(&worker->pending)) {
+                       cur = worker->pending.next;
+                       work = list_entry(cur, struct btrfs_work, list);
+                       list_del(&work->list);
+                       clear_bit(0, &work->flags);
+
+                       work->worker = worker;
+                       spin_unlock_irq(&worker->lock);
+
+                       work->func(work);
+
+                       atomic_dec(&worker->num_pending);
+                       spin_lock_irq(&worker->lock);
+                       check_idle_worker(worker);
+               }
+               worker->working = 0;
+               if (freezing(current)) {
+                       refrigerator();
+               } else {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       spin_unlock_irq(&worker->lock);
+                       schedule();
+                       __set_current_state(TASK_RUNNING);
+               }
+       } while (!kthread_should_stop());
+       return 0;
+}
+
+/*
+ * this will wait for all the worker threads to shutdown
+ */
+int btrfs_stop_workers(struct btrfs_workers *workers)
+{
+       struct list_head *cur;
+       struct btrfs_worker_thread *worker;
+
+       list_splice_init(&workers->idle_list, &workers->worker_list);
+       while(!list_empty(&workers->worker_list)) {
+               cur = workers->worker_list.next;
+               worker = list_entry(cur, struct btrfs_worker_thread,
+                                   worker_list);
+               kthread_stop(worker->task);
+               list_del(&worker->worker_list);
+               kfree(worker);
+       }
+       return 0;
+}
+
+/*
+ * simple init on struct btrfs_workers
+ */
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
+{
+       workers->num_workers = 0;
+       INIT_LIST_HEAD(&workers->worker_list);
+       INIT_LIST_HEAD(&workers->idle_list);
+       spin_lock_init(&workers->lock);
+       workers->max_workers = max;
+       workers->idle_thresh = 32;
+       workers->name = name;
+}
+
+/*
+ * starts new worker threads.  This does not enforce the max worker
+ * count in case you need to temporarily go past it.
+ */
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+{
+       struct btrfs_worker_thread *worker;
+       int ret = 0;
+       int i;
+
+       for (i = 0; i < num_workers; i++) {
+               worker = kzalloc(sizeof(*worker), GFP_NOFS);
+               if (!worker) {
+                       ret = -ENOMEM;
+                       goto fail;
+               }
+
+               INIT_LIST_HEAD(&worker->pending);
+               INIT_LIST_HEAD(&worker->worker_list);
+               spin_lock_init(&worker->lock);
+               atomic_set(&worker->num_pending, 0);
+               worker->task = kthread_run(worker_loop, worker,
+                                          "btrfs-%s-%d", workers->name,
+                                          workers->num_workers + i);
+               worker->workers = workers;
+               if (IS_ERR(worker->task)) {
+                       kfree(worker);
+                       ret = PTR_ERR(worker->task);
+                       goto fail;
+               }
+
+               spin_lock_irq(&workers->lock);
+               list_add_tail(&worker->worker_list, &workers->idle_list);
+               worker->idle = 1;
+               workers->num_workers++;
+               spin_unlock_irq(&workers->lock);
+       }
+       return 0;
+fail:
+       btrfs_stop_workers(workers);
+       return ret;
+}
+
+/*
+ * run through the list and find a worker thread that doesn't have a lot
+ * to do right now.  This can return null if we aren't yet at the thread
+ * count limit and all of the threads are busy.
+ */
+static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
+{
+       struct btrfs_worker_thread *worker;
+       struct list_head *next;
+       int enforce_min = workers->num_workers < workers->max_workers;
+
+       /*
+        * if we find an idle thread, don't move it to the end of the
+        * idle list.  This improves the chance that the next submission
+        * will reuse the same thread, and maybe catch it while it is still
+        * working
+        */
+       if (!list_empty(&workers->idle_list)) {
+               next = workers->idle_list.next;
+               worker = list_entry(next, struct btrfs_worker_thread,
+                                   worker_list);
+               return worker;
+       }
+       if (enforce_min || list_empty(&workers->worker_list))
+               return NULL;
+
+       /*
+        * if we pick a busy task, move the task to the end of the list.
+        * hopefully this will keep things somewhat evenly balanced
+        */
+       next = workers->worker_list.next;
+       worker = list_entry(next, struct btrfs_worker_thread, worker_list);
+       atomic_inc(&worker->num_pending);
+       worker->sequence++;
+       if (worker->sequence % workers->idle_thresh == 0)
+               list_move_tail(next, &workers->worker_list);
+       return worker;
+}
+
+static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
+{
+       struct btrfs_worker_thread *worker;
+       unsigned long flags;
+
+again:
+       spin_lock_irqsave(&workers->lock, flags);
+       worker = next_worker(workers);
+       spin_unlock_irqrestore(&workers->lock, flags);
+
+       if (!worker) {
+               spin_lock_irqsave(&workers->lock, flags);
+               if (workers->num_workers >= workers->max_workers) {
+                       struct list_head *fallback = NULL;
+                       /*
+                        * we have failed to find any workers, just
+                        * return the force one
+                        */
+                       if (!list_empty(&workers->worker_list))
+                               fallback = workers->worker_list.next;
+                       if (!list_empty(&workers->idle_list))
+                               fallback = workers->idle_list.next;
+                       BUG_ON(!fallback);
+                       worker = list_entry(fallback,
+                                 struct btrfs_worker_thread, worker_list);
+                       spin_unlock_irqrestore(&workers->lock, flags);
+               } else {
+                       spin_unlock_irqrestore(&workers->lock, flags);
+                       /* we're below the limit, start another worker */
+                       btrfs_start_workers(workers, 1);
+                       goto again;
+               }
+       }
+       return worker;
+}
+
+/*
+ * btrfs_requeue_work just puts the work item back on the tail of the list
+ * it was taken from.  It is intended for use with long running work functions
+ * that make some progress and want to give the cpu up for others.
+ */
+int btrfs_requeue_work(struct btrfs_work *work)
+{
+       struct btrfs_worker_thread *worker = work->worker;
+       unsigned long flags;
+
+       if (test_and_set_bit(0, &work->flags))
+               goto out;
+
+       spin_lock_irqsave(&worker->lock, flags);
+       atomic_inc(&worker->num_pending);
+       list_add_tail(&work->list, &worker->pending);
+       check_busy_worker(worker);
+       spin_unlock_irqrestore(&worker->lock, flags);
+out:
+       return 0;
+}
+
+/*
+ * places a struct btrfs_work into the pending queue of one of the kthreads
+ */
+int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+{
+       struct btrfs_worker_thread *worker;
+       unsigned long flags;
+       int wake = 0;
+
+       /* don't requeue something already on a list */
+       if (test_and_set_bit(0, &work->flags))
+               goto out;
+
+       worker = find_worker(workers);
+
+       spin_lock_irqsave(&worker->lock, flags);
+       atomic_inc(&worker->num_pending);
+       check_busy_worker(worker);
+       list_add_tail(&work->list, &worker->pending);
+
+       /*
+        * avoid calling into wake_up_process if this thread has already
+        * been kicked
+        */
+       if (!worker->working)
+               wake = 1;
+       worker->working = 1;
+
+       spin_unlock_irqrestore(&worker->lock, flags);
+
+       if (wake)
+               wake_up_process(worker->task);
+out:
+       return 0;
+}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
new file mode 100644 (file)
index 0000000..43e44d1
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_ASYNC_THREAD_
+#define __BTRFS_ASYNC_THREAD_
+
+struct btrfs_worker_thread;
+
+/*
+ * This is similar to a workqueue, but it is meant to spread the operations
+ * across all available cpus instead of just the CPU that was used to
+ * queue the work.  There is also some batching introduced to try and
+ * cut down on context switches.
+ *
+ * By default threads are added on demand up to 2 * the number of cpus.
+ * Changing struct btrfs_workers->max_workers is one way to prevent
+ * demand creation of kthreads.
+ *
+ * the basic model of these worker threads is to embed a btrfs_work
+ * structure in your own data struct, and use container_of in a
+ * work function to get back to your data struct.
+ */
+struct btrfs_work {
+       /*
+        * only func should be set to the function you want called
+        * your work struct is passed as the only arg
+        */
+       void (*func)(struct btrfs_work *work);
+
+       /*
+        * flags should be set to zero.  It is used to make sure the
+        * struct is only inserted once into the list.
+        */
+       unsigned long flags;
+
+       /* don't touch these */
+       struct btrfs_worker_thread *worker;
+       struct list_head list;
+};
+
+struct btrfs_workers {
+       /* current number of running workers */
+       int num_workers;
+
+       /* max number of workers allowed.  changed by btrfs_start_workers */
+       int max_workers;
+
+       /* once a worker has this many requests or fewer, it is idle */
+       int idle_thresh;
+
+       /* list with all the work threads */
+       struct list_head worker_list;
+       struct list_head idle_list;
+
+       /* lock for finding the next worker thread to queue on */
+       spinlock_t lock;
+
+       /* extra name for this worker */
+       char *name;
+};
+
+int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
+int btrfs_stop_workers(struct btrfs_workers *workers);
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
+int btrfs_requeue_work(struct btrfs_work *work);
+#endif
diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c
new file mode 100644 (file)
index 0000000..e8bf876
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "bit-radix.h"
+
+#define BIT_ARRAY_BYTES 256
+#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8)
+
+extern struct kmem_cache *btrfs_bit_radix_cachep;
+int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+       unsigned long *bits;
+       unsigned long slot;
+       int bit_slot;
+       int ret;
+
+       slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+       bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+       bits = radix_tree_lookup(radix, slot);
+       if (!bits) {
+               bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS);
+               if (!bits)
+                       return -ENOMEM;
+               memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long));
+               bits[0] = slot;
+               ret = radix_tree_insert(radix, slot, bits);
+               if (ret)
+                       return ret;
+       }
+       ret = test_and_set_bit(bit_slot, bits + 1);
+       if (ret < 0)
+               ret = 1;
+       return ret;
+}
+
+int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+       unsigned long *bits;
+       unsigned long slot;
+       int bit_slot;
+
+       slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+       bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+       bits = radix_tree_lookup(radix, slot);
+       if (!bits)
+               return 0;
+       return test_bit(bit_slot, bits + 1);
+}
+
+int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+       unsigned long *bits;
+       unsigned long slot;
+       int bit_slot;
+       int i;
+       int empty = 1;
+
+       slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+       bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+       bits = radix_tree_lookup(radix, slot);
+       if (!bits)
+               return 0;
+       clear_bit(bit_slot, bits + 1);
+       for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) {
+               if (bits[i]) {
+                       empty = 0;
+                       break;
+               }
+       }
+       if (empty) {
+               bits = radix_tree_delete(radix, slot);
+               BUG_ON(!bits);
+               kmem_cache_free(btrfs_bit_radix_cachep, bits);
+       }
+       return 0;
+}
+
+int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
+                        unsigned long start, int nr)
+{
+       unsigned long *bits;
+       unsigned long *gang[4];
+       int found;
+       int ret;
+       int i;
+       int total_found = 0;
+       unsigned long slot;
+
+       slot = start / BIT_RADIX_BITS_PER_ARRAY;
+       ret = radix_tree_gang_lookup(radix, (void **)gang, slot,
+                                    ARRAY_SIZE(gang));
+       found = start % BIT_RADIX_BITS_PER_ARRAY;
+       for (i = 0; i < ret && nr > 0; i++) {
+               bits = gang[i];
+               while(nr > 0) {
+                       found = find_next_bit(bits + 1,
+                                             BIT_RADIX_BITS_PER_ARRAY,
+                                             found);
+                       if (found < BIT_RADIX_BITS_PER_ARRAY) {
+                               *retbits = bits[0] *
+                                       BIT_RADIX_BITS_PER_ARRAY + found;
+                               retbits++;
+                               nr--;
+                               total_found++;
+                               found++;
+                       } else
+                               break;
+               }
+               found = 0;
+       }
+       return total_found;
+}
diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h
new file mode 100644 (file)
index 0000000..c100f54
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BIT_RADIX__
+#define __BIT_RADIX__
+#include <linux/radix-tree.h>
+
+int set_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int test_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
+                        unsigned long start, int nr);
+
+static inline void init_bit_radix(struct radix_tree_root *radix)
+{
+       INIT_RADIX_TREE(radix, GFP_NOFS);
+}
+#endif
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
new file mode 100644 (file)
index 0000000..0577fda
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_I__
+#define __BTRFS_I__
+
+#include "extent_map.h"
+#include "extent_io.h"
+#include "ordered-data.h"
+
+/* in memory btrfs inode */
+struct btrfs_inode {
+       struct btrfs_root *root;
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_key location;
+       struct extent_map_tree extent_tree;
+       struct extent_io_tree io_tree;
+       struct extent_io_tree io_failure_tree;
+       struct mutex csum_mutex;
+       struct mutex extent_mutex;
+       struct mutex log_mutex;
+       struct inode vfs_inode;
+       struct btrfs_ordered_inode_tree ordered_tree;
+
+       struct posix_acl *i_acl;
+       struct posix_acl *i_default_acl;
+
+       /* for keeping track of orphaned inodes */
+       struct list_head i_orphan;
+
+       struct list_head delalloc_inodes;
+
+       /* full 64 bit generation number */
+       u64 generation;
+
+       /*
+        * transid of the trans_handle that last modified this inode
+        */
+       u64 last_trans;
+       /*
+        * transid that last logged this inode
+        */
+       u64 logged_trans;
+
+       /* trans that last made a change that should be fully fsync'd */
+       u64 log_dirty_trans;
+       u64 delalloc_bytes;
+       u64 disk_i_size;
+       u32 flags;
+
+       /*
+        * if this is a directory then index_cnt is the counter for the index
+        * number for new files that are created
+        */
+       u64 index_cnt;
+};
+
+static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
+{
+       return container_of(inode, struct btrfs_inode, vfs_inode);
+}
+
+static inline void btrfs_i_size_write(struct inode *inode, u64 size)
+{
+       inode->i_size = size;
+       BTRFS_I(inode)->disk_i_size = size;
+}
+
+
+#endif
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
new file mode 100644 (file)
index 0000000..b0ed188
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef _COMPAT_H_
+#define _COMPAT_H_
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26)
+#define trylock_page(page) (!TestSetPageLocked(page))
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
+static inline struct dentry *d_obtain_alias(struct inode *inode)
+{
+       struct dentry *d;
+
+       if (!inode)
+               return NULL;
+       if (IS_ERR(inode))
+               return ERR_CAST(inode);
+
+       d = d_alloc_anon(inode);
+       if (!d)
+               iput(inode);
+       return d;
+}
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+static inline void btrfs_drop_nlink(struct inode *inode)
+{
+       inode->i_nlink--;
+}
+
+static inline void btrfs_inc_nlink(struct inode *inode)
+{
+       inode->i_nlink++;
+}
+#else
+# define btrfs_drop_nlink(inode) drop_nlink(inode)
+# define btrfs_inc_nlink(inode)        inc_nlink(inode)
+#endif
+
+/*
+ * Even if AppArmor isn't enabled, it still has different prototypes.
+ * Add more distro/version pairs here to declare which has AppArmor applied.
+ */
+#if defined(CONFIG_SUSE_KERNEL)
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+# define REMOVE_SUID_PATH 1
+# endif
+#endif
+
+/*
+ * catch any other distros that have patched in apparmor.  This isn't
+ * 100% reliable because it won't catch people that hand compile their
+ * own distro kernels without apparmor compiled in.  But, it is better
+ * than nothing.
+ */
+#ifdef CONFIG_SECURITY_APPARMOR
+# define REMOVE_SUID_PATH 1
+#endif
+
+#endif /* _COMPAT_H_ */
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h
new file mode 100644 (file)
index 0000000..bf6c12e
--- /dev/null
@@ -0,0 +1,108 @@
+#ifndef __BTRFS_CRC32C__
+#define __BTRFS_CRC32C__
+#include <asm/byteorder.h>
+#include <linux/crc32c.h>
+#include <linux/version.h>
+
+/* #define CONFIG_BTRFS_HW_SUM 1 */
+
+#ifdef CONFIG_BTRFS_HW_SUM
+#ifdef CONFIG_X86
+/*
+ * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+#define X86_FEATURE_XMM4_2     (4*32+20) /* Streaming SIMD Extensions-4.2 */
+#define cpu_has_xmm4_2         boot_cpu_has(X86_FEATURE_XMM4_2)
+
+#ifdef CONFIG_X86_64
+#define REX_PRE        "0x48, "
+#define SCALE_F        8
+#else
+#define REX_PRE
+#define SCALE_F        4
+#endif
+
+static inline u32 btrfs_crc32c_le_hw_byte(u32 crc, unsigned char const *data,
+                                  size_t length)
+{
+       while (length--) {
+               __asm__ __volatile__(
+                       ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+                       :"=S"(crc)
+                       :"0"(crc), "c"(*data)
+               );
+               data++;
+       }
+
+       return crc;
+}
+
+static inline u32 __pure btrfs_crc32c_le_hw(u32 crc, unsigned char const *p,
+                                    size_t len)
+{
+       unsigned int iquotient = len / SCALE_F;
+       unsigned int iremainder = len % SCALE_F;
+#ifdef CONFIG_X86_64
+       u64 *ptmp = (u64 *)p;
+#else
+       u32 *ptmp = (u32 *)p;
+#endif
+
+       while (iquotient--) {
+               __asm__ __volatile__(
+                       ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+                       :"=S"(crc)
+                       :"0"(crc), "c"(*ptmp)
+               );
+               ptmp++;
+       }
+
+       if (iremainder)
+               crc = btrfs_crc32c_le_hw_byte(crc, (unsigned char *)ptmp,
+                                             iremainder);
+
+       return crc;
+}
+#endif /* CONFIG_BTRFS_HW_SUM */
+
+static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address,
+                                size_t len)
+{
+#ifdef CONFIG_BTRFS_HW_SUM
+       if (cpu_has_xmm4_2)
+               return btrfs_crc32c_le_hw(crc, address, len);
+#endif
+       return crc32c_le(crc, address, len);
+}
+
+#else
+
+#define __btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
+
+#endif /* CONFIG_X86 */
+
+/**
+ * implementation of crc32c_le() changed in linux-2.6.23,
+ * has of v0.13 btrfs-progs is using the latest version.
+ * We must workaround older implementations of crc32c_le()
+ * found on older kernel versions.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+#define btrfs_crc32c(seed, data, length) \
+       __cpu_to_le32( __btrfs_crc32c( __le32_to_cpu(seed), \
+                                     (unsigned char const *)data, length) )
+#else
+#define btrfs_crc32c(seed, data, length) \
+       __btrfs_crc32c(seed, (unsigned char const *)data, length)
+#endif
+#endif
+
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
new file mode 100644 (file)
index 0000000..50aea8c
--- /dev/null
@@ -0,0 +1,3450 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "locking.h"
+
+static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_path *path, int level);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_key *ins_key,
+                     struct btrfs_path *path, int data_size, int extend);
+static int push_node_left(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root, struct extent_buffer *dst,
+                         struct extent_buffer *src, int empty);
+static int balance_node_right(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct extent_buffer *dst_buf,
+                             struct extent_buffer *src_buf);
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                  struct btrfs_path *path, int level, int slot);
+
+inline void btrfs_init_path(struct btrfs_path *p)
+{
+       memset(p, 0, sizeof(*p));
+}
+
+struct btrfs_path *btrfs_alloc_path(void)
+{
+       struct btrfs_path *path;
+       path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
+       if (path) {
+               btrfs_init_path(path);
+               path->reada = 1;
+       }
+       return path;
+}
+
+void btrfs_free_path(struct btrfs_path *p)
+{
+       btrfs_release_path(NULL, p);
+       kmem_cache_free(btrfs_path_cachep, p);
+}
+
+void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
+{
+       int i;
+
+       for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+               p->slots[i] = 0;
+               if (!p->nodes[i])
+                       continue;
+               if (p->locks[i]) {
+                       btrfs_tree_unlock(p->nodes[i]);
+                       p->locks[i] = 0;
+               }
+               free_extent_buffer(p->nodes[i]);
+               p->nodes[i] = NULL;
+       }
+}
+
+struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
+{
+       struct extent_buffer *eb;
+       spin_lock(&root->node_lock);
+       eb = root->node;
+       extent_buffer_get(eb);
+       spin_unlock(&root->node_lock);
+       return eb;
+}
+
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
+{
+       struct extent_buffer *eb;
+
+       while(1) {
+               eb = btrfs_root_node(root);
+               btrfs_tree_lock(eb);
+
+               spin_lock(&root->node_lock);
+               if (eb == root->node) {
+                       spin_unlock(&root->node_lock);
+                       break;
+               }
+               spin_unlock(&root->node_lock);
+
+               btrfs_tree_unlock(eb);
+               free_extent_buffer(eb);
+       }
+       return eb;
+}
+
+static void add_root_to_dirty_list(struct btrfs_root *root)
+{
+       if (root->track_dirty && list_empty(&root->dirty_list)) {
+               list_add(&root->dirty_list,
+                        &root->fs_info->dirty_cowonly_roots);
+       }
+}
+
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root,
+                     struct extent_buffer *buf,
+                     struct extent_buffer **cow_ret, u64 new_root_objectid)
+{
+       struct extent_buffer *cow;
+       u32 nritems;
+       int ret = 0;
+       int level;
+       struct btrfs_root *new_root;
+
+       new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
+       if (!new_root)
+               return -ENOMEM;
+
+       memcpy(new_root, root, sizeof(*new_root));
+       new_root->root_key.objectid = new_root_objectid;
+
+       WARN_ON(root->ref_cows && trans->transid !=
+               root->fs_info->running_transaction->transid);
+       WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+       level = btrfs_header_level(buf);
+       nritems = btrfs_header_nritems(buf);
+
+       cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
+                                    new_root_objectid, trans->transid,
+                                    level, buf->start, 0);
+       if (IS_ERR(cow)) {
+               kfree(new_root);
+               return PTR_ERR(cow);
+       }
+
+       copy_extent_buffer(cow, buf, 0, 0, cow->len);
+       btrfs_set_header_bytenr(cow, cow->start);
+       btrfs_set_header_generation(cow, trans->transid);
+       btrfs_set_header_owner(cow, new_root_objectid);
+       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+
+       WARN_ON(btrfs_header_generation(buf) > trans->transid);
+       ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
+       kfree(new_root);
+
+       if (ret)
+               return ret;
+
+       btrfs_mark_buffer_dirty(cow);
+       *cow_ret = cow;
+       return 0;
+}
+
+int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct extent_buffer *buf,
+                            struct extent_buffer *parent, int parent_slot,
+                            struct extent_buffer **cow_ret,
+                            u64 search_start, u64 empty_size,
+                            u64 prealloc_dest)
+{
+       u64 parent_start;
+       struct extent_buffer *cow;
+       u32 nritems;
+       int ret = 0;
+       int different_trans = 0;
+       int level;
+       int unlock_orig = 0;
+
+       if (*cow_ret == buf)
+               unlock_orig = 1;
+
+       WARN_ON(!btrfs_tree_locked(buf));
+
+       if (parent)
+               parent_start = parent->start;
+       else
+               parent_start = 0;
+
+       WARN_ON(root->ref_cows && trans->transid !=
+               root->fs_info->running_transaction->transid);
+       WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+       level = btrfs_header_level(buf);
+       nritems = btrfs_header_nritems(buf);
+
+       if (prealloc_dest) {
+               struct btrfs_key ins;
+
+               ins.objectid = prealloc_dest;
+               ins.offset = buf->len;
+               ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+               ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
+                                                 root->root_key.objectid,
+                                                 trans->transid, level, 0,
+                                                 &ins);
+               BUG_ON(ret);
+               cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
+                                           buf->len);
+       } else {
+               cow = btrfs_alloc_free_block(trans, root, buf->len,
+                                            parent_start,
+                                            root->root_key.objectid,
+                                            trans->transid, level,
+                                            search_start, empty_size);
+       }
+       if (IS_ERR(cow))
+               return PTR_ERR(cow);
+
+       copy_extent_buffer(cow, buf, 0, 0, cow->len);
+       btrfs_set_header_bytenr(cow, cow->start);
+       btrfs_set_header_generation(cow, trans->transid);
+       btrfs_set_header_owner(cow, root->root_key.objectid);
+       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+
+       WARN_ON(btrfs_header_generation(buf) > trans->transid);
+       if (btrfs_header_generation(buf) != trans->transid) {
+               u32 nr_extents;
+               different_trans = 1;
+               ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
+               if (ret)
+                       return ret;
+
+               ret = btrfs_cache_ref(trans, root, buf, nr_extents);
+               WARN_ON(ret);
+       } else {
+               ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
+               if (ret)
+                       return ret;
+               clean_tree_block(trans, root, buf);
+       }
+
+       if (buf == root->node) {
+               WARN_ON(parent && parent != buf);
+
+               spin_lock(&root->node_lock);
+               root->node = cow;
+               extent_buffer_get(cow);
+               spin_unlock(&root->node_lock);
+
+               if (buf != root->commit_root) {
+                       btrfs_free_extent(trans, root, buf->start,
+                                         buf->len, buf->start,
+                                         root->root_key.objectid,
+                                         btrfs_header_generation(buf),
+                                         0, 0, 1);
+               }
+               free_extent_buffer(buf);
+               add_root_to_dirty_list(root);
+       } else {
+               btrfs_set_node_blockptr(parent, parent_slot,
+                                       cow->start);
+               WARN_ON(trans->transid == 0);
+               btrfs_set_node_ptr_generation(parent, parent_slot,
+                                             trans->transid);
+               btrfs_mark_buffer_dirty(parent);
+               WARN_ON(btrfs_header_generation(parent) != trans->transid);
+               btrfs_free_extent(trans, root, buf->start, buf->len,
+                                 parent_start, btrfs_header_owner(parent),
+                                 btrfs_header_generation(parent), 0, 0, 1);
+       }
+       if (unlock_orig)
+               btrfs_tree_unlock(buf);
+       free_extent_buffer(buf);
+       btrfs_mark_buffer_dirty(cow);
+       *cow_ret = cow;
+       return 0;
+}
+
+int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
+                   struct btrfs_root *root, struct extent_buffer *buf,
+                   struct extent_buffer *parent, int parent_slot,
+                   struct extent_buffer **cow_ret, u64 prealloc_dest)
+{
+       u64 search_start;
+       u64 header_trans;
+       int ret;
+
+       if (trans->transaction != root->fs_info->running_transaction) {
+               printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+                      root->fs_info->running_transaction->transid);
+               WARN_ON(1);
+       }
+       if (trans->transid != root->fs_info->generation) {
+               printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+                      root->fs_info->generation);
+               WARN_ON(1);
+       }
+
+       header_trans = btrfs_header_generation(buf);
+       spin_lock(&root->fs_info->hash_lock);
+       if (header_trans == trans->transid &&
+           !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+               *cow_ret = buf;
+               spin_unlock(&root->fs_info->hash_lock);
+               WARN_ON(prealloc_dest);
+               return 0;
+       }
+       spin_unlock(&root->fs_info->hash_lock);
+       search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
+       ret = __btrfs_cow_block(trans, root, buf, parent,
+                                parent_slot, cow_ret, search_start, 0,
+                                prealloc_dest);
+       return ret;
+}
+
+static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
+{
+       if (blocknr < other && other - (blocknr + blocksize) < 32768)
+               return 1;
+       if (blocknr > other && blocknr - (other + blocksize) < 32768)
+               return 1;
+       return 0;
+}
+
+/*
+ * compare two keys in a memcmp fashion
+ */
+static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+{
+       struct btrfs_key k1;
+
+       btrfs_disk_key_to_cpu(&k1, disk);
+
+       if (k1.objectid > k2->objectid)
+               return 1;
+       if (k1.objectid < k2->objectid)
+               return -1;
+       if (k1.type > k2->type)
+               return 1;
+       if (k1.type < k2->type)
+               return -1;
+       if (k1.offset > k2->offset)
+               return 1;
+       if (k1.offset < k2->offset)
+               return -1;
+       return 0;
+}
+
+
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root, struct extent_buffer *parent,
+                      int start_slot, int cache_only, u64 *last_ret,
+                      struct btrfs_key *progress)
+{
+       struct extent_buffer *cur;
+       u64 blocknr;
+       u64 gen;
+       u64 search_start = *last_ret;
+       u64 last_block = 0;
+       u64 other;
+       u32 parent_nritems;
+       int end_slot;
+       int i;
+       int err = 0;
+       int parent_level;
+       int uptodate;
+       u32 blocksize;
+       int progress_passed = 0;
+       struct btrfs_disk_key disk_key;
+
+       parent_level = btrfs_header_level(parent);
+       if (cache_only && parent_level != 1)
+               return 0;
+
+       if (trans->transaction != root->fs_info->running_transaction) {
+               printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+                      root->fs_info->running_transaction->transid);
+               WARN_ON(1);
+       }
+       if (trans->transid != root->fs_info->generation) {
+               printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+                      root->fs_info->generation);
+               WARN_ON(1);
+       }
+
+       parent_nritems = btrfs_header_nritems(parent);
+       blocksize = btrfs_level_size(root, parent_level - 1);
+       end_slot = parent_nritems;
+
+       if (parent_nritems == 1)
+               return 0;
+
+       for (i = start_slot; i < end_slot; i++) {
+               int close = 1;
+
+               if (!parent->map_token) {
+                       map_extent_buffer(parent,
+                                       btrfs_node_key_ptr_offset(i),
+                                       sizeof(struct btrfs_key_ptr),
+                                       &parent->map_token, &parent->kaddr,
+                                       &parent->map_start, &parent->map_len,
+                                       KM_USER1);
+               }
+               btrfs_node_key(parent, &disk_key, i);
+               if (!progress_passed && comp_keys(&disk_key, progress) < 0)
+                       continue;
+
+               progress_passed = 1;
+               blocknr = btrfs_node_blockptr(parent, i);
+               gen = btrfs_node_ptr_generation(parent, i);
+               if (last_block == 0)
+                       last_block = blocknr;
+
+               if (i > 0) {
+                       other = btrfs_node_blockptr(parent, i - 1);
+                       close = close_blocks(blocknr, other, blocksize);
+               }
+               if (!close && i < end_slot - 2) {
+                       other = btrfs_node_blockptr(parent, i + 1);
+                       close = close_blocks(blocknr, other, blocksize);
+               }
+               if (close) {
+                       last_block = blocknr;
+                       continue;
+               }
+               if (parent->map_token) {
+                       unmap_extent_buffer(parent, parent->map_token,
+                                           KM_USER1);
+                       parent->map_token = NULL;
+               }
+
+               cur = btrfs_find_tree_block(root, blocknr, blocksize);
+               if (cur)
+                       uptodate = btrfs_buffer_uptodate(cur, gen);
+               else
+                       uptodate = 0;
+               if (!cur || !uptodate) {
+                       if (cache_only) {
+                               free_extent_buffer(cur);
+                               continue;
+                       }
+                       if (!cur) {
+                               cur = read_tree_block(root, blocknr,
+                                                        blocksize, gen);
+                       } else if (!uptodate) {
+                               btrfs_read_buffer(cur, gen);
+                       }
+               }
+               if (search_start == 0)
+                       search_start = last_block;
+
+               btrfs_tree_lock(cur);
+               err = __btrfs_cow_block(trans, root, cur, parent, i,
+                                       &cur, search_start,
+                                       min(16 * blocksize,
+                                           (end_slot - i) * blocksize), 0);
+               if (err) {
+                       btrfs_tree_unlock(cur);
+                       free_extent_buffer(cur);
+                       break;
+               }
+               search_start = cur->start;
+               last_block = cur->start;
+               *last_ret = search_start;
+               btrfs_tree_unlock(cur);
+               free_extent_buffer(cur);
+       }
+       if (parent->map_token) {
+               unmap_extent_buffer(parent, parent->map_token,
+                                   KM_USER1);
+               parent->map_token = NULL;
+       }
+       return err;
+}
+
+/*
+ * The leaf data grows from end-to-front in the node.
+ * this returns the address of the start of the last item,
+ * which is the stop of the leaf data stack
+ */
+static inline unsigned int leaf_data_end(struct btrfs_root *root,
+                                        struct extent_buffer *leaf)
+{
+       u32 nr = btrfs_header_nritems(leaf);
+       if (nr == 0)
+               return BTRFS_LEAF_DATA_SIZE(root);
+       return btrfs_item_offset_nr(leaf, nr - 1);
+}
+
+static int check_node(struct btrfs_root *root, struct btrfs_path *path,
+                     int level)
+{
+       struct extent_buffer *parent = NULL;
+       struct extent_buffer *node = path->nodes[level];
+       struct btrfs_disk_key parent_key;
+       struct btrfs_disk_key node_key;
+       int parent_slot;
+       int slot;
+       struct btrfs_key cpukey;
+       u32 nritems = btrfs_header_nritems(node);
+
+       if (path->nodes[level + 1])
+               parent = path->nodes[level + 1];
+
+       slot = path->slots[level];
+       BUG_ON(nritems == 0);
+       if (parent) {
+               parent_slot = path->slots[level + 1];
+               btrfs_node_key(parent, &parent_key, parent_slot);
+               btrfs_node_key(node, &node_key, 0);
+               BUG_ON(memcmp(&parent_key, &node_key,
+                             sizeof(struct btrfs_disk_key)));
+               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
+                      btrfs_header_bytenr(node));
+       }
+       BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
+       if (slot != 0) {
+               btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
+               btrfs_node_key(node, &node_key, slot);
+               BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
+       }
+       if (slot < nritems - 1) {
+               btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
+               btrfs_node_key(node, &node_key, slot);
+               BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
+       }
+       return 0;
+}
+
+static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
+                     int level)
+{
+       struct extent_buffer *leaf = path->nodes[level];
+       struct extent_buffer *parent = NULL;
+       int parent_slot;
+       struct btrfs_key cpukey;
+       struct btrfs_disk_key parent_key;
+       struct btrfs_disk_key leaf_key;
+       int slot = path->slots[0];
+
+       u32 nritems = btrfs_header_nritems(leaf);
+
+       if (path->nodes[level + 1])
+               parent = path->nodes[level + 1];
+
+       if (nritems == 0)
+               return 0;
+
+       if (parent) {
+               parent_slot = path->slots[level + 1];
+               btrfs_node_key(parent, &parent_key, parent_slot);
+               btrfs_item_key(leaf, &leaf_key, 0);
+
+               BUG_ON(memcmp(&parent_key, &leaf_key,
+                      sizeof(struct btrfs_disk_key)));
+               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
+                      btrfs_header_bytenr(leaf));
+       }
+#if 0
+       for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+               btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
+               btrfs_item_key(leaf, &leaf_key, i);
+               if (comp_keys(&leaf_key, &cpukey) >= 0) {
+                       btrfs_print_leaf(root, leaf);
+                       printk("slot %d offset bad key\n", i);
+                       BUG_ON(1);
+               }
+               if (btrfs_item_offset_nr(leaf, i) !=
+                       btrfs_item_end_nr(leaf, i + 1)) {
+                       btrfs_print_leaf(root, leaf);
+                       printk("slot %d offset bad\n", i);
+                       BUG_ON(1);
+               }
+               if (i == 0) {
+                       if (btrfs_item_offset_nr(leaf, i) +
+                              btrfs_item_size_nr(leaf, i) !=
+                              BTRFS_LEAF_DATA_SIZE(root)) {
+                               btrfs_print_leaf(root, leaf);
+                               printk("slot %d first offset bad\n", i);
+                               BUG_ON(1);
+                       }
+               }
+       }
+       if (nritems > 0) {
+               if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
+                               btrfs_print_leaf(root, leaf);
+                               printk("slot %d bad size \n", nritems - 1);
+                               BUG_ON(1);
+               }
+       }
+#endif
+       if (slot != 0 && slot < nritems - 1) {
+               btrfs_item_key(leaf, &leaf_key, slot);
+               btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
+               if (comp_keys(&leaf_key, &cpukey) <= 0) {
+                       btrfs_print_leaf(root, leaf);
+                       printk("slot %d offset bad key\n", slot);
+                       BUG_ON(1);
+               }
+               if (btrfs_item_offset_nr(leaf, slot - 1) !=
+                      btrfs_item_end_nr(leaf, slot)) {
+                       btrfs_print_leaf(root, leaf);
+                       printk("slot %d offset bad\n", slot);
+                       BUG_ON(1);
+               }
+       }
+       if (slot < nritems - 1) {
+               btrfs_item_key(leaf, &leaf_key, slot);
+               btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
+               BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
+               if (btrfs_item_offset_nr(leaf, slot) !=
+                       btrfs_item_end_nr(leaf, slot + 1)) {
+                       btrfs_print_leaf(root, leaf);
+                       printk("slot %d offset bad\n", slot);
+                       BUG_ON(1);
+               }
+       }
+       BUG_ON(btrfs_item_offset_nr(leaf, 0) +
+              btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
+       return 0;
+}
+
+static int noinline check_block(struct btrfs_root *root,
+                               struct btrfs_path *path, int level)
+{
+       u64 found_start;
+       return 0;
+       if (btrfs_header_level(path->nodes[level]) != level)
+           printk("warning: bad level %Lu wanted %d found %d\n",
+                  path->nodes[level]->start, level,
+                  btrfs_header_level(path->nodes[level]));
+       found_start = btrfs_header_bytenr(path->nodes[level]);
+       if (found_start != path->nodes[level]->start) {
+           printk("warning: bad bytentr %Lu found %Lu\n",
+                  path->nodes[level]->start, found_start);
+       }
+#if 0
+       struct extent_buffer *buf = path->nodes[level];
+
+       if (memcmp_extent_buffer(buf, root->fs_info->fsid,
+                                (unsigned long)btrfs_header_fsid(buf),
+                                BTRFS_FSID_SIZE)) {
+               printk("warning bad block %Lu\n", buf->start);
+               return 1;
+       }
+#endif
+       if (level == 0)
+               return check_leaf(root, path, level);
+       return check_node(root, path, level);
+}
+
+/*
+ * search for key in the extent_buffer.  The items start at offset p,
+ * and they are item_size apart.  There are 'max' items in p.
+ *
+ * the slot in the array is returned via slot, and it points to
+ * the place where you would insert key if it is not found in
+ * the array.
+ *
+ * slot may point to max if the key is bigger than all of the keys
+ */
+static noinline int generic_bin_search(struct extent_buffer *eb,
+                                      unsigned long p,
+                                      int item_size, struct btrfs_key *key,
+                                      int max, int *slot)
+{
+       int low = 0;
+       int high = max;
+       int mid;
+       int ret;
+       struct btrfs_disk_key *tmp = NULL;
+       struct btrfs_disk_key unaligned;
+       unsigned long offset;
+       char *map_token = NULL;
+       char *kaddr = NULL;
+       unsigned long map_start = 0;
+       unsigned long map_len = 0;
+       int err;
+
+       while(low < high) {
+               mid = (low + high) / 2;
+               offset = p + mid * item_size;
+
+               if (!map_token || offset < map_start ||
+                   (offset + sizeof(struct btrfs_disk_key)) >
+                   map_start + map_len) {
+                       if (map_token) {
+                               unmap_extent_buffer(eb, map_token, KM_USER0);
+                               map_token = NULL;
+                       }
+                       err = map_extent_buffer(eb, offset,
+                                               sizeof(struct btrfs_disk_key),
+                                               &map_token, &kaddr,
+                                               &map_start, &map_len, KM_USER0);
+
+                       if (!err) {
+                               tmp = (struct btrfs_disk_key *)(kaddr + offset -
+                                                       map_start);
+                       } else {
+                               read_extent_buffer(eb, &unaligned,
+                                                  offset, sizeof(unaligned));
+                               tmp = &unaligned;
+                       }
+
+               } else {
+                       tmp = (struct btrfs_disk_key *)(kaddr + offset -
+                                                       map_start);
+               }
+               ret = comp_keys(tmp, key);
+
+               if (ret < 0)
+                       low = mid + 1;
+               else if (ret > 0)
+                       high = mid;
+               else {
+                       *slot = mid;
+                       if (map_token)
+                               unmap_extent_buffer(eb, map_token, KM_USER0);
+                       return 0;
+               }
+       }
+       *slot = low;
+       if (map_token)
+               unmap_extent_buffer(eb, map_token, KM_USER0);
+       return 1;
+}
+
+/*
+ * simple bin_search frontend that does the right thing for
+ * leaves vs nodes
+ */
+static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+                     int level, int *slot)
+{
+       if (level == 0) {
+               return generic_bin_search(eb,
+                                         offsetof(struct btrfs_leaf, items),
+                                         sizeof(struct btrfs_item),
+                                         key, btrfs_header_nritems(eb),
+                                         slot);
+       } else {
+               return generic_bin_search(eb,
+                                         offsetof(struct btrfs_node, ptrs),
+                                         sizeof(struct btrfs_key_ptr),
+                                         key, btrfs_header_nritems(eb),
+                                         slot);
+       }
+       return -1;
+}
+
+static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
+                                  struct extent_buffer *parent, int slot)
+{
+       int level = btrfs_header_level(parent);
+       if (slot < 0)
+               return NULL;
+       if (slot >= btrfs_header_nritems(parent))
+               return NULL;
+
+       BUG_ON(level == 0);
+
+       return read_tree_block(root, btrfs_node_blockptr(parent, slot),
+                      btrfs_level_size(root, level - 1),
+                      btrfs_node_ptr_generation(parent, slot));
+}
+
+static noinline int balance_level(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root,
+                        struct btrfs_path *path, int level)
+{
+       struct extent_buffer *right = NULL;
+       struct extent_buffer *mid;
+       struct extent_buffer *left = NULL;
+       struct extent_buffer *parent = NULL;
+       int ret = 0;
+       int wret;
+       int pslot;
+       int orig_slot = path->slots[level];
+       int err_on_enospc = 0;
+       u64 orig_ptr;
+
+       if (level == 0)
+               return 0;
+
+       mid = path->nodes[level];
+       WARN_ON(!path->locks[level]);
+       WARN_ON(btrfs_header_generation(mid) != trans->transid);
+
+       orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+       if (level < BTRFS_MAX_LEVEL - 1)
+               parent = path->nodes[level + 1];
+       pslot = path->slots[level + 1];
+
+       /*
+        * deal with the case where there is only one pointer in the root
+        * by promoting the node below to a root
+        */
+       if (!parent) {
+               struct extent_buffer *child;
+
+               if (btrfs_header_nritems(mid) != 1)
+                       return 0;
+
+               /* promote the child to a root */
+               child = read_node_slot(root, mid, 0);
+               btrfs_tree_lock(child);
+               BUG_ON(!child);
+               ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+               BUG_ON(ret);
+
+               spin_lock(&root->node_lock);
+               root->node = child;
+               spin_unlock(&root->node_lock);
+
+               ret = btrfs_update_extent_ref(trans, root, child->start,
+                                             mid->start, child->start,
+                                             root->root_key.objectid,
+                                             trans->transid, level - 1, 0);
+               BUG_ON(ret);
+
+               add_root_to_dirty_list(root);
+               btrfs_tree_unlock(child);
+               path->locks[level] = 0;
+               path->nodes[level] = NULL;
+               clean_tree_block(trans, root, mid);
+               btrfs_tree_unlock(mid);
+               /* once for the path */
+               free_extent_buffer(mid);
+               ret = btrfs_free_extent(trans, root, mid->start, mid->len,
+                                       mid->start, root->root_key.objectid,
+                                       btrfs_header_generation(mid), 0, 0, 1);
+               /* once for the root ptr */
+               free_extent_buffer(mid);
+               return ret;
+       }
+       if (btrfs_header_nritems(mid) >
+           BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
+               return 0;
+
+       if (btrfs_header_nritems(mid) < 2)
+               err_on_enospc = 1;
+
+       left = read_node_slot(root, parent, pslot - 1);
+       if (left) {
+               btrfs_tree_lock(left);
+               wret = btrfs_cow_block(trans, root, left,
+                                      parent, pslot - 1, &left, 0);
+               if (wret) {
+                       ret = wret;
+                       goto enospc;
+               }
+       }
+       right = read_node_slot(root, parent, pslot + 1);
+       if (right) {
+               btrfs_tree_lock(right);
+               wret = btrfs_cow_block(trans, root, right,
+                                      parent, pslot + 1, &right, 0);
+               if (wret) {
+                       ret = wret;
+                       goto enospc;
+               }
+       }
+
+       /* first, try to make some room in the middle buffer */
+       if (left) {
+               orig_slot += btrfs_header_nritems(left);
+               wret = push_node_left(trans, root, left, mid, 1);
+               if (wret < 0)
+                       ret = wret;
+               if (btrfs_header_nritems(mid) < 2)
+                       err_on_enospc = 1;
+       }
+
+       /*
+        * then try to empty the right most buffer into the middle
+        */
+       if (right) {
+               wret = push_node_left(trans, root, mid, right, 1);
+               if (wret < 0 && wret != -ENOSPC)
+                       ret = wret;
+               if (btrfs_header_nritems(right) == 0) {
+                       u64 bytenr = right->start;
+                       u64 generation = btrfs_header_generation(parent);
+                       u32 blocksize = right->len;
+
+                       clean_tree_block(trans, root, right);
+                       btrfs_tree_unlock(right);
+                       free_extent_buffer(right);
+                       right = NULL;
+                       wret = del_ptr(trans, root, path, level + 1, pslot +
+                                      1);
+                       if (wret)
+                               ret = wret;
+                       wret = btrfs_free_extent(trans, root, bytenr,
+                                                blocksize, parent->start,
+                                                btrfs_header_owner(parent),
+                                                generation, 0, 0, 1);
+                       if (wret)
+                               ret = wret;
+               } else {
+                       struct btrfs_disk_key right_key;
+                       btrfs_node_key(right, &right_key, 0);
+                       btrfs_set_node_key(parent, &right_key, pslot + 1);
+                       btrfs_mark_buffer_dirty(parent);
+               }
+       }
+       if (btrfs_header_nritems(mid) == 1) {
+               /*
+                * we're not allowed to leave a node with one item in the
+                * tree during a delete.  A deletion from lower in the tree
+                * could try to delete the only pointer in this node.
+                * So, pull some keys from the left.
+                * There has to be a left pointer at this point because
+                * otherwise we would have pulled some pointers from the
+                * right
+                */
+               BUG_ON(!left);
+               wret = balance_node_right(trans, root, mid, left);
+               if (wret < 0) {
+                       ret = wret;
+                       goto enospc;
+               }
+               if (wret == 1) {
+                       wret = push_node_left(trans, root, left, mid, 1);
+                       if (wret < 0)
+                               ret = wret;
+               }
+               BUG_ON(wret == 1);
+       }
+       if (btrfs_header_nritems(mid) == 0) {
+               /* we've managed to empty the middle node, drop it */
+               u64 root_gen = btrfs_header_generation(parent);
+               u64 bytenr = mid->start;
+               u32 blocksize = mid->len;
+
+               clean_tree_block(trans, root, mid);
+               btrfs_tree_unlock(mid);
+               free_extent_buffer(mid);
+               mid = NULL;
+               wret = del_ptr(trans, root, path, level + 1, pslot);
+               if (wret)
+                       ret = wret;
+               wret = btrfs_free_extent(trans, root, bytenr, blocksize,
+                                        parent->start,
+                                        btrfs_header_owner(parent),
+                                        root_gen, 0, 0, 1);
+               if (wret)
+                       ret = wret;
+       } else {
+               /* update the parent key to reflect our changes */
+               struct btrfs_disk_key mid_key;
+               btrfs_node_key(mid, &mid_key, 0);
+               btrfs_set_node_key(parent, &mid_key, pslot);
+               btrfs_mark_buffer_dirty(parent);
+       }
+
+       /* update the path */
+       if (left) {
+               if (btrfs_header_nritems(left) > orig_slot) {
+                       extent_buffer_get(left);
+                       /* left was locked after cow */
+                       path->nodes[level] = left;
+                       path->slots[level + 1] -= 1;
+                       path->slots[level] = orig_slot;
+                       if (mid) {
+                               btrfs_tree_unlock(mid);
+                               free_extent_buffer(mid);
+                       }
+               } else {
+                       orig_slot -= btrfs_header_nritems(left);
+                       path->slots[level] = orig_slot;
+               }
+       }
+       /* double check we haven't messed things up */
+       check_block(root, path, level);
+       if (orig_ptr !=
+           btrfs_node_blockptr(path->nodes[level], path->slots[level]))
+               BUG();
+enospc:
+       if (right) {
+               btrfs_tree_unlock(right);
+               free_extent_buffer(right);
+       }
+       if (left) {
+               if (path->nodes[level] != left)
+                       btrfs_tree_unlock(left);
+               free_extent_buffer(left);
+       }
+       return ret;
+}
+
+/* returns zero if the push worked, non-zero otherwise */
+static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         struct btrfs_path *path, int level)
+{
+       struct extent_buffer *right = NULL;
+       struct extent_buffer *mid;
+       struct extent_buffer *left = NULL;
+       struct extent_buffer *parent = NULL;
+       int ret = 0;
+       int wret;
+       int pslot;
+       int orig_slot = path->slots[level];
+       u64 orig_ptr;
+
+       if (level == 0)
+               return 1;
+
+       mid = path->nodes[level];
+       WARN_ON(btrfs_header_generation(mid) != trans->transid);
+       orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+       if (level < BTRFS_MAX_LEVEL - 1)
+               parent = path->nodes[level + 1];
+       pslot = path->slots[level + 1];
+
+       if (!parent)
+               return 1;
+
+       left = read_node_slot(root, parent, pslot - 1);
+
+       /* first, try to make some room in the middle buffer */
+       if (left) {
+               u32 left_nr;
+
+               btrfs_tree_lock(left);
+               left_nr = btrfs_header_nritems(left);
+               if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+                       wret = 1;
+               } else {
+                       ret = btrfs_cow_block(trans, root, left, parent,
+                                             pslot - 1, &left, 0);
+                       if (ret)
+                               wret = 1;
+                       else {
+                               wret = push_node_left(trans, root,
+                                                     left, mid, 0);
+                       }
+               }
+               if (wret < 0)
+                       ret = wret;
+               if (wret == 0) {
+                       struct btrfs_disk_key disk_key;
+                       orig_slot += left_nr;
+                       btrfs_node_key(mid, &disk_key, 0);
+                       btrfs_set_node_key(parent, &disk_key, pslot);
+                       btrfs_mark_buffer_dirty(parent);
+                       if (btrfs_header_nritems(left) > orig_slot) {
+                               path->nodes[level] = left;
+                               path->slots[level + 1] -= 1;
+                               path->slots[level] = orig_slot;
+                               btrfs_tree_unlock(mid);
+                               free_extent_buffer(mid);
+                       } else {
+                               orig_slot -=
+                                       btrfs_header_nritems(left);
+                               path->slots[level] = orig_slot;
+                               btrfs_tree_unlock(left);
+                               free_extent_buffer(left);
+                       }
+                       return 0;
+               }
+               btrfs_tree_unlock(left);
+               free_extent_buffer(left);
+       }
+       right = read_node_slot(root, parent, pslot + 1);
+
+       /*
+        * then try to empty the right most buffer into the middle
+        */
+       if (right) {
+               u32 right_nr;
+               btrfs_tree_lock(right);
+               right_nr = btrfs_header_nritems(right);
+               if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+                       wret = 1;
+               } else {
+                       ret = btrfs_cow_block(trans, root, right,
+                                             parent, pslot + 1,
+                                             &right, 0);
+                       if (ret)
+                               wret = 1;
+                       else {
+                               wret = balance_node_right(trans, root,
+                                                         right, mid);
+                       }
+               }
+               if (wret < 0)
+                       ret = wret;
+               if (wret == 0) {
+                       struct btrfs_disk_key disk_key;
+
+                       btrfs_node_key(right, &disk_key, 0);
+                       btrfs_set_node_key(parent, &disk_key, pslot + 1);
+                       btrfs_mark_buffer_dirty(parent);
+
+                       if (btrfs_header_nritems(mid) <= orig_slot) {
+                               path->nodes[level] = right;
+                               path->slots[level + 1] += 1;
+                               path->slots[level] = orig_slot -
+                                       btrfs_header_nritems(mid);
+                               btrfs_tree_unlock(mid);
+                               free_extent_buffer(mid);
+                       } else {
+                               btrfs_tree_unlock(right);
+                               free_extent_buffer(right);
+                       }
+                       return 0;
+               }
+               btrfs_tree_unlock(right);
+               free_extent_buffer(right);
+       }
+       return 1;
+}
+
+/*
+ * readahead one full node of leaves
+ */
+static noinline void reada_for_search(struct btrfs_root *root,
+                                     struct btrfs_path *path,
+                                     int level, int slot, u64 objectid)
+{
+       struct extent_buffer *node;
+       struct btrfs_disk_key disk_key;
+       u32 nritems;
+       u64 search;
+       u64 lowest_read;
+       u64 highest_read;
+       u64 nread = 0;
+       int direction = path->reada;
+       struct extent_buffer *eb;
+       u32 nr;
+       u32 blocksize;
+       u32 nscan = 0;
+
+       if (level != 1)
+               return;
+
+       if (!path->nodes[level])
+               return;
+
+       node = path->nodes[level];
+
+       search = btrfs_node_blockptr(node, slot);
+       blocksize = btrfs_level_size(root, level - 1);
+       eb = btrfs_find_tree_block(root, search, blocksize);
+       if (eb) {
+               free_extent_buffer(eb);
+               return;
+       }
+
+       highest_read = search;
+       lowest_read = search;
+
+       nritems = btrfs_header_nritems(node);
+       nr = slot;
+       while(1) {
+               if (direction < 0) {
+                       if (nr == 0)
+                               break;
+                       nr--;
+               } else if (direction > 0) {
+                       nr++;
+                       if (nr >= nritems)
+                               break;
+               }
+               if (path->reada < 0 && objectid) {
+                       btrfs_node_key(node, &disk_key, nr);
+                       if (btrfs_disk_key_objectid(&disk_key) != objectid)
+                               break;
+               }
+               search = btrfs_node_blockptr(node, nr);
+               if ((search >= lowest_read && search <= highest_read) ||
+                   (search < lowest_read && lowest_read - search <= 32768) ||
+                   (search > highest_read && search - highest_read <= 32768)) {
+                       readahead_tree_block(root, search, blocksize,
+                                    btrfs_node_ptr_generation(node, nr));
+                       nread += blocksize;
+               }
+               nscan++;
+               if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
+                       break;
+               if(nread > (1024 * 1024) || nscan > 128)
+                       break;
+
+               if (search < lowest_read)
+                       lowest_read = search;
+               if (search > highest_read)
+                       highest_read = search;
+       }
+}
+
+static noinline void unlock_up(struct btrfs_path *path, int level,
+                              int lowest_unlock)
+{
+       int i;
+       int skip_level = level;
+       int no_skips = 0;
+       struct extent_buffer *t;
+
+       for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+               if (!path->nodes[i])
+                       break;
+               if (!path->locks[i])
+                       break;
+               if (!no_skips && path->slots[i] == 0) {
+                       skip_level = i + 1;
+                       continue;
+               }
+               if (!no_skips && path->keep_locks) {
+                       u32 nritems;
+                       t = path->nodes[i];
+                       nritems = btrfs_header_nritems(t);
+                       if (nritems < 1 || path->slots[i] >= nritems - 1) {
+                               skip_level = i + 1;
+                               continue;
+                       }
+               }
+               if (skip_level < i && i >= lowest_unlock)
+                       no_skips = 1;
+
+               t = path->nodes[i];
+               if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
+                       btrfs_tree_unlock(t);
+                       path->locks[i] = 0;
+               }
+       }
+}
+
+/*
+ * look for key in the tree.  path is filled in with nodes along the way
+ * if key is found, we return zero and you can find the item in the leaf
+ * level of the path (level 0)
+ *
+ * If the key isn't found, the path points to the slot where it should
+ * be inserted, and 1 is returned.  If there are other errors during the
+ * search a negative error number is returned.
+ *
+ * if ins_len > 0, nodes and leaves will be split as we walk down the
+ * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
+ * possible)
+ */
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_key *key, struct btrfs_path *p, int
+                     ins_len, int cow)
+{
+       struct extent_buffer *b;
+       struct extent_buffer *tmp;
+       int slot;
+       int ret;
+       int level;
+       int should_reada = p->reada;
+       int lowest_unlock = 1;
+       int blocksize;
+       u8 lowest_level = 0;
+       u64 blocknr;
+       u64 gen;
+       struct btrfs_key prealloc_block;
+
+       lowest_level = p->lowest_level;
+       WARN_ON(lowest_level && ins_len);
+       WARN_ON(p->nodes[0] != NULL);
+       WARN_ON(cow && root == root->fs_info->extent_root &&
+               !mutex_is_locked(&root->fs_info->alloc_mutex));
+       if (ins_len < 0)
+               lowest_unlock = 2;
+
+       prealloc_block.objectid = 0;
+
+again:
+       if (p->skip_locking)
+               b = btrfs_root_node(root);
+       else
+               b = btrfs_lock_root_node(root);
+
+       while (b) {
+               level = btrfs_header_level(b);
+
+               /*
+                * setup the path here so we can release it under lock
+                * contention with the cow code
+                */
+               p->nodes[level] = b;
+               if (!p->skip_locking)
+                       p->locks[level] = 1;
+
+               if (cow) {
+                       int wret;
+
+                       /* is a cow on this block not required */
+                       spin_lock(&root->fs_info->hash_lock);
+                       if (btrfs_header_generation(b) == trans->transid &&
+                           !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
+                               spin_unlock(&root->fs_info->hash_lock);
+                               goto cow_done;
+                       }
+                       spin_unlock(&root->fs_info->hash_lock);
+
+                       /* ok, we have to cow, is our old prealloc the right
+                        * size?
+                        */
+                       if (prealloc_block.objectid &&
+                           prealloc_block.offset != b->len) {
+                               btrfs_free_reserved_extent(root,
+                                          prealloc_block.objectid,
+                                          prealloc_block.offset);
+                               prealloc_block.objectid = 0;
+                       }
+
+                       /*
+                        * for higher level blocks, try not to allocate blocks
+                        * with the block and the parent locks held.
+                        */
+                       if (level > 1 && !prealloc_block.objectid &&
+                           btrfs_path_lock_waiting(p, level)) {
+                               u32 size = b->len;
+                               u64 hint = b->start;
+
+                               btrfs_release_path(root, p);
+                               ret = btrfs_reserve_extent(trans, root,
+                                                          size, size, 0,
+                                                          hint, (u64)-1,
+                                                          &prealloc_block, 0);
+                               BUG_ON(ret);
+                               goto again;
+                       }
+
+                       wret = btrfs_cow_block(trans, root, b,
+                                              p->nodes[level + 1],
+                                              p->slots[level + 1],
+                                              &b, prealloc_block.objectid);
+                       prealloc_block.objectid = 0;
+                       if (wret) {
+                               free_extent_buffer(b);
+                               ret = wret;
+                               goto done;
+                       }
+               }
+cow_done:
+               BUG_ON(!cow && ins_len);
+               if (level != btrfs_header_level(b))
+                       WARN_ON(1);
+               level = btrfs_header_level(b);
+
+               p->nodes[level] = b;
+               if (!p->skip_locking)
+                       p->locks[level] = 1;
+
+               ret = check_block(root, p, level);
+               if (ret) {
+                       ret = -1;
+                       goto done;
+               }
+
+               ret = bin_search(b, key, level, &slot);
+               if (level != 0) {
+                       if (ret && slot > 0)
+                               slot -= 1;
+                       p->slots[level] = slot;
+                       if (ins_len > 0 && btrfs_header_nritems(b) >=
+                           BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
+                               int sret = split_node(trans, root, p, level);
+                               BUG_ON(sret > 0);
+                               if (sret) {
+                                       ret = sret;
+                                       goto done;
+                               }
+                               b = p->nodes[level];
+                               slot = p->slots[level];
+                       } else if (ins_len < 0) {
+                               int sret = balance_level(trans, root, p,
+                                                        level);
+                               if (sret) {
+                                       ret = sret;
+                                       goto done;
+                               }
+                               b = p->nodes[level];
+                               if (!b) {
+                                       btrfs_release_path(NULL, p);
+                                       goto again;
+                               }
+                               slot = p->slots[level];
+                               BUG_ON(btrfs_header_nritems(b) == 1);
+                       }
+                       unlock_up(p, level, lowest_unlock);
+
+                       /* this is only true while dropping a snapshot */
+                       if (level == lowest_level) {
+                               break;
+                       }
+
+                       blocknr = btrfs_node_blockptr(b, slot);
+                       gen = btrfs_node_ptr_generation(b, slot);
+                       blocksize = btrfs_level_size(root, level - 1);
+
+                       tmp = btrfs_find_tree_block(root, blocknr, blocksize);
+                       if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+                               b = tmp;
+                       } else {
+                               /*
+                                * reduce lock contention at high levels
+                                * of the btree by dropping locks before
+                                * we read.
+                                */
+                               if (level > 1) {
+                                       btrfs_release_path(NULL, p);
+                                       if (tmp)
+                                               free_extent_buffer(tmp);
+                                       if (should_reada)
+                                               reada_for_search(root, p,
+                                                                level, slot,
+                                                                key->objectid);
+
+                                       tmp = read_tree_block(root, blocknr,
+                                                        blocksize, gen);
+                                       if (tmp)
+                                               free_extent_buffer(tmp);
+                                       goto again;
+                               } else {
+                                       if (tmp)
+                                               free_extent_buffer(tmp);
+                                       if (should_reada)
+                                               reada_for_search(root, p,
+                                                                level, slot,
+                                                                key->objectid);
+                                       b = read_node_slot(root, b, slot);
+                               }
+                       }
+                       if (!p->skip_locking)
+                               btrfs_tree_lock(b);
+               } else {
+                       p->slots[level] = slot;
+                       if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
+                           sizeof(struct btrfs_item) + ins_len) {
+                               int sret = split_leaf(trans, root, key,
+                                                     p, ins_len, ret == 0);
+                               BUG_ON(sret > 0);
+                               if (sret) {
+                                       ret = sret;
+                                       goto done;
+                               }
+                       }
+                       unlock_up(p, level, lowest_unlock);
+                       goto done;
+               }
+       }
+       ret = 1;
+done:
+       if (prealloc_block.objectid) {
+               btrfs_free_reserved_extent(root,
+                          prealloc_block.objectid,
+                          prealloc_block.offset);
+       }
+
+       return ret;
+}
+
+/*
+ * adjust the pointers going up the tree, starting at level
+ * making sure the right key of each node is points to 'key'.
+ * This is used after shifting pointers to the left, so it stops
+ * fixing up pointers when a given leaf/node is not in slot 0 of the
+ * higher levels
+ *
+ * If this fails to write a tree block, it returns -1, but continues
+ * fixing up the blocks in ram so the tree is consistent.
+ */
+static int fixup_low_keys(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root, struct btrfs_path *path,
+                         struct btrfs_disk_key *key, int level)
+{
+       int i;
+       int ret = 0;
+       struct extent_buffer *t;
+
+       for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+               int tslot = path->slots[i];
+               if (!path->nodes[i])
+                       break;
+               t = path->nodes[i];
+               btrfs_set_node_key(t, key, tslot);
+               btrfs_mark_buffer_dirty(path->nodes[i]);
+               if (tslot != 0)
+                       break;
+       }
+       return ret;
+}
+
+/*
+ * update item key.
+ *
+ * This function isn't completely safe. It's the caller's responsibility
+ * that the new key won't break the order
+ */
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, struct btrfs_path *path,
+                           struct btrfs_key *new_key)
+{
+       struct btrfs_disk_key disk_key;
+       struct extent_buffer *eb;
+       int slot;
+
+       eb = path->nodes[0];
+       slot = path->slots[0];
+       if (slot > 0) {
+               btrfs_item_key(eb, &disk_key, slot - 1);
+               if (comp_keys(&disk_key, new_key) >= 0)
+                       return -1;
+       }
+       if (slot < btrfs_header_nritems(eb) - 1) {
+               btrfs_item_key(eb, &disk_key, slot + 1);
+               if (comp_keys(&disk_key, new_key) <= 0)
+                       return -1;
+       }
+
+       btrfs_cpu_key_to_disk(&disk_key, new_key);
+       btrfs_set_item_key(eb, &disk_key, slot);
+       btrfs_mark_buffer_dirty(eb);
+       if (slot == 0)
+               fixup_low_keys(trans, root, path, &disk_key, 1);
+       return 0;
+}
+
+/*
+ * try to push data from one node into the next node left in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the left hand block.
+ */
+static int push_node_left(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root, struct extent_buffer *dst,
+                         struct extent_buffer *src, int empty)
+{
+       int push_items = 0;
+       int src_nritems;
+       int dst_nritems;
+       int ret = 0;
+
+       src_nritems = btrfs_header_nritems(src);
+       dst_nritems = btrfs_header_nritems(dst);
+       push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
+       WARN_ON(btrfs_header_generation(src) != trans->transid);
+       WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+       if (!empty && src_nritems <= 8)
+               return 1;
+
+       if (push_items <= 0) {
+               return 1;
+       }
+
+       if (empty) {
+               push_items = min(src_nritems, push_items);
+               if (push_items < src_nritems) {
+                       /* leave at least 8 pointers in the node if
+                        * we aren't going to empty it
+                        */
+                       if (src_nritems - push_items < 8) {
+                               if (push_items <= 8)
+                                       return 1;
+                               push_items -= 8;
+                       }
+               }
+       } else
+               push_items = min(src_nritems - 8, push_items);
+
+       copy_extent_buffer(dst, src,
+                          btrfs_node_key_ptr_offset(dst_nritems),
+                          btrfs_node_key_ptr_offset(0),
+                          push_items * sizeof(struct btrfs_key_ptr));
+
+       if (push_items < src_nritems) {
+               memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
+                                     btrfs_node_key_ptr_offset(push_items),
+                                     (src_nritems - push_items) *
+                                     sizeof(struct btrfs_key_ptr));
+       }
+       btrfs_set_header_nritems(src, src_nritems - push_items);
+       btrfs_set_header_nritems(dst, dst_nritems + push_items);
+       btrfs_mark_buffer_dirty(src);
+       btrfs_mark_buffer_dirty(dst);
+
+       ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
+       BUG_ON(ret);
+
+       return ret;
+}
+
+/*
+ * try to push data from one node into the next node right in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the right hand block.
+ *
+ * this will  only push up to 1/2 the contents of the left node over
+ */
+static int balance_node_right(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct extent_buffer *dst,
+                             struct extent_buffer *src)
+{
+       int push_items = 0;
+       int max_push;
+       int src_nritems;
+       int dst_nritems;
+       int ret = 0;
+
+       WARN_ON(btrfs_header_generation(src) != trans->transid);
+       WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+       src_nritems = btrfs_header_nritems(src);
+       dst_nritems = btrfs_header_nritems(dst);
+       push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
+       if (push_items <= 0) {
+               return 1;
+       }
+
+       if (src_nritems < 4) {
+               return 1;
+       }
+
+       max_push = src_nritems / 2 + 1;
+       /* don't try to empty the node */
+       if (max_push >= src_nritems) {
+               return 1;
+       }
+
+       if (max_push < push_items)
+               push_items = max_push;
+
+       memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
+                                     btrfs_node_key_ptr_offset(0),
+                                     (dst_nritems) *
+                                     sizeof(struct btrfs_key_ptr));
+
+       copy_extent_buffer(dst, src,
+                          btrfs_node_key_ptr_offset(0),
+                          btrfs_node_key_ptr_offset(src_nritems - push_items),
+                          push_items * sizeof(struct btrfs_key_ptr));
+
+       btrfs_set_header_nritems(src, src_nritems - push_items);
+       btrfs_set_header_nritems(dst, dst_nritems + push_items);
+
+       btrfs_mark_buffer_dirty(src);
+       btrfs_mark_buffer_dirty(dst);
+
+       ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
+       BUG_ON(ret);
+
+       return ret;
+}
+
+/*
+ * helper function to insert a new root level in the tree.
+ * A new node is allocated, and a single item is inserted to
+ * point to the existing root
+ *
+ * returns zero on success or < 0 on failure.
+ */
+static int noinline insert_new_root(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
+                          struct btrfs_path *path, int level)
+{
+       u64 lower_gen;
+       struct extent_buffer *lower;
+       struct extent_buffer *c;
+       struct extent_buffer *old;
+       struct btrfs_disk_key lower_key;
+       int ret;
+
+       BUG_ON(path->nodes[level]);
+       BUG_ON(path->nodes[level-1] != root->node);
+
+       lower = path->nodes[level-1];
+       if (level == 1)
+               btrfs_item_key(lower, &lower_key, 0);
+       else
+               btrfs_node_key(lower, &lower_key, 0);
+
+       c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
+                                  root->root_key.objectid, trans->transid,
+                                  level, root->node->start, 0);
+       if (IS_ERR(c))
+               return PTR_ERR(c);
+
+       memset_extent_buffer(c, 0, 0, root->nodesize);
+       btrfs_set_header_nritems(c, 1);
+       btrfs_set_header_level(c, level);
+       btrfs_set_header_bytenr(c, c->start);
+       btrfs_set_header_generation(c, trans->transid);
+       btrfs_set_header_owner(c, root->root_key.objectid);
+
+       write_extent_buffer(c, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(c),
+                           BTRFS_FSID_SIZE);
+
+       write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
+                           (unsigned long)btrfs_header_chunk_tree_uuid(c),
+                           BTRFS_UUID_SIZE);
+
+       btrfs_set_node_key(c, &lower_key, 0);
+       btrfs_set_node_blockptr(c, 0, lower->start);
+       lower_gen = btrfs_header_generation(lower);
+       WARN_ON(lower_gen != trans->transid);
+
+       btrfs_set_node_ptr_generation(c, 0, lower_gen);
+
+       btrfs_mark_buffer_dirty(c);
+
+       spin_lock(&root->node_lock);
+       old = root->node;
+       root->node = c;
+       spin_unlock(&root->node_lock);
+
+       ret = btrfs_update_extent_ref(trans, root, lower->start,
+                                     lower->start, c->start,
+                                     root->root_key.objectid,
+                                     trans->transid, level - 1, 0);
+       BUG_ON(ret);
+
+       /* the super has an extra ref to root->node */
+       free_extent_buffer(old);
+
+       add_root_to_dirty_list(root);
+       extent_buffer_get(c);
+       path->nodes[level] = c;
+       path->locks[level] = 1;
+       path->slots[level] = 0;
+       return 0;
+}
+
+/*
+ * worker function to insert a single pointer in a node.
+ * the node should have enough room for the pointer already
+ *
+ * slot and level indicate where you want the key to go, and
+ * blocknr is the block the key points to.
+ *
+ * returns zero on success and < 0 on any error
+ */
+static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_path *path, struct btrfs_disk_key
+                     *key, u64 bytenr, int slot, int level)
+{
+       struct extent_buffer *lower;
+       int nritems;
+
+       BUG_ON(!path->nodes[level]);
+       lower = path->nodes[level];
+       nritems = btrfs_header_nritems(lower);
+       if (slot > nritems)
+               BUG();
+       if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
+               BUG();
+       if (slot != nritems) {
+               memmove_extent_buffer(lower,
+                             btrfs_node_key_ptr_offset(slot + 1),
+                             btrfs_node_key_ptr_offset(slot),
+                             (nritems - slot) * sizeof(struct btrfs_key_ptr));
+       }
+       btrfs_set_node_key(lower, key, slot);
+       btrfs_set_node_blockptr(lower, slot, bytenr);
+       WARN_ON(trans->transid == 0);
+       btrfs_set_node_ptr_generation(lower, slot, trans->transid);
+       btrfs_set_header_nritems(lower, nritems + 1);
+       btrfs_mark_buffer_dirty(lower);
+       return 0;
+}
+
+/*
+ * split the node at the specified level in path in two.
+ * The path is corrected to point to the appropriate node after the split
+ *
+ * Before splitting this tries to make some room in the node by pushing
+ * left and right, if either one works, it returns right away.
+ *
+ * returns 0 on success and < 0 on failure
+ */
+static noinline int split_node(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_path *path, int level)
+{
+       struct extent_buffer *c;
+       struct extent_buffer *split;
+       struct btrfs_disk_key disk_key;
+       int mid;
+       int ret;
+       int wret;
+       u32 c_nritems;
+
+       c = path->nodes[level];
+       WARN_ON(btrfs_header_generation(c) != trans->transid);
+       if (c == root->node) {
+               /* trying to split the root, lets make a new one */
+               ret = insert_new_root(trans, root, path, level + 1);
+               if (ret)
+                       return ret;
+       } else {
+               ret = push_nodes_for_insert(trans, root, path, level);
+               c = path->nodes[level];
+               if (!ret && btrfs_header_nritems(c) <
+                   BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
+                       return 0;
+               if (ret < 0)
+                       return ret;
+       }
+
+       c_nritems = btrfs_header_nritems(c);
+
+       split = btrfs_alloc_free_block(trans, root, root->nodesize,
+                                       path->nodes[level + 1]->start,
+                                       root->root_key.objectid,
+                                       trans->transid, level, c->start, 0);
+       if (IS_ERR(split))
+               return PTR_ERR(split);
+
+       btrfs_set_header_flags(split, btrfs_header_flags(c));
+       btrfs_set_header_level(split, btrfs_header_level(c));
+       btrfs_set_header_bytenr(split, split->start);
+       btrfs_set_header_generation(split, trans->transid);
+       btrfs_set_header_owner(split, root->root_key.objectid);
+       btrfs_set_header_flags(split, 0);
+       write_extent_buffer(split, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(split),
+                           BTRFS_FSID_SIZE);
+       write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
+                           (unsigned long)btrfs_header_chunk_tree_uuid(split),
+                           BTRFS_UUID_SIZE);
+
+       mid = (c_nritems + 1) / 2;
+
+       copy_extent_buffer(split, c,
+                          btrfs_node_key_ptr_offset(0),
+                          btrfs_node_key_ptr_offset(mid),
+                          (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
+       btrfs_set_header_nritems(split, c_nritems - mid);
+       btrfs_set_header_nritems(c, mid);
+       ret = 0;
+
+       btrfs_mark_buffer_dirty(c);
+       btrfs_mark_buffer_dirty(split);
+
+       btrfs_node_key(split, &disk_key, 0);
+       wret = insert_ptr(trans, root, path, &disk_key, split->start,
+                         path->slots[level + 1] + 1,
+                         level + 1);
+       if (wret)
+               ret = wret;
+
+       ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
+       BUG_ON(ret);
+
+       if (path->slots[level] >= mid) {
+               path->slots[level] -= mid;
+               btrfs_tree_unlock(c);
+               free_extent_buffer(c);
+               path->nodes[level] = split;
+               path->slots[level + 1] += 1;
+       } else {
+               btrfs_tree_unlock(split);
+               free_extent_buffer(split);
+       }
+       return ret;
+}
+
+/*
+ * how many bytes are required to store the items in a leaf.  start
+ * and nr indicate which items in the leaf to check.  This totals up the
+ * space used both by the item structs and the item data
+ */
+static int leaf_space_used(struct extent_buffer *l, int start, int nr)
+{
+       int data_len;
+       int nritems = btrfs_header_nritems(l);
+       int end = min(nritems, start + nr) - 1;
+
+       if (!nr)
+               return 0;
+       data_len = btrfs_item_end_nr(l, start);
+       data_len = data_len - btrfs_item_offset_nr(l, end);
+       data_len += sizeof(struct btrfs_item) * nr;
+       WARN_ON(data_len < 0);
+       return data_len;
+}
+
+/*
+ * The space between the end of the leaf items and
+ * the start of the leaf data.  IOW, how much room
+ * the leaf has left for both items and data
+ */
+int noinline btrfs_leaf_free_space(struct btrfs_root *root,
+                                  struct extent_buffer *leaf)
+{
+       int nritems = btrfs_header_nritems(leaf);
+       int ret;
+       ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
+       if (ret < 0) {
+               printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
+                      ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
+                      leaf_space_used(leaf, 0, nritems), nritems);
+       }
+       return ret;
+}
+
+/*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+                          *root, struct btrfs_path *path, int data_size,
+                          int empty)
+{
+       struct extent_buffer *left = path->nodes[0];
+       struct extent_buffer *right;
+       struct extent_buffer *upper;
+       struct btrfs_disk_key disk_key;
+       int slot;
+       u32 i;
+       int free_space;
+       int push_space = 0;
+       int push_items = 0;
+       struct btrfs_item *item;
+       u32 left_nritems;
+       u32 nr;
+       u32 right_nritems;
+       u32 data_end;
+       u32 this_item_size;
+       int ret;
+
+       slot = path->slots[1];
+       if (!path->nodes[1]) {
+               return 1;
+       }
+       upper = path->nodes[1];
+       if (slot >= btrfs_header_nritems(upper) - 1)
+               return 1;
+
+       WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
+       right = read_node_slot(root, upper, slot + 1);
+       btrfs_tree_lock(right);
+       free_space = btrfs_leaf_free_space(root, right);
+       if (free_space < data_size + sizeof(struct btrfs_item))
+               goto out_unlock;
+
+       /* cow and double check */
+       ret = btrfs_cow_block(trans, root, right, upper,
+                             slot + 1, &right, 0);
+       if (ret)
+               goto out_unlock;
+
+       free_space = btrfs_leaf_free_space(root, right);
+       if (free_space < data_size + sizeof(struct btrfs_item))
+               goto out_unlock;
+
+       left_nritems = btrfs_header_nritems(left);
+       if (left_nritems == 0)
+               goto out_unlock;
+
+       if (empty)
+               nr = 0;
+       else
+               nr = 1;
+
+       if (path->slots[0] >= left_nritems)
+               push_space += data_size + sizeof(*item);
+
+       i = left_nritems - 1;
+       while (i >= nr) {
+               item = btrfs_item_nr(left, i);
+
+               if (!empty && push_items > 0) {
+                       if (path->slots[0] > i)
+                               break;
+                       if (path->slots[0] == i) {
+                               int space = btrfs_leaf_free_space(root, left);
+                               if (space + push_space * 2 > free_space)
+                                       break;
+                       }
+               }
+
+               if (path->slots[0] == i)
+                       push_space += data_size + sizeof(*item);
+
+               if (!left->map_token) {
+                       map_extent_buffer(left, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &left->map_token, &left->kaddr,
+                                       &left->map_start, &left->map_len,
+                                       KM_USER1);
+               }
+
+               this_item_size = btrfs_item_size(left, item);
+               if (this_item_size + sizeof(*item) + push_space > free_space)
+                       break;
+
+               push_items++;
+               push_space += this_item_size + sizeof(*item);
+               if (i == 0)
+                       break;
+               i--;
+       }
+       if (left->map_token) {
+               unmap_extent_buffer(left, left->map_token, KM_USER1);
+               left->map_token = NULL;
+       }
+
+       if (push_items == 0)
+               goto out_unlock;
+
+       if (!empty && push_items == left_nritems)
+               WARN_ON(1);
+
+       /* push left to right */
+       right_nritems = btrfs_header_nritems(right);
+
+       push_space = btrfs_item_end_nr(left, left_nritems - push_items);
+       push_space -= leaf_data_end(root, left);
+
+       /* make room in the right data area */
+       data_end = leaf_data_end(root, right);
+       memmove_extent_buffer(right,
+                             btrfs_leaf_data(right) + data_end - push_space,
+                             btrfs_leaf_data(right) + data_end,
+                             BTRFS_LEAF_DATA_SIZE(root) - data_end);
+
+       /* copy from the left data area */
+       copy_extent_buffer(right, left, btrfs_leaf_data(right) +
+                    BTRFS_LEAF_DATA_SIZE(root) - push_space,
+                    btrfs_leaf_data(left) + leaf_data_end(root, left),
+                    push_space);
+
+       memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
+                             btrfs_item_nr_offset(0),
+                             right_nritems * sizeof(struct btrfs_item));
+
+       /* copy the items from left to right */
+       copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
+                  btrfs_item_nr_offset(left_nritems - push_items),
+                  push_items * sizeof(struct btrfs_item));
+
+       /* update the item pointers */
+       right_nritems += push_items;
+       btrfs_set_header_nritems(right, right_nritems);
+       push_space = BTRFS_LEAF_DATA_SIZE(root);
+       for (i = 0; i < right_nritems; i++) {
+               item = btrfs_item_nr(right, i);
+               if (!right->map_token) {
+                       map_extent_buffer(right, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &right->map_token, &right->kaddr,
+                                       &right->map_start, &right->map_len,
+                                       KM_USER1);
+               }
+               push_space -= btrfs_item_size(right, item);
+               btrfs_set_item_offset(right, item, push_space);
+       }
+
+       if (right->map_token) {
+               unmap_extent_buffer(right, right->map_token, KM_USER1);
+               right->map_token = NULL;
+       }
+       left_nritems -= push_items;
+       btrfs_set_header_nritems(left, left_nritems);
+
+       if (left_nritems)
+               btrfs_mark_buffer_dirty(left);
+       btrfs_mark_buffer_dirty(right);
+
+       ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
+       BUG_ON(ret);
+
+       btrfs_item_key(right, &disk_key, 0);
+       btrfs_set_node_key(upper, &disk_key, slot + 1);
+       btrfs_mark_buffer_dirty(upper);
+
+       /* then fixup the leaf pointer in the path */
+       if (path->slots[0] >= left_nritems) {
+               path->slots[0] -= left_nritems;
+               if (btrfs_header_nritems(path->nodes[0]) == 0)
+                       clean_tree_block(trans, root, path->nodes[0]);
+               btrfs_tree_unlock(path->nodes[0]);
+               free_extent_buffer(path->nodes[0]);
+               path->nodes[0] = right;
+               path->slots[1] += 1;
+       } else {
+               btrfs_tree_unlock(right);
+               free_extent_buffer(right);
+       }
+       return 0;
+
+out_unlock:
+       btrfs_tree_unlock(right);
+       free_extent_buffer(right);
+       return 1;
+}
+
+/*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+                         *root, struct btrfs_path *path, int data_size,
+                         int empty)
+{
+       struct btrfs_disk_key disk_key;
+       struct extent_buffer *right = path->nodes[0];
+       struct extent_buffer *left;
+       int slot;
+       int i;
+       int free_space;
+       int push_space = 0;
+       int push_items = 0;
+       struct btrfs_item *item;
+       u32 old_left_nritems;
+       u32 right_nritems;
+       u32 nr;
+       int ret = 0;
+       int wret;
+       u32 this_item_size;
+       u32 old_left_item_size;
+
+       slot = path->slots[1];
+       if (slot == 0)
+               return 1;
+       if (!path->nodes[1])
+               return 1;
+
+       right_nritems = btrfs_header_nritems(right);
+       if (right_nritems == 0) {
+               return 1;
+       }
+
+       WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
+       left = read_node_slot(root, path->nodes[1], slot - 1);
+       btrfs_tree_lock(left);
+       free_space = btrfs_leaf_free_space(root, left);
+       if (free_space < data_size + sizeof(struct btrfs_item)) {
+               ret = 1;
+               goto out;
+       }
+
+       /* cow and double check */
+       ret = btrfs_cow_block(trans, root, left,
+                             path->nodes[1], slot - 1, &left, 0);
+       if (ret) {
+               /* we hit -ENOSPC, but it isn't fatal here */
+               ret = 1;
+               goto out;
+       }
+
+       free_space = btrfs_leaf_free_space(root, left);
+       if (free_space < data_size + sizeof(struct btrfs_item)) {
+               ret = 1;
+               goto out;
+       }
+
+       if (empty)
+               nr = right_nritems;
+       else
+               nr = right_nritems - 1;
+
+       for (i = 0; i < nr; i++) {
+               item = btrfs_item_nr(right, i);
+               if (!right->map_token) {
+                       map_extent_buffer(right, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &right->map_token, &right->kaddr,
+                                       &right->map_start, &right->map_len,
+                                       KM_USER1);
+               }
+
+               if (!empty && push_items > 0) {
+                       if (path->slots[0] < i)
+                               break;
+                       if (path->slots[0] == i) {
+                               int space = btrfs_leaf_free_space(root, right);
+                               if (space + push_space * 2 > free_space)
+                                       break;
+                       }
+               }
+
+               if (path->slots[0] == i)
+                       push_space += data_size + sizeof(*item);
+
+               this_item_size = btrfs_item_size(right, item);
+               if (this_item_size + sizeof(*item) + push_space > free_space)
+                       break;
+
+               push_items++;
+               push_space += this_item_size + sizeof(*item);
+       }
+
+       if (right->map_token) {
+               unmap_extent_buffer(right, right->map_token, KM_USER1);
+               right->map_token = NULL;
+       }
+
+       if (push_items == 0) {
+               ret = 1;
+               goto out;
+       }
+       if (!empty && push_items == btrfs_header_nritems(right))
+               WARN_ON(1);
+
+       /* push data from right to left */
+       copy_extent_buffer(left, right,
+                          btrfs_item_nr_offset(btrfs_header_nritems(left)),
+                          btrfs_item_nr_offset(0),
+                          push_items * sizeof(struct btrfs_item));
+
+       push_space = BTRFS_LEAF_DATA_SIZE(root) -
+                    btrfs_item_offset_nr(right, push_items -1);
+
+       copy_extent_buffer(left, right, btrfs_leaf_data(left) +
+                    leaf_data_end(root, left) - push_space,
+                    btrfs_leaf_data(right) +
+                    btrfs_item_offset_nr(right, push_items - 1),
+                    push_space);
+       old_left_nritems = btrfs_header_nritems(left);
+       BUG_ON(old_left_nritems < 0);
+
+       old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
+       for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
+               u32 ioff;
+
+               item = btrfs_item_nr(left, i);
+               if (!left->map_token) {
+                       map_extent_buffer(left, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &left->map_token, &left->kaddr,
+                                       &left->map_start, &left->map_len,
+                                       KM_USER1);
+               }
+
+               ioff = btrfs_item_offset(left, item);
+               btrfs_set_item_offset(left, item,
+                     ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+       }
+       btrfs_set_header_nritems(left, old_left_nritems + push_items);
+       if (left->map_token) {
+               unmap_extent_buffer(left, left->map_token, KM_USER1);
+               left->map_token = NULL;
+       }
+
+       /* fixup right node */
+       if (push_items > right_nritems) {
+               printk("push items %d nr %u\n", push_items, right_nritems);
+               WARN_ON(1);
+       }
+
+       if (push_items < right_nritems) {
+               push_space = btrfs_item_offset_nr(right, push_items - 1) -
+                                                 leaf_data_end(root, right);
+               memmove_extent_buffer(right, btrfs_leaf_data(right) +
+                                     BTRFS_LEAF_DATA_SIZE(root) - push_space,
+                                     btrfs_leaf_data(right) +
+                                     leaf_data_end(root, right), push_space);
+
+               memmove_extent_buffer(right, btrfs_item_nr_offset(0),
+                             btrfs_item_nr_offset(push_items),
+                            (btrfs_header_nritems(right) - push_items) *
+                            sizeof(struct btrfs_item));
+       }
+       right_nritems -= push_items;
+       btrfs_set_header_nritems(right, right_nritems);
+       push_space = BTRFS_LEAF_DATA_SIZE(root);
+       for (i = 0; i < right_nritems; i++) {
+               item = btrfs_item_nr(right, i);
+
+               if (!right->map_token) {
+                       map_extent_buffer(right, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &right->map_token, &right->kaddr,
+                                       &right->map_start, &right->map_len,
+                                       KM_USER1);
+               }
+
+               push_space = push_space - btrfs_item_size(right, item);
+               btrfs_set_item_offset(right, item, push_space);
+       }
+       if (right->map_token) {
+               unmap_extent_buffer(right, right->map_token, KM_USER1);
+               right->map_token = NULL;
+       }
+
+       btrfs_mark_buffer_dirty(left);
+       if (right_nritems)
+               btrfs_mark_buffer_dirty(right);
+
+       ret = btrfs_update_ref(trans, root, right, left,
+                              old_left_nritems, push_items);
+       BUG_ON(ret);
+
+       btrfs_item_key(right, &disk_key, 0);
+       wret = fixup_low_keys(trans, root, path, &disk_key, 1);
+       if (wret)
+               ret = wret;
+
+       /* then fixup the leaf pointer in the path */
+       if (path->slots[0] < push_items) {
+               path->slots[0] += old_left_nritems;
+               if (btrfs_header_nritems(path->nodes[0]) == 0)
+                       clean_tree_block(trans, root, path->nodes[0]);
+               btrfs_tree_unlock(path->nodes[0]);
+               free_extent_buffer(path->nodes[0]);
+               path->nodes[0] = left;
+               path->slots[1] -= 1;
+       } else {
+               btrfs_tree_unlock(left);
+               free_extent_buffer(left);
+               path->slots[0] -= push_items;
+       }
+       BUG_ON(path->slots[0] < 0);
+       return ret;
+out:
+       btrfs_tree_unlock(left);
+       free_extent_buffer(left);
+       return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int split_leaf(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_key *ins_key,
+                              struct btrfs_path *path, int data_size,
+                              int extend)
+{
+       struct extent_buffer *l;
+       u32 nritems;
+       int mid;
+       int slot;
+       struct extent_buffer *right;
+       int space_needed = data_size + sizeof(struct btrfs_item);
+       int data_copy_size;
+       int rt_data_off;
+       int i;
+       int ret = 0;
+       int wret;
+       int double_split;
+       int num_doubles = 0;
+       struct btrfs_disk_key disk_key;
+
+       if (extend)
+               space_needed = data_size;
+
+       /* first try to make some room by pushing left and right */
+       if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
+               wret = push_leaf_right(trans, root, path, data_size, 0);
+               if (wret < 0) {
+                       return wret;
+               }
+               if (wret) {
+                       wret = push_leaf_left(trans, root, path, data_size, 0);
+                       if (wret < 0)
+                               return wret;
+               }
+               l = path->nodes[0];
+
+               /* did the pushes work? */
+               if (btrfs_leaf_free_space(root, l) >= space_needed)
+                       return 0;
+       }
+
+       if (!path->nodes[1]) {
+               ret = insert_new_root(trans, root, path, 1);
+               if (ret)
+                       return ret;
+       }
+again:
+       double_split = 0;
+       l = path->nodes[0];
+       slot = path->slots[0];
+       nritems = btrfs_header_nritems(l);
+       mid = (nritems + 1)/ 2;
+
+       right = btrfs_alloc_free_block(trans, root, root->leafsize,
+                                       path->nodes[1]->start,
+                                       root->root_key.objectid,
+                                       trans->transid, 0, l->start, 0);
+       if (IS_ERR(right)) {
+               BUG_ON(1);
+               return PTR_ERR(right);
+       }
+
+       memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
+       btrfs_set_header_bytenr(right, right->start);
+       btrfs_set_header_generation(right, trans->transid);
+       btrfs_set_header_owner(right, root->root_key.objectid);
+       btrfs_set_header_level(right, 0);
+       write_extent_buffer(right, root->fs_info->fsid,
+                           (unsigned long)btrfs_header_fsid(right),
+                           BTRFS_FSID_SIZE);
+
+       write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+                           (unsigned long)btrfs_header_chunk_tree_uuid(right),
+                           BTRFS_UUID_SIZE);
+       if (mid <= slot) {
+               if (nritems == 1 ||
+                   leaf_space_used(l, mid, nritems - mid) + space_needed >
+                       BTRFS_LEAF_DATA_SIZE(root)) {
+                       if (slot >= nritems) {
+                               btrfs_cpu_key_to_disk(&disk_key, ins_key);
+                               btrfs_set_header_nritems(right, 0);
+                               wret = insert_ptr(trans, root, path,
+                                                 &disk_key, right->start,
+                                                 path->slots[1] + 1, 1);
+                               if (wret)
+                                       ret = wret;
+
+                               btrfs_tree_unlock(path->nodes[0]);
+                               free_extent_buffer(path->nodes[0]);
+                               path->nodes[0] = right;
+                               path->slots[0] = 0;
+                               path->slots[1] += 1;
+                               btrfs_mark_buffer_dirty(right);
+                               return ret;
+                       }
+                       mid = slot;
+                       if (mid != nritems &&
+                           leaf_space_used(l, mid, nritems - mid) +
+                           space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
+                               double_split = 1;
+                       }
+               }
+       } else {
+               if (leaf_space_used(l, 0, mid + 1) + space_needed >
+                       BTRFS_LEAF_DATA_SIZE(root)) {
+                       if (!extend && slot == 0) {
+                               btrfs_cpu_key_to_disk(&disk_key, ins_key);
+                               btrfs_set_header_nritems(right, 0);
+                               wret = insert_ptr(trans, root, path,
+                                                 &disk_key,
+                                                 right->start,
+                                                 path->slots[1], 1);
+                               if (wret)
+                                       ret = wret;
+                               btrfs_tree_unlock(path->nodes[0]);
+                               free_extent_buffer(path->nodes[0]);
+                               path->nodes[0] = right;
+                               path->slots[0] = 0;
+                               if (path->slots[1] == 0) {
+                                       wret = fixup_low_keys(trans, root,
+                                                  path, &disk_key, 1);
+                                       if (wret)
+                                               ret = wret;
+                               }
+                               btrfs_mark_buffer_dirty(right);
+                               return ret;
+                       } else if (extend && slot == 0) {
+                               mid = 1;
+                       } else {
+                               mid = slot;
+                               if (mid != nritems &&
+                                   leaf_space_used(l, mid, nritems - mid) +
+                                   space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       double_split = 1;
+                               }
+                       }
+               }
+       }
+       nritems = nritems - mid;
+       btrfs_set_header_nritems(right, nritems);
+       data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+       copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+                          btrfs_item_nr_offset(mid),
+                          nritems * sizeof(struct btrfs_item));
+
+       copy_extent_buffer(right, l,
+                    btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+                    data_copy_size, btrfs_leaf_data(l) +
+                    leaf_data_end(root, l), data_copy_size);
+
+       rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+                     btrfs_item_end_nr(l, mid);
+
+       for (i = 0; i < nritems; i++) {
+               struct btrfs_item *item = btrfs_item_nr(right, i);
+               u32 ioff;
+
+               if (!right->map_token) {
+                       map_extent_buffer(right, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &right->map_token, &right->kaddr,
+                                       &right->map_start, &right->map_len,
+                                       KM_USER1);
+               }
+
+               ioff = btrfs_item_offset(right, item);
+               btrfs_set_item_offset(right, item, ioff + rt_data_off);
+       }
+
+       if (right->map_token) {
+               unmap_extent_buffer(right, right->map_token, KM_USER1);
+               right->map_token = NULL;
+       }
+
+       btrfs_set_header_nritems(l, mid);
+       ret = 0;
+       btrfs_item_key(right, &disk_key, 0);
+       wret = insert_ptr(trans, root, path, &disk_key, right->start,
+                         path->slots[1] + 1, 1);
+       if (wret)
+               ret = wret;
+
+       btrfs_mark_buffer_dirty(right);
+       btrfs_mark_buffer_dirty(l);
+       BUG_ON(path->slots[0] != slot);
+
+       ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+       BUG_ON(ret);
+
+       if (mid <= slot) {
+               btrfs_tree_unlock(path->nodes[0]);
+               free_extent_buffer(path->nodes[0]);
+               path->nodes[0] = right;
+               path->slots[0] -= mid;
+               path->slots[1] += 1;
+       } else {
+               btrfs_tree_unlock(right);
+               free_extent_buffer(right);
+       }
+
+       BUG_ON(path->slots[0] < 0);
+
+       if (double_split) {
+               BUG_ON(num_doubles != 0);
+               num_doubles++;
+               goto again;
+       }
+       return ret;
+}
+
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct btrfs_path *path,
+                       u32 new_size, int from_end)
+{
+       int ret = 0;
+       int slot;
+       int slot_orig;
+       struct extent_buffer *leaf;
+       struct btrfs_item *item;
+       u32 nritems;
+       unsigned int data_end;
+       unsigned int old_data_start;
+       unsigned int old_size;
+       unsigned int size_diff;
+       int i;
+
+       slot_orig = path->slots[0];
+       leaf = path->nodes[0];
+       slot = path->slots[0];
+
+       old_size = btrfs_item_size_nr(leaf, slot);
+       if (old_size == new_size)
+               return 0;
+
+       nritems = btrfs_header_nritems(leaf);
+       data_end = leaf_data_end(root, leaf);
+
+       old_data_start = btrfs_item_offset_nr(leaf, slot);
+
+       size_diff = old_size - new_size;
+
+       BUG_ON(slot < 0);
+       BUG_ON(slot >= nritems);
+
+       /*
+        * item0..itemN ... dataN.offset..dataN.size .. data0.size
+        */
+       /* first correct the data pointers */
+       for (i = slot; i < nritems; i++) {
+               u32 ioff;
+               item = btrfs_item_nr(leaf, i);
+
+               if (!leaf->map_token) {
+                       map_extent_buffer(leaf, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &leaf->map_token, &leaf->kaddr,
+                                       &leaf->map_start, &leaf->map_len,
+                                       KM_USER1);
+               }
+
+               ioff = btrfs_item_offset(leaf, item);
+               btrfs_set_item_offset(leaf, item, ioff + size_diff);
+       }
+
+       if (leaf->map_token) {
+               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+               leaf->map_token = NULL;
+       }
+
+       /* shift the data */
+       if (from_end) {
+               memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+                             data_end + size_diff, btrfs_leaf_data(leaf) +
+                             data_end, old_data_start + new_size - data_end);
+       } else {
+               struct btrfs_disk_key disk_key;
+               u64 offset;
+
+               btrfs_item_key(leaf, &disk_key, slot);
+
+               if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
+                       unsigned long ptr;
+                       struct btrfs_file_extent_item *fi;
+
+                       fi = btrfs_item_ptr(leaf, slot,
+                                           struct btrfs_file_extent_item);
+                       fi = (struct btrfs_file_extent_item *)(
+                            (unsigned long)fi - size_diff);
+
+                       if (btrfs_file_extent_type(leaf, fi) ==
+                           BTRFS_FILE_EXTENT_INLINE) {
+                               ptr = btrfs_item_ptr_offset(leaf, slot);
+                               memmove_extent_buffer(leaf, ptr,
+                                       (unsigned long)fi,
+                                       offsetof(struct btrfs_file_extent_item,
+                                                disk_bytenr));
+                       }
+               }
+
+               memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+                             data_end + size_diff, btrfs_leaf_data(leaf) +
+                             data_end, old_data_start - data_end);
+
+               offset = btrfs_disk_key_offset(&disk_key);
+               btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
+               btrfs_set_item_key(leaf, &disk_key, slot);
+               if (slot == 0)
+                       fixup_low_keys(trans, root, path, &disk_key, 1);
+       }
+
+       item = btrfs_item_nr(leaf, slot);
+       btrfs_set_item_size(leaf, item, new_size);
+       btrfs_mark_buffer_dirty(leaf);
+
+       ret = 0;
+       if (btrfs_leaf_free_space(root, leaf) < 0) {
+               btrfs_print_leaf(root, leaf);
+               BUG();
+       }
+       return ret;
+}
+
+int btrfs_extend_item(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root, struct btrfs_path *path,
+                     u32 data_size)
+{
+       int ret = 0;
+       int slot;
+       int slot_orig;
+       struct extent_buffer *leaf;
+       struct btrfs_item *item;
+       u32 nritems;
+       unsigned int data_end;
+       unsigned int old_data;
+       unsigned int old_size;
+       int i;
+
+       slot_orig = path->slots[0];
+       leaf = path->nodes[0];
+
+       nritems = btrfs_header_nritems(leaf);
+       data_end = leaf_data_end(root, leaf);
+
+       if (btrfs_leaf_free_space(root, leaf) < data_size) {
+               btrfs_print_leaf(root, leaf);
+               BUG();
+       }
+       slot = path->slots[0];
+       old_data = btrfs_item_end_nr(leaf, slot);
+
+       BUG_ON(slot < 0);
+       if (slot >= nritems) {
+               btrfs_print_leaf(root, leaf);
+               printk("slot %d too large, nritems %d\n", slot, nritems);
+               BUG_ON(1);
+       }
+
+       /*
+        * item0..itemN ... dataN.offset..dataN.size .. data0.size
+        */
+       /* first correct the data pointers */
+       for (i = slot; i < nritems; i++) {
+               u32 ioff;
+               item = btrfs_item_nr(leaf, i);
+
+               if (!leaf->map_token) {
+                       map_extent_buffer(leaf, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &leaf->map_token, &leaf->kaddr,
+                                       &leaf->map_start, &leaf->map_len,
+                                       KM_USER1);
+               }
+               ioff = btrfs_item_offset(leaf, item);
+               btrfs_set_item_offset(leaf, item, ioff - data_size);
+       }
+
+       if (leaf->map_token) {
+               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+               leaf->map_token = NULL;
+       }
+
+       /* shift the data */
+       memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+                     data_end - data_size, btrfs_leaf_data(leaf) +
+                     data_end, old_data - data_end);
+
+       data_end = old_data;
+       old_size = btrfs_item_size_nr(leaf, slot);
+       item = btrfs_item_nr(leaf, slot);
+       btrfs_set_item_size(leaf, item, old_size + data_size);
+       btrfs_mark_buffer_dirty(leaf);
+
+       ret = 0;
+       if (btrfs_leaf_free_space(root, leaf) < 0) {
+               btrfs_print_leaf(root, leaf);
+               BUG();
+       }
+       return ret;
+}
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path,
+                           struct btrfs_key *cpu_key, u32 *data_size,
+                           int nr)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_item *item;
+       int ret = 0;
+       int slot;
+       int slot_orig;
+       int i;
+       u32 nritems;
+       u32 total_size = 0;
+       u32 total_data = 0;
+       unsigned int data_end;
+       struct btrfs_disk_key disk_key;
+
+       for (i = 0; i < nr; i++) {
+               total_data += data_size[i];
+       }
+
+       total_size = total_data + (nr * sizeof(struct btrfs_item));
+       ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+       if (ret == 0)
+               return -EEXIST;
+       if (ret < 0)
+               goto out;
+
+       slot_orig = path->slots[0];
+       leaf = path->nodes[0];
+
+       nritems = btrfs_header_nritems(leaf);
+       data_end = leaf_data_end(root, leaf);
+
+       if (btrfs_leaf_free_space(root, leaf) < total_size) {
+               btrfs_print_leaf(root, leaf);
+               printk("not enough freespace need %u have %d\n",
+                      total_size, btrfs_leaf_free_space(root, leaf));
+               BUG();
+       }
+
+       slot = path->slots[0];
+       BUG_ON(slot < 0);
+
+       if (slot != nritems) {
+               unsigned int old_data = btrfs_item_end_nr(leaf, slot);
+
+               if (old_data < data_end) {
+                       btrfs_print_leaf(root, leaf);
+                       printk("slot %d old_data %d data_end %d\n",
+                              slot, old_data, data_end);
+                       BUG_ON(1);
+               }
+               /*
+                * item0..itemN ... dataN.offset..dataN.size .. data0.size
+                */
+               /* first correct the data pointers */
+               WARN_ON(leaf->map_token);
+               for (i = slot; i < nritems; i++) {
+                       u32 ioff;
+
+                       item = btrfs_item_nr(leaf, i);
+                       if (!leaf->map_token) {
+                               map_extent_buffer(leaf, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &leaf->map_token, &leaf->kaddr,
+                                       &leaf->map_start, &leaf->map_len,
+                                       KM_USER1);
+                       }
+
+                       ioff = btrfs_item_offset(leaf, item);
+                       btrfs_set_item_offset(leaf, item, ioff - total_data);
+               }
+               if (leaf->map_token) {
+                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+                       leaf->map_token = NULL;
+               }
+
+               /* shift the items */
+               memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
+                             btrfs_item_nr_offset(slot),
+                             (nritems - slot) * sizeof(struct btrfs_item));
+
+               /* shift the data */
+               memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+                             data_end - total_data, btrfs_leaf_data(leaf) +
+                             data_end, old_data - data_end);
+               data_end = old_data;
+       }
+
+       /* setup the item for the new data */
+       for (i = 0; i < nr; i++) {
+               btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
+               btrfs_set_item_key(leaf, &disk_key, slot + i);
+               item = btrfs_item_nr(leaf, slot + i);
+               btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+               data_end -= data_size[i];
+               btrfs_set_item_size(leaf, item, data_size[i]);
+       }
+       btrfs_set_header_nritems(leaf, nritems + nr);
+       btrfs_mark_buffer_dirty(leaf);
+
+       ret = 0;
+       if (slot == 0) {
+               btrfs_cpu_key_to_disk(&disk_key, cpu_key);
+               ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+       }
+
+       if (btrfs_leaf_free_space(root, leaf) < 0) {
+               btrfs_print_leaf(root, leaf);
+               BUG();
+       }
+out:
+       return ret;
+}
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_key *cpu_key, void *data, u32
+                     data_size)
+{
+       int ret = 0;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       unsigned long ptr;
+
+       path = btrfs_alloc_path();
+       BUG_ON(!path);
+       ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+       if (!ret) {
+               leaf = path->nodes[0];
+               ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+               write_extent_buffer(leaf, data, ptr, data_size);
+               btrfs_mark_buffer_dirty(leaf);
+       }
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * delete the pointer from a given node.
+ *
+ * If the delete empties a node, the node is removed from the tree,
+ * continuing all the way the root if required.  The root is converted into
+ * a leaf if all the nodes are emptied.
+ */
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                  struct btrfs_path *path, int level, int slot)
+{
+       struct extent_buffer *parent = path->nodes[level];
+       u32 nritems;
+       int ret = 0;
+       int wret;
+
+       nritems = btrfs_header_nritems(parent);
+       if (slot != nritems -1) {
+               memmove_extent_buffer(parent,
+                             btrfs_node_key_ptr_offset(slot),
+                             btrfs_node_key_ptr_offset(slot + 1),
+                             sizeof(struct btrfs_key_ptr) *
+                             (nritems - slot - 1));
+       }
+       nritems--;
+       btrfs_set_header_nritems(parent, nritems);
+       if (nritems == 0 && parent == root->node) {
+               BUG_ON(btrfs_header_level(root->node) != 1);
+               /* just turn the root into a leaf and break */
+               btrfs_set_header_level(root->node, 0);
+       } else if (slot == 0) {
+               struct btrfs_disk_key disk_key;
+
+               btrfs_node_key(parent, &disk_key, 0);
+               wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
+               if (wret)
+                       ret = wret;
+       }
+       btrfs_mark_buffer_dirty(parent);
+       return ret;
+}
+
+/*
+ * delete the item at the leaf level in path.  If that empties
+ * the leaf, remove it from the tree
+ */
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                   struct btrfs_path *path, int slot, int nr)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_item *item;
+       int last_off;
+       int dsize = 0;
+       int ret = 0;
+       int wret;
+       int i;
+       u32 nritems;
+
+       leaf = path->nodes[0];
+       last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
+
+       for (i = 0; i < nr; i++)
+               dsize += btrfs_item_size_nr(leaf, slot + i);
+
+       nritems = btrfs_header_nritems(leaf);
+
+       if (slot + nr != nritems) {
+               int data_end = leaf_data_end(root, leaf);
+
+               memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+                             data_end + dsize,
+                             btrfs_leaf_data(leaf) + data_end,
+                             last_off - data_end);
+
+               for (i = slot + nr; i < nritems; i++) {
+                       u32 ioff;
+
+                       item = btrfs_item_nr(leaf, i);
+                       if (!leaf->map_token) {
+                               map_extent_buffer(leaf, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &leaf->map_token, &leaf->kaddr,
+                                       &leaf->map_start, &leaf->map_len,
+                                       KM_USER1);
+                       }
+                       ioff = btrfs_item_offset(leaf, item);
+                       btrfs_set_item_offset(leaf, item, ioff + dsize);
+               }
+
+               if (leaf->map_token) {
+                       unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+                       leaf->map_token = NULL;
+               }
+
+               memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
+                             btrfs_item_nr_offset(slot + nr),
+                             sizeof(struct btrfs_item) *
+                             (nritems - slot - nr));
+       }
+       btrfs_set_header_nritems(leaf, nritems - nr);
+       nritems -= nr;
+
+       /* delete the leaf if we've emptied it */
+       if (nritems == 0) {
+               if (leaf == root->node) {
+                       btrfs_set_header_level(leaf, 0);
+               } else {
+                       u64 root_gen = btrfs_header_generation(path->nodes[1]);
+                       wret = del_ptr(trans, root, path, 1, path->slots[1]);
+                       if (wret)
+                               ret = wret;
+                       wret = btrfs_free_extent(trans, root,
+                                        leaf->start, leaf->len,
+                                        path->nodes[1]->start,
+                                        btrfs_header_owner(path->nodes[1]),
+                                        root_gen, 0, 0, 1);
+                       if (wret)
+                               ret = wret;
+               }
+       } else {
+               int used = leaf_space_used(leaf, 0, nritems);
+               if (slot == 0) {
+                       struct btrfs_disk_key disk_key;
+
+                       btrfs_item_key(leaf, &disk_key, 0);
+                       wret = fixup_low_keys(trans, root, path,
+                                             &disk_key, 1);
+                       if (wret)
+                               ret = wret;
+               }
+
+               /* delete the leaf if it is mostly empty */
+               if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
+                       /* push_leaf_left fixes the path.
+                        * make sure the path still points to our leaf
+                        * for possible call to del_ptr below
+                        */
+                       slot = path->slots[1];
+                       extent_buffer_get(leaf);
+
+                       wret = push_leaf_left(trans, root, path, 1, 1);
+                       if (wret < 0 && wret != -ENOSPC)
+                               ret = wret;
+
+                       if (path->nodes[0] == leaf &&
+                           btrfs_header_nritems(leaf)) {
+                               wret = push_leaf_right(trans, root, path, 1, 1);
+                               if (wret < 0 && wret != -ENOSPC)
+                                       ret = wret;
+                       }
+
+                       if (btrfs_header_nritems(leaf) == 0) {
+                               u64 root_gen;
+                               u64 bytenr = leaf->start;
+                               u32 blocksize = leaf->len;
+
+                               root_gen = btrfs_header_generation(
+                                                          path->nodes[1]);
+
+                               wret = del_ptr(trans, root, path, 1, slot);
+                               if (wret)
+                                       ret = wret;
+
+                               free_extent_buffer(leaf);
+                               wret = btrfs_free_extent(trans, root, bytenr,
+                                            blocksize, path->nodes[1]->start,
+                                            btrfs_header_owner(path->nodes[1]),
+                                            root_gen, 0, 0, 1);
+                               if (wret)
+                                       ret = wret;
+                       } else {
+                               /* if we're still in the path, make sure
+                                * we're dirty.  Otherwise, one of the
+                                * push_leaf functions must have already
+                                * dirtied this buffer
+                                */
+                               if (path->nodes[0] == leaf)
+                                       btrfs_mark_buffer_dirty(leaf);
+                               free_extent_buffer(leaf);
+                       }
+               } else {
+                       btrfs_mark_buffer_dirty(leaf);
+               }
+       }
+       return ret;
+}
+
+/*
+ * search the tree again to find a leaf with lesser keys
+ * returns 0 if it found something or 1 if there are no lesser leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+       struct btrfs_key key;
+       struct btrfs_disk_key found_key;
+       int ret;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+
+       if (key.offset > 0)
+               key.offset--;
+       else if (key.type > 0)
+               key.type--;
+       else if (key.objectid > 0)
+               key.objectid--;
+       else
+               return 1;
+
+       btrfs_release_path(root, path);
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               return ret;
+       btrfs_item_key(path->nodes[0], &found_key, 0);
+       ret = comp_keys(&found_key, &key);
+       if (ret < 0)
+               return 0;
+       return 1;
+}
+
+/*
+ * A helper function to walk down the tree starting at min_key, and looking
+ * for nodes or leaves that are either in cache or have a minimum
+ * transaction id.  This is used by the btree defrag code, but could
+ * also be used to search for blocks that have changed since a given
+ * transaction id.
+ *
+ * This does not cow, but it does stuff the starting key it finds back
+ * into min_key, so you can call btrfs_search_slot with cow=1 on the
+ * key and get a writable path.
+ *
+ * This does lock as it descends, and path->keep_locks should be set
+ * to 1 by the caller.
+ *
+ * This honors path->lowest_level to prevent descent past a given level
+ * of the tree.
+ *
+ * returns zero if something useful was found, < 0 on error and 1 if there
+ * was nothing in the tree that matched the search criteria.
+ */
+int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+                        struct btrfs_key *max_key,
+                        struct btrfs_path *path, int cache_only,
+                        u64 min_trans)
+{
+       struct extent_buffer *cur;
+       struct btrfs_key found_key;
+       int slot;
+       int sret;
+       u32 nritems;
+       int level;
+       int ret = 1;
+
+again:
+       cur = btrfs_lock_root_node(root);
+       level = btrfs_header_level(cur);
+       WARN_ON(path->nodes[level]);
+       path->nodes[level] = cur;
+       path->locks[level] = 1;
+
+       if (btrfs_header_generation(cur) < min_trans) {
+               ret = 1;
+               goto out;
+       }
+       while(1) {
+               nritems = btrfs_header_nritems(cur);
+               level = btrfs_header_level(cur);
+               sret = bin_search(cur, min_key, level, &slot);
+
+               /* at level = 0, we're done, setup the path and exit */
+               if (level == 0) {
+                       if (slot >= nritems)
+                               goto find_next_key;
+                       ret = 0;
+                       path->slots[level] = slot;
+                       btrfs_item_key_to_cpu(cur, &found_key, slot);
+                       goto out;
+               }
+               if (sret && slot > 0)
+                       slot--;
+               /*
+                * check this node pointer against the cache_only and
+                * min_trans parameters.  If it isn't in cache or is too
+                * old, skip to the next one.
+                */
+               while(slot < nritems) {
+                       u64 blockptr;
+                       u64 gen;
+                       struct extent_buffer *tmp;
+                       struct btrfs_disk_key disk_key;
+
+                       blockptr = btrfs_node_blockptr(cur, slot);
+                       gen = btrfs_node_ptr_generation(cur, slot);
+                       if (gen < min_trans) {
+                               slot++;
+                               continue;
+                       }
+                       if (!cache_only)
+                               break;
+
+                       if (max_key) {
+                               btrfs_node_key(cur, &disk_key, slot);
+                               if (comp_keys(&disk_key, max_key) >= 0) {
+                                       ret = 1;
+                                       goto out;
+                               }
+                       }
+
+                       tmp = btrfs_find_tree_block(root, blockptr,
+                                           btrfs_level_size(root, level - 1));
+
+                       if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+                               free_extent_buffer(tmp);
+                               break;
+                       }
+                       if (tmp)
+                               free_extent_buffer(tmp);
+                       slot++;
+               }
+find_next_key:
+               /*
+                * we didn't find a candidate key in this node, walk forward
+                * and find another one
+                */
+               if (slot >= nritems) {
+                       path->slots[level] = slot;
+                       sret = btrfs_find_next_key(root, path, min_key, level,
+                                                 cache_only, min_trans);
+                       if (sret == 0) {
+                               btrfs_release_path(root, path);
+                               goto again;
+                       } else {
+                               goto out;
+                       }
+               }
+               /* save our key for returning back */
+               btrfs_node_key_to_cpu(cur, &found_key, slot);
+               path->slots[level] = slot;
+               if (level == path->lowest_level) {
+                       ret = 0;
+                       unlock_up(path, level, 1);
+                       goto out;
+               }
+               cur = read_node_slot(root, cur, slot);
+
+               btrfs_tree_lock(cur);
+               path->locks[level - 1] = 1;
+               path->nodes[level - 1] = cur;
+               unlock_up(path, level, 1);
+       }
+out:
+       if (ret == 0)
+               memcpy(min_key, &found_key, sizeof(found_key));
+       return ret;
+}
+
+/*
+ * this is similar to btrfs_next_leaf, but does not try to preserve
+ * and fixup the path.  It looks for and returns the next key in the
+ * tree based on the current path and the cache_only and min_trans
+ * parameters.
+ *
+ * 0 is returned if another key is found, < 0 if there are any errors
+ * and 1 is returned if there are no higher keys in the tree
+ *
+ * path->keep_locks should be set to 1 on the search made before
+ * calling this function.
+ */
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+                       struct btrfs_key *key, int lowest_level,
+                       int cache_only, u64 min_trans)
+{
+       int level = lowest_level;
+       int slot;
+       struct extent_buffer *c;
+
+       while(level < BTRFS_MAX_LEVEL) {
+               if (!path->nodes[level])
+                       return 1;
+
+               slot = path->slots[level] + 1;
+               c = path->nodes[level];
+next:
+               if (slot >= btrfs_header_nritems(c)) {
+                       level++;
+                       if (level == BTRFS_MAX_LEVEL) {
+                               return 1;
+                       }
+                       continue;
+               }
+               if (level == 0)
+                       btrfs_item_key_to_cpu(c, key, slot);
+               else {
+                       u64 blockptr = btrfs_node_blockptr(c, slot);
+                       u64 gen = btrfs_node_ptr_generation(c, slot);
+
+                       if (cache_only) {
+                               struct extent_buffer *cur;
+                               cur = btrfs_find_tree_block(root, blockptr,
+                                           btrfs_level_size(root, level - 1));
+                               if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
+                                       slot++;
+                                       if (cur)
+                                               free_extent_buffer(cur);
+                                       goto next;
+                               }
+                               free_extent_buffer(cur);
+                       }
+                       if (gen < min_trans) {
+                               slot++;
+                               goto next;
+                       }
+                       btrfs_node_key_to_cpu(c, key, slot);
+               }
+               return 0;
+       }
+       return 1;
+}
+
+/*
+ * search the tree again to find a leaf with greater keys
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+       int slot;
+       int level = 1;
+       struct extent_buffer *c;
+       struct extent_buffer *next = NULL;
+       struct btrfs_key key;
+       u32 nritems;
+       int ret;
+
+       nritems = btrfs_header_nritems(path->nodes[0]);
+       if (nritems == 0) {
+               return 1;
+       }
+
+       btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
+
+       btrfs_release_path(root, path);
+       path->keep_locks = 1;
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       path->keep_locks = 0;
+
+       if (ret < 0)
+               return ret;
+
+       nritems = btrfs_header_nritems(path->nodes[0]);
+       /*
+        * by releasing the path above we dropped all our locks.  A balance
+        * could have added more items next to the key that used to be
+        * at the very end of the block.  So, check again here and
+        * advance the path if there are now more items available.
+        */
+       if (nritems > 0 && path->slots[0] < nritems - 1) {
+               path->slots[0]++;
+               goto done;
+       }
+
+       while(level < BTRFS_MAX_LEVEL) {
+               if (!path->nodes[level])
+                       return 1;
+
+               slot = path->slots[level] + 1;
+               c = path->nodes[level];
+               if (slot >= btrfs_header_nritems(c)) {
+                       level++;
+                       if (level == BTRFS_MAX_LEVEL) {
+                               return 1;
+                       }
+                       continue;
+               }
+
+               if (next) {
+                       btrfs_tree_unlock(next);
+                       free_extent_buffer(next);
+               }
+
+               if (level == 1 && (path->locks[1] || path->skip_locking) &&
+                   path->reada)
+                       reada_for_search(root, path, level, slot, 0);
+
+               next = read_node_slot(root, c, slot);
+               if (!path->skip_locking) {
+                       WARN_ON(!btrfs_tree_locked(c));
+                       btrfs_tree_lock(next);
+               }
+               break;
+       }
+       path->slots[level] = slot;
+       while(1) {
+               level--;
+               c = path->nodes[level];
+               if (path->locks[level])
+                       btrfs_tree_unlock(c);
+               free_extent_buffer(c);
+               path->nodes[level] = next;
+               path->slots[level] = 0;
+               if (!path->skip_locking)
+                       path->locks[level] = 1;
+               if (!level)
+                       break;
+               if (level == 1 && path->locks[1] && path->reada)
+                       reada_for_search(root, path, level, slot, 0);
+               next = read_node_slot(root, next, 0);
+               if (!path->skip_locking) {
+                       WARN_ON(!btrfs_tree_locked(path->nodes[level]));
+                       btrfs_tree_lock(next);
+               }
+       }
+done:
+       unlock_up(path, 0, 1);
+       return 0;
+}
+
+/*
+ * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
+ * searching until it gets past min_objectid or finds an item of 'type'
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid,
+                       int type)
+{
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
+       u32 nritems;
+       int ret;
+
+       while(1) {
+               if (path->slots[0] == 0) {
+                       ret = btrfs_prev_leaf(root, path);
+                       if (ret != 0)
+                               return ret;
+               } else {
+                       path->slots[0]--;
+               }
+               leaf = path->nodes[0];
+               nritems = btrfs_header_nritems(leaf);
+               if (nritems == 0)
+                       return 1;
+               if (path->slots[0] == nritems)
+                       path->slots[0]--;
+
+               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+               if (found_key.type == type)
+                       return 0;
+               if (found_key.objectid < min_objectid)
+                       break;
+               if (found_key.objectid == min_objectid &&
+                   found_key.type < type)
+                       break;
+       }
+       return 1;
+}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
new file mode 100644 (file)
index 0000000..138c157
--- /dev/null
@@ -0,0 +1,1875 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_CTREE__
+#define __BTRFS_CTREE__
+
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/fs.h>
+#include <linux/completion.h>
+#include <linux/backing-dev.h>
+#include <linux/wait.h>
+#include <asm/kmap_types.h>
+#include "bit-radix.h"
+#include "extent_io.h"
+#include "extent_map.h"
+#include "async-thread.h"
+
+struct btrfs_trans_handle;
+struct btrfs_transaction;
+extern struct kmem_cache *btrfs_trans_handle_cachep;
+extern struct kmem_cache *btrfs_transaction_cachep;
+extern struct kmem_cache *btrfs_bit_radix_cachep;
+extern struct kmem_cache *btrfs_path_cachep;
+struct btrfs_ordered_sum;
+
+#define BTRFS_MAGIC "_B9RfS_M"
+
+#define BTRFS_ACL_NOT_CACHED    ((void *)-1)
+
+#ifdef CONFIG_LOCKDEP
+# define BTRFS_MAX_LEVEL 7
+#else
+# define BTRFS_MAX_LEVEL 8
+#endif
+
+/* holds pointers to all of the tree roots */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/* stores information about which extents are in use, and reference counts */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * chunk tree stores translations from logical -> physical block numbering
+ * the super block points to the chunk tree
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * stores information about which areas of a given device are in use.
+ * one per device.  The tree of tree roots points to the device tree
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* one per subvolume, storing files and directories */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* directory objectid inside the root tree */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+
+/* orhpan objectid for tracking unlinked/truncated files */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* does write ahead logging to speed up fsyncs */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* dummy objectid represents multiple objectids */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/*
+ * All files have objectids in this range.
+ */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * the device items go into the chunk tree.  The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+/*
+ * we can actually store much bigger names, but lets not confuse the rest
+ * of linux
+ */
+#define BTRFS_NAME_LEN 255
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+/* four bytes for CRC32 */
+#define BTRFS_CRC32_SIZE 4
+#define BTRFS_EMPTY_DIR_SIZE 0
+
+#define BTRFS_FT_UNKNOWN       0
+#define BTRFS_FT_REG_FILE      1
+#define BTRFS_FT_DIR           2
+#define BTRFS_FT_CHRDEV                3
+#define BTRFS_FT_BLKDEV                4
+#define BTRFS_FT_FIFO          5
+#define BTRFS_FT_SOCK          6
+#define BTRFS_FT_SYMLINK       7
+#define BTRFS_FT_XATTR         8
+#define BTRFS_FT_MAX           9
+
+/*
+ * the key defines the order in the tree, and so it also defines (optimal)
+ * block layout.  objectid corresonds to the inode number.  The flags
+ * tells us things about the object, and is a kind of stream selector.
+ * so for a given inode, keys with flags of 1 might refer to the inode
+ * data, flags of 2 may point to file data in the btree and flags == 3
+ * may point to extents.
+ *
+ * offset is the starting byte offset for this key in the stream.
+ *
+ * btrfs_disk_key is in disk byte order.  struct btrfs_key is always
+ * in cpu native order.  Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+       __le64 objectid;
+       u8 type;
+       __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+       u64 objectid;
+       u8 type;
+       u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_mapping_tree {
+       struct extent_map_tree map_tree;
+};
+
+#define BTRFS_UUID_SIZE 16
+struct btrfs_dev_item {
+       /* the internal btrfs device id */
+       __le64 devid;
+
+       /* size of the device */
+       __le64 total_bytes;
+
+       /* bytes used */
+       __le64 bytes_used;
+
+       /* optimal io alignment for this device */
+       __le32 io_align;
+
+       /* optimal io width for this device */
+       __le32 io_width;
+
+       /* minimal io size for this device */
+       __le32 sector_size;
+
+       /* type and info about this device */
+       __le64 type;
+
+       /* grouping information for allocation decisions */
+       __le32 dev_group;
+
+       /* seek speed 0-100 where 100 is fastest */
+       u8 seek_speed;
+
+       /* bandwidth 0-100 where 100 is fastest */
+       u8 bandwidth;
+
+       /* btrfs generated uuid for this device */
+       u8 uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+       __le64 devid;
+       __le64 offset;
+       u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+       /* size of this chunk in bytes */
+       __le64 length;
+
+       /* objectid of the root referencing this chunk */
+       __le64 owner;
+
+       __le64 stripe_len;
+       __le64 type;
+
+       /* optimal io alignment for this chunk */
+       __le32 io_align;
+
+       /* optimal io width for this chunk */
+       __le32 io_width;
+
+       /* minimal io size for this chunk */
+       __le32 sector_size;
+
+       /* 2^16 stripes is quite a lot, a second limit is the size of a single
+        * item in the btree
+        */
+       __le16 num_stripes;
+
+       /* sub stripes only matter for raid10 */
+       __le16 sub_stripes;
+       struct btrfs_stripe stripe;
+       /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+       BUG_ON(num_stripes == 0);
+       return sizeof(struct btrfs_chunk) +
+               sizeof(struct btrfs_stripe) * (num_stripes - 1);
+}
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0)
+
+/*
+ * every tree block (leaf or node) starts with this header.
+ */
+struct btrfs_header {
+       /* these first four must match the super block */
+       u8 csum[BTRFS_CSUM_SIZE];
+       u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+       __le64 bytenr; /* which block this node is supposed to live in */
+       __le64 flags;
+
+       /* allowed to be different from the super from here on down */
+       u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+       __le64 generation;
+       __le64 owner;
+       __le32 nritems;
+       u8 level;
+} __attribute__ ((__packed__));
+
+#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
+                               sizeof(struct btrfs_header)) / \
+                               sizeof(struct btrfs_key_ptr))
+#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
+#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize))
+#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+                                       sizeof(struct btrfs_item) - \
+                                       sizeof(struct btrfs_file_extent_item))
+
+
+/*
+ * this is a very generous portion of the super block, giving us
+ * room to translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+#define BTRFS_LABEL_SIZE 256
+
+/*
+ * the super block basically lists the main trees of the FS
+ * it currently lacks any block count etc etc
+ */
+struct btrfs_super_block {
+       u8 csum[BTRFS_CSUM_SIZE];
+       /* the first 4 fields must match struct btrfs_header */
+       u8 fsid[16];    /* FS specific uuid */
+       __le64 bytenr; /* this block number */
+       __le64 flags;
+
+       /* allowed to be different from the btrfs_header from here own down */
+       __le64 magic;
+       __le64 generation;
+       __le64 root;
+       __le64 chunk_root;
+       __le64 log_root;
+       __le64 total_bytes;
+       __le64 bytes_used;
+       __le64 root_dir_objectid;
+       __le64 num_devices;
+       __le32 sectorsize;
+       __le32 nodesize;
+       __le32 leafsize;
+       __le32 stripesize;
+       __le32 sys_chunk_array_size;
+       u8 root_level;
+       u8 chunk_root_level;
+       u8 log_root_level;
+       struct btrfs_dev_item dev_item;
+       char label[BTRFS_LABEL_SIZE];
+       u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+} __attribute__ ((__packed__));
+
+/*
+ * A leaf is full of items. offset and size tell us where to find
+ * the item in the leaf (relative to the start of the data area)
+ */
+struct btrfs_item {
+       struct btrfs_disk_key key;
+       __le32 offset;
+       __le32 size;
+} __attribute__ ((__packed__));
+
+/*
+ * leaves have an item area and a data area:
+ * [item0, item1....itemN] [free space] [dataN...data1, data0]
+ *
+ * The data is separate from the items to get the keys closer together
+ * during searches.
+ */
+struct btrfs_leaf {
+       struct btrfs_header header;
+       struct btrfs_item items[];
+} __attribute__ ((__packed__));
+
+/*
+ * all non-leaf blocks are nodes, they hold only keys and pointers to
+ * other blocks
+ */
+struct btrfs_key_ptr {
+       struct btrfs_disk_key key;
+       __le64 blockptr;
+       __le64 generation;
+} __attribute__ ((__packed__));
+
+struct btrfs_node {
+       struct btrfs_header header;
+       struct btrfs_key_ptr ptrs[];
+} __attribute__ ((__packed__));
+
+/*
+ * btrfs_paths remember the path taken from the root down to the leaf.
+ * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
+ * to any other levels that are present.
+ *
+ * The slots array records the index of the item or block pointer
+ * used while walking the tree.
+ */
+struct btrfs_path {
+       struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
+       int slots[BTRFS_MAX_LEVEL];
+       /* if there is real range locking, this locks field will change */
+       int locks[BTRFS_MAX_LEVEL];
+       int reada;
+       /* keep some upper locks as we walk down */
+       int keep_locks;
+       int skip_locking;
+       int lowest_level;
+};
+
+/*
+ * items in the extent btree are used to record the objectid of the
+ * owner of the block and the number of references
+ */
+struct btrfs_extent_item {
+       __le32 refs;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_ref {
+       __le64 root;
+       __le64 generation;
+       __le64 objectid;
+       __le64 offset;
+       __le32 num_refs;
+} __attribute__ ((__packed__));
+
+/* dev extents record free space on individual devices.  The owner
+ * field points back to the chunk allocation mapping tree that allocated
+ * the extent.  The chunk tree uuid field is a way to double check the owner
+ */
+struct btrfs_dev_extent {
+       __le64 chunk_tree;
+       __le64 chunk_objectid;
+       __le64 chunk_offset;
+       __le64 length;
+       u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+       __le64 index;
+       __le16 name_len;
+       /* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+       __le64 sec;
+       __le32 nsec;
+} __attribute__ ((__packed__));
+
+/*
+ * there is no padding here on purpose.  If you want to extent the inode,
+ * make a new item type
+ */
+struct btrfs_inode_item {
+       /* nfs style generation number */
+       __le64 generation;
+       /* transid that last touched this inode */
+       __le64 transid;
+       __le64 size;
+       __le64 nblocks;
+       __le64 block_group;
+       __le32 nlink;
+       __le32 uid;
+       __le32 gid;
+       __le32 mode;
+       __le64 rdev;
+       __le16 flags;
+       __le16 compat_flags;
+       struct btrfs_timespec atime;
+       struct btrfs_timespec ctime;
+       struct btrfs_timespec mtime;
+       struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+       __le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+       struct btrfs_disk_key location;
+       __le64 transid;
+       __le16 data_len;
+       __le16 name_len;
+       u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_root_item {
+       struct btrfs_inode_item inode;
+       __le64 root_dirid;
+       __le64 bytenr;
+       __le64 byte_limit;
+       __le64 bytes_used;
+       __le32 flags;
+       __le32 refs;
+       struct btrfs_disk_key drop_progress;
+       u8 drop_level;
+       u8 level;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FILE_EXTENT_REG 0
+#define BTRFS_FILE_EXTENT_INLINE 1
+
+struct btrfs_file_extent_item {
+       __le64 generation;
+       u8 type;
+       /*
+        * disk space consumed by the extent, checksum blocks are included
+        * in these numbers
+        */
+       __le64 disk_bytenr;
+       __le64 disk_num_bytes;
+       /*
+        * the logical offset in file blocks (no csums)
+        * this extent record is for.  This allows a file extent to point
+        * into the middle of an existing extent on disk, sharing it
+        * between two snapshots (useful if some bytes in the middle of the
+        * extent have changed
+        */
+       __le64 offset;
+       /*
+        * the logical number of file blocks (no csums included)
+        */
+       __le64 num_bytes;
+} __attribute__ ((__packed__));
+
+struct btrfs_csum_item {
+       u8 csum;
+} __attribute__ ((__packed__));
+
+/* different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA     (1 << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
+#define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
+#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
+#define BTRFS_BLOCK_GROUP_DUP     (1 << 5)
+#define BTRFS_BLOCK_GROUP_RAID10   (1 << 6)
+
+struct btrfs_block_group_item {
+       __le64 used;
+       __le64 chunk_objectid;
+       __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_space_info {
+       u64 flags;
+       u64 total_bytes;
+       u64 bytes_used;
+       u64 bytes_pinned;
+       int full;
+       int force_alloc;
+       struct list_head list;
+
+       /* for block groups in our same type */
+       struct list_head block_groups;
+       spinlock_t lock;
+};
+
+struct btrfs_free_space {
+       struct rb_node bytes_index;
+       struct rb_node offset_index;
+       u64 offset;
+       u64 bytes;
+};
+
+struct btrfs_block_group_cache {
+       struct btrfs_key key;
+       struct btrfs_block_group_item item;
+       spinlock_t lock;
+       u64 pinned;
+       u64 flags;
+       int cached;
+       int ro;
+       int dirty;
+
+       struct btrfs_space_info *space_info;
+
+       /* free space cache stuff */
+       struct rb_root free_space_bytes;
+       struct rb_root free_space_offset;
+
+       /* block group cache stuff */
+       struct rb_node cache_node;
+
+       /* for block groups in the same raid type */
+       struct list_head list;
+};
+
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info {
+       u8 fsid[BTRFS_FSID_SIZE];
+       u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+       struct btrfs_root *extent_root;
+       struct btrfs_root *tree_root;
+       struct btrfs_root *chunk_root;
+       struct btrfs_root *dev_root;
+
+       /* the log root tree is a directory of all the other log roots */
+       struct btrfs_root *log_root_tree;
+       struct radix_tree_root fs_roots_radix;
+
+       /* block group cache stuff */
+       spinlock_t block_group_cache_lock;
+       struct rb_root block_group_cache_tree;
+
+       struct extent_io_tree pinned_extents;
+       struct extent_io_tree pending_del;
+       struct extent_io_tree extent_ins;
+
+       /* logical->physical extent mapping */
+       struct btrfs_mapping_tree mapping_tree;
+
+       u64 generation;
+       u64 last_trans_committed;
+       u64 last_trans_new_blockgroup;
+       u64 open_ioctl_trans;
+       unsigned long mount_opt;
+       u64 max_extent;
+       u64 max_inline;
+       u64 alloc_start;
+       struct btrfs_transaction *running_transaction;
+       wait_queue_head_t transaction_throttle;
+       wait_queue_head_t transaction_wait;
+       wait_queue_head_t async_submit_wait;
+
+       wait_queue_head_t tree_log_wait;
+
+       struct btrfs_super_block super_copy;
+       struct btrfs_super_block super_for_commit;
+       struct block_device *__bdev;
+       struct super_block *sb;
+       struct inode *btree_inode;
+       struct backing_dev_info bdi;
+       spinlock_t hash_lock;
+       struct mutex trans_mutex;
+       struct mutex tree_log_mutex;
+       struct mutex transaction_kthread_mutex;
+       struct mutex cleaner_mutex;
+       struct mutex alloc_mutex;
+       struct mutex chunk_mutex;
+       struct mutex drop_mutex;
+       struct mutex volume_mutex;
+       struct list_head trans_list;
+       struct list_head hashers;
+       struct list_head dead_roots;
+
+       atomic_t nr_async_submits;
+       atomic_t nr_async_bios;
+       atomic_t tree_log_writers;
+       atomic_t tree_log_commit;
+       unsigned long tree_log_batch;
+       u64 tree_log_transid;
+
+       /*
+        * this is used by the balancing code to wait for all the pending
+        * ordered extents
+        */
+       spinlock_t ordered_extent_lock;
+       struct list_head ordered_extents;
+       struct list_head delalloc_inodes;
+
+       /*
+        * there is a pool of worker threads for checksumming during writes
+        * and a pool for checksumming after reads.  This is because readers
+        * can run with FS locks held, and the writers may be waiting for
+        * those locks.  We don't want ordering in the pending list to cause
+        * deadlocks, and so the two are serviced separately.
+        *
+        * A third pool does submit_bio to avoid deadlocking with the other
+        * two
+        */
+       struct btrfs_workers workers;
+       struct btrfs_workers endio_workers;
+       struct btrfs_workers endio_write_workers;
+       struct btrfs_workers submit_workers;
+       /*
+        * fixup workers take dirty pages that didn't properly go through
+        * the cow mechanism and make them safe to write.  It happens
+        * for the sys_munmap function call path
+        */
+       struct btrfs_workers fixup_workers;
+       struct task_struct *transaction_kthread;
+       struct task_struct *cleaner_kthread;
+       int thread_pool_size;
+
+       struct kobject super_kobj;
+       struct completion kobj_unregister;
+       int do_barriers;
+       int closing;
+       int log_root_recovering;
+       atomic_t throttles;
+       atomic_t throttle_gen;
+
+       u64 total_pinned;
+       struct list_head dirty_cowonly_roots;
+
+       struct btrfs_fs_devices *fs_devices;
+       struct list_head space_info;
+       spinlock_t delalloc_lock;
+       spinlock_t new_trans_lock;
+       u64 delalloc_bytes;
+       u64 last_alloc;
+       u64 last_data_alloc;
+       u64 last_log_alloc;
+
+       spinlock_t ref_cache_lock;
+       u64 total_ref_cache_size;
+
+       u64 avail_data_alloc_bits;
+       u64 avail_metadata_alloc_bits;
+       u64 avail_system_alloc_bits;
+       u64 data_alloc_profile;
+       u64 metadata_alloc_profile;
+       u64 system_alloc_profile;
+
+       void *bdev_holder;
+};
+
+struct btrfs_leaf_ref_tree {
+       struct rb_root root;
+       struct btrfs_leaf_ref *last;
+       struct list_head list;
+       spinlock_t lock;
+};
+
+/*
+ * in ram representation of the tree.  extent_root is used for all allocations
+ * and for the extent tree extent_root root.
+ */
+struct btrfs_dirty_root;
+struct btrfs_root {
+       struct extent_buffer *node;
+
+       /* the node lock is held while changing the node pointer */
+       spinlock_t node_lock;
+
+       struct extent_buffer *commit_root;
+       struct btrfs_leaf_ref_tree *ref_tree;
+       struct btrfs_leaf_ref_tree ref_tree_struct;
+       struct btrfs_dirty_root *dirty_root;
+       struct btrfs_root *log_root;
+
+       struct btrfs_root_item root_item;
+       struct btrfs_key root_key;
+       struct btrfs_fs_info *fs_info;
+       struct inode *inode;
+       struct extent_io_tree dirty_log_pages;
+
+       struct kobject root_kobj;
+       struct completion kobj_unregister;
+       struct mutex objectid_mutex;
+       struct mutex log_mutex;
+
+       u64 objectid;
+       u64 last_trans;
+
+       /* data allocations are done in sectorsize units */
+       u32 sectorsize;
+
+       /* node allocations are done in nodesize units */
+       u32 nodesize;
+
+       /* leaf allocations are done in leafsize units */
+       u32 leafsize;
+
+       u32 stripesize;
+
+       u32 type;
+       u64 highest_inode;
+       u64 last_inode_alloc;
+       int ref_cows;
+       int track_dirty;
+       u64 defrag_trans_start;
+       struct btrfs_key defrag_progress;
+       struct btrfs_key defrag_max;
+       int defrag_running;
+       int defrag_level;
+       char *name;
+       int in_sysfs;
+
+       /* the dirty list is only used by non-reference counted roots */
+       struct list_head dirty_list;
+
+       spinlock_t list_lock;
+       struct list_head dead_list;
+       struct list_head orphan_list;
+};
+
+/*
+
+ * inode items have the data typically returned from stat and store other
+ * info about object characteristics.  There is one for every file and dir in
+ * the FS
+ */
+#define BTRFS_INODE_ITEM_KEY           1
+#define BTRFS_INODE_REF_KEY            2
+#define BTRFS_XATTR_ITEM_KEY           8
+#define BTRFS_ORPHAN_ITEM_KEY          9
+/* reserve 2-15 close to the inode for later flexibility */
+
+/*
+ * dir items are the name -> inode pointers in a directory.  There is one
+ * for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY  14
+#define BTRFS_DIR_LOG_INDEX_KEY 15
+#define BTRFS_DIR_ITEM_KEY     16
+#define BTRFS_DIR_INDEX_KEY    17
+/*
+ * extent data is for file data
+ */
+#define BTRFS_EXTENT_DATA_KEY  18
+/*
+ * csum items have the checksums for data in the extents
+ */
+#define BTRFS_CSUM_ITEM_KEY    19
+
+
+/* reserve 21-31 for other file/dir stuff */
+
+/*
+ * root items point to tree roots.  There are typically in the root
+ * tree used by the super block to find all the other trees
+ */
+#define BTRFS_ROOT_ITEM_KEY    32
+/*
+ * extent items are in the extent map tree.  These record which blocks
+ * are used, and how many references there are to each block
+ */
+#define BTRFS_EXTENT_ITEM_KEY  33
+#define BTRFS_EXTENT_REF_KEY   34
+
+/*
+ * block groups give us hints into the extent allocation trees.  Which
+ * blocks are free etc etc
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 50
+
+#define BTRFS_DEV_EXTENT_KEY   75
+#define BTRFS_DEV_ITEM_KEY     76
+#define BTRFS_CHUNK_ITEM_KEY   77
+
+/*
+ * string items are for debugging.  They just store a short string of
+ * data in the FS
+ */
+#define BTRFS_STRING_ITEM_KEY  253
+
+#define BTRFS_MOUNT_NODATASUM          (1 << 0)
+#define BTRFS_MOUNT_NODATACOW          (1 << 1)
+#define BTRFS_MOUNT_NOBARRIER          (1 << 2)
+#define BTRFS_MOUNT_SSD                        (1 << 3)
+#define BTRFS_MOUNT_DEGRADED           (1 << 4)
+
+#define btrfs_clear_opt(o, opt)                ((o) &= ~BTRFS_MOUNT_##opt)
+#define btrfs_set_opt(o, opt)          ((o) |= BTRFS_MOUNT_##opt)
+#define btrfs_test_opt(root, opt)      ((root)->fs_info->mount_opt & \
+                                        BTRFS_MOUNT_##opt)
+/*
+ * Inode flags
+ */
+#define BTRFS_INODE_NODATASUM          (1 << 0)
+#define BTRFS_INODE_NODATACOW          (1 << 1)
+#define BTRFS_INODE_READONLY           (1 << 2)
+#define btrfs_clear_flag(inode, flag)  (BTRFS_I(inode)->flags &= \
+                                        ~BTRFS_INODE_##flag)
+#define btrfs_set_flag(inode, flag)    (BTRFS_I(inode)->flags |= \
+                                        BTRFS_INODE_##flag)
+#define btrfs_test_flag(inode, flag)   (BTRFS_I(inode)->flags & \
+                                        BTRFS_INODE_##flag)
+/* some macros to generate set/get funcs for the struct fields.  This
+ * assumes there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#define read_eb_member(eb, ptr, type, member, result) (                        \
+       read_extent_buffer(eb, (char *)(result),                        \
+                          ((unsigned long)(ptr)) +                     \
+                           offsetof(type, member),                     \
+                          sizeof(((type *)0)->member)))
+
+#define write_eb_member(eb, ptr, type, member, result) (               \
+       write_extent_buffer(eb, (char *)(result),                       \
+                          ((unsigned long)(ptr)) +                     \
+                           offsetof(type, member),                     \
+                          sizeof(((type *)0)->member)))
+
+#ifndef BTRFS_SETGET_FUNCS
+#define BTRFS_SETGET_FUNCS(name, type, member, bits)                   \
+u##bits btrfs_##name(struct extent_buffer *eb, type *s);               \
+void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
+#endif
+
+#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)            \
+static inline u##bits btrfs_##name(struct extent_buffer *eb)           \
+{                                                                      \
+       type *p = kmap_atomic(eb->first_page, KM_USER0);                \
+       u##bits res = le##bits##_to_cpu(p->member);                     \
+       kunmap_atomic(p, KM_USER0);                                     \
+       return res;                                                     \
+}                                                                      \
+static inline void btrfs_set_##name(struct extent_buffer *eb,          \
+                                   u##bits val)                        \
+{                                                                      \
+       type *p = kmap_atomic(eb->first_page, KM_USER0);                \
+       p->member = cpu_to_le##bits(val);                               \
+       kunmap_atomic(p, KM_USER0);                                     \
+}
+
+#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits)             \
+static inline u##bits btrfs_##name(type *s)                            \
+{                                                                      \
+       return le##bits##_to_cpu(s->member);                            \
+}                                                                      \
+static inline void btrfs_set_##name(type *s, u##bits val)              \
+{                                                                      \
+       s->member = cpu_to_le##bits(val);                               \
+}
+
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
+BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
+BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
+
+BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
+                        total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
+                        bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
+                        io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
+                        io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
+                        sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
+                        dev_group, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
+                        seek_speed, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
+                        bandwidth, 8);
+
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+       return (char *)d + offsetof(struct btrfs_dev_item, uuid);
+}
+
+BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
+{
+       return (char *)s + offsetof(struct btrfs_stripe, dev_uuid);
+}
+
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+                        stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+                        io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+                        io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+                        sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+                        num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
+                        sub_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+                                                  int nr)
+{
+       unsigned long offset = (unsigned long)c;
+       offset += offsetof(struct btrfs_chunk, stripe);
+       offset += nr * sizeof(struct btrfs_stripe);
+       return (struct btrfs_stripe *)offset;
+}
+
+static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
+{
+       return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
+}
+
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+                                        struct btrfs_chunk *c, int nr)
+{
+       return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+                                            struct btrfs_chunk *c, int nr,
+                                            u64 val)
+{
+       btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+                                        struct btrfs_chunk *c, int nr)
+{
+       return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+                                            struct btrfs_chunk *c, int nr,
+                                            u64 val)
+{
+       btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+/* struct btrfs_block_group_item */
+BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+                        used, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+                        used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+                       struct btrfs_block_group_item, chunk_objectid, 64);
+
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+                  struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+                  struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+                       struct btrfs_block_group_item, flags, 64);
+
+/* struct btrfs_inode_ref */
+BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+
+/* struct btrfs_inode_item */
+BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64);
+BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16);
+BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item,
+                  compat_flags, 16);
+
+static inline struct btrfs_timespec *
+btrfs_inode_atime(struct btrfs_inode_item *inode_item)
+{
+       unsigned long ptr = (unsigned long)inode_item;
+       ptr += offsetof(struct btrfs_inode_item, atime);
+       return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
+{
+       unsigned long ptr = (unsigned long)inode_item;
+       ptr += offsetof(struct btrfs_inode_item, mtime);
+       return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
+{
+       unsigned long ptr = (unsigned long)inode_item;
+       ptr += offsetof(struct btrfs_inode_item, ctime);
+       return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_otime(struct btrfs_inode_item *inode_item)
+{
+       unsigned long ptr = (unsigned long)inode_item;
+       ptr += offsetof(struct btrfs_inode_item, otime);
+       return (struct btrfs_timespec *)ptr;
+}
+
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
+                  chunk_tree, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
+                  chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
+                  chunk_offset, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
+static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+{
+       unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
+       return (u8 *)((unsigned long)dev + ptr);
+}
+
+/* struct btrfs_extent_ref */
+BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64);
+BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
+
+BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref,
+                        generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref,
+                        objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref,
+                        offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref,
+                        num_refs, 32);
+
+/* struct btrfs_extent_item */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item,
+                        refs, 32);
+
+/* struct btrfs_node */
+BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
+BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+
+static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+{
+       unsigned long ptr;
+       ptr = offsetof(struct btrfs_node, ptrs) +
+               sizeof(struct btrfs_key_ptr) * nr;
+       return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+                                          int nr, u64 val)
+{
+       unsigned long ptr;
+       ptr = offsetof(struct btrfs_node, ptrs) +
+               sizeof(struct btrfs_key_ptr) * nr;
+       btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+{
+       unsigned long ptr;
+       ptr = offsetof(struct btrfs_node, ptrs) +
+               sizeof(struct btrfs_key_ptr) * nr;
+       return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+                                                int nr, u64 val)
+{
+       unsigned long ptr;
+       ptr = offsetof(struct btrfs_node, ptrs) +
+               sizeof(struct btrfs_key_ptr) * nr;
+       btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline unsigned long btrfs_node_key_ptr_offset(int nr)
+{
+       return offsetof(struct btrfs_node, ptrs) +
+               sizeof(struct btrfs_key_ptr) * nr;
+}
+
+void btrfs_node_key(struct extent_buffer *eb,
+                   struct btrfs_disk_key *disk_key, int nr);
+
+static inline void btrfs_set_node_key(struct extent_buffer *eb,
+                                     struct btrfs_disk_key *disk_key, int nr)
+{
+       unsigned long ptr;
+       ptr = btrfs_node_key_ptr_offset(nr);
+       write_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+                      struct btrfs_key_ptr, key, disk_key);
+}
+
+/* struct btrfs_item */
+BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+
+static inline unsigned long btrfs_item_nr_offset(int nr)
+{
+       return offsetof(struct btrfs_leaf, items) +
+               sizeof(struct btrfs_item) * nr;
+}
+
+static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
+                                              int nr)
+{
+       return (struct btrfs_item *)btrfs_item_nr_offset(nr);
+}
+
+static inline u32 btrfs_item_end(struct extent_buffer *eb,
+                                struct btrfs_item *item)
+{
+       return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
+}
+
+static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+{
+       return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
+{
+       return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+{
+       return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline void btrfs_item_key(struct extent_buffer *eb,
+                          struct btrfs_disk_key *disk_key, int nr)
+{
+       struct btrfs_item *item = btrfs_item_nr(eb, nr);
+       read_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+static inline void btrfs_set_item_key(struct extent_buffer *eb,
+                              struct btrfs_disk_key *disk_key, int nr)
+{
+       struct btrfs_item *item = btrfs_item_nr(eb, nr);
+       write_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
+
+/* struct btrfs_dir_item */
+BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
+
+static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+                                     struct btrfs_dir_item *item,
+                                     struct btrfs_disk_key *key)
+{
+       read_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+                                         struct btrfs_dir_item *item,
+                                         struct btrfs_disk_key *key)
+{
+       write_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+/* struct btrfs_disk_key */
+BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
+                        objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
+
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
+                                        struct btrfs_disk_key *disk)
+{
+       cpu->offset = le64_to_cpu(disk->offset);
+       cpu->type = disk->type;
+       cpu->objectid = le64_to_cpu(disk->objectid);
+}
+
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
+                                        struct btrfs_key *cpu)
+{
+       disk->offset = cpu_to_le64(cpu->offset);
+       disk->type = cpu->type;
+       disk->objectid = cpu_to_le64(cpu->objectid);
+}
+
+static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+                                 struct btrfs_key *key, int nr)
+{
+       struct btrfs_disk_key disk_key;
+       btrfs_node_key(eb, &disk_key, nr);
+       btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+                                 struct btrfs_key *key, int nr)
+{
+       struct btrfs_disk_key disk_key;
+       btrfs_item_key(eb, &disk_key, nr);
+       btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+                                     struct btrfs_dir_item *item,
+                                     struct btrfs_key *key)
+{
+       struct btrfs_disk_key disk_key;
+       btrfs_dir_item_key(eb, item, &disk_key);
+       btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+
+static inline u8 btrfs_key_type(struct btrfs_key *key)
+{
+       return key->type;
+}
+
+static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val)
+{
+       key->type = val;
+}
+
+/* struct btrfs_header */
+BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
+                         generation, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
+BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+
+static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+{
+       return (btrfs_header_flags(eb) & flag) == flag;
+}
+
+static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+{
+       u64 flags = btrfs_header_flags(eb);
+       btrfs_set_header_flags(eb, flags | flag);
+       return (flags & flag) == flag;
+}
+
+static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+{
+       u64 flags = btrfs_header_flags(eb);
+       btrfs_set_header_flags(eb, flags & ~flag);
+       return (flags & flag) == flag;
+}
+
+static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
+{
+       unsigned long ptr = offsetof(struct btrfs_header, fsid);
+       return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+{
+       unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid);
+       return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
+{
+       unsigned long ptr = offsetof(struct btrfs_super_block, fsid);
+       return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
+{
+       unsigned long ptr = offsetof(struct btrfs_header, csum);
+       return (u8 *)ptr;
+}
+
+static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
+{
+       return NULL;
+}
+
+static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
+{
+       return NULL;
+}
+
+static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
+{
+       return NULL;
+}
+
+static inline int btrfs_is_leaf(struct extent_buffer *eb)
+{
+       return (btrfs_header_level(eb) == 0);
+}
+
+/* struct btrfs_root_item */
+BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
+BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32);
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+
+/* struct btrfs_super_block */
+BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
+                        generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+                        struct btrfs_super_block, sys_chunk_array_size, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
+                        root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+                        chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+                        chunk_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
+                        log_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
+                        log_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
+                        total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
+                        bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
+                        sectorsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
+                        nodesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block,
+                        leafsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
+                        stripesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
+                        root_dir_objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
+                        num_devices, 64);
+
+static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
+{
+       return offsetof(struct btrfs_leaf, items);
+}
+
+/* struct btrfs_file_extent_item */
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+
+static inline unsigned long btrfs_file_extent_inline_start(struct
+                                                  btrfs_file_extent_item *e)
+{
+       unsigned long offset = (unsigned long)e;
+       offset += offsetof(struct btrfs_file_extent_item, disk_bytenr);
+       return offset;
+}
+
+static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
+{
+       return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
+}
+
+static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+                                              struct btrfs_item *e)
+{
+       unsigned long offset;
+       offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
+       return btrfs_item_size(eb, e) - offset;
+}
+
+BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
+                  disk_bytenr, 64);
+BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
+                  generation, 64);
+BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
+                  disk_num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
+                 offset, 64);
+BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
+                  num_bytes, 64);
+
+static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
+{
+       return sb->s_fs_info;
+}
+
+static inline int btrfs_set_root_name(struct btrfs_root *root,
+                                     const char *name, int len)
+{
+       /* if we already have a name just free it */
+       if (root->name)
+               kfree(root->name);
+
+       root->name = kmalloc(len+1, GFP_KERNEL);
+       if (!root->name)
+               return -ENOMEM;
+
+       memcpy(root->name, name, len);
+       root->name[len] ='\0';
+
+       return 0;
+}
+
+static inline u32 btrfs_level_size(struct btrfs_root *root, int level) {
+       if (level == 0)
+               return root->leafsize;
+       return root->nodesize;
+}
+
+/* helper function to cast into the data area of the leaf. */
+#define btrfs_item_ptr(leaf, slot, type) \
+       ((type *)(btrfs_leaf_data(leaf) + \
+       btrfs_item_offset_nr(leaf, slot)))
+
+#define btrfs_item_ptr_offset(leaf, slot) \
+       ((unsigned long)(btrfs_leaf_data(leaf) + \
+       btrfs_item_offset_nr(leaf, slot)))
+
+static inline struct dentry *fdentry(struct file *file) {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+       return file->f_dentry;
+#else
+       return file->f_path.dentry;
+#endif
+}
+
+/* extent-tree.c */
+int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_update_pinned_extents(struct btrfs_root *root,
+                               u64 bytenr, u64 num, int pin);
+int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
+                          struct btrfs_key *key, u64 bytenr);
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root);
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
+struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
+                                                        btrfs_fs_info *info,
+                                                        u64 bytenr);
+struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
+                                                struct btrfs_block_group_cache
+                                                *hint, u64 search_start,
+                                                int data, int owner);
+struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            u32 blocksize, u64 parent,
+                                            u64 root_objectid,
+                                            u64 ref_generation,
+                                            int level,
+                                            u64 hint,
+                                            u64 empty_size);
+struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
+                                           struct btrfs_root *root,
+                                           u64 bytenr, u32 blocksize);
+int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size);
+int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                u64 bytenr, u64 parent,
+                                u64 root_objectid, u64 ref_generation,
+                                u64 owner, u64 owner_offset);
+int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root,
+                      u64 num_bytes, u64 parent, u64 min_bytes,
+                      u64 root_objectid, u64 ref_generation,
+                      u64 owner, u64 owner_offset,
+                      u64 empty_size, u64 hint_byte,
+                      u64 search_end, struct btrfs_key *ins, u64 data);
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, u64 parent,
+                               u64 root_objectid, u64 ref_generation,
+                               u64 owner, u64 owner_offset,
+                               struct btrfs_key *ins);
+int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, u64 parent,
+                               u64 root_objectid, u64 ref_generation,
+                               u64 owner, u64 owner_offset,
+                               struct btrfs_key *ins);
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+                                 u64 num_bytes, u64 min_alloc_size,
+                                 u64 empty_size, u64 hint_byte,
+                                 u64 search_end, struct btrfs_key *ins,
+                                 u64 data);
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                 struct extent_buffer *orig_buf, struct extent_buffer *buf,
+                 u32 *nr_extents);
+int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                   struct extent_buffer *buf, u32 nr_extents);
+int btrfs_update_ref(struct btrfs_trans_handle *trans,
+                    struct btrfs_root *root, struct extent_buffer *orig_buf,
+                    struct extent_buffer *buf, int start_slot, int nr);
+int btrfs_free_extent(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root,
+                     u64 bytenr, u64 num_bytes, u64 parent,
+                     u64 root_objectid, u64 ref_generation,
+                     u64 owner_objectid, u64 owner_offset, int pin);
+int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct extent_io_tree *unpin);
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root,
+                        u64 bytenr, u64 num_bytes, u64 parent,
+                        u64 root_objectid, u64 ref_generation,
+                        u64 owner, u64 owner_offset);
+int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, u64 bytenr,
+                           u64 orig_parent, u64 parent,
+                           u64 root_objectid, u64 ref_generation,
+                           u64 owner, u64 owner_offset);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root);
+int btrfs_free_block_groups(struct btrfs_fs_info *info);
+int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root, u64 bytes_used,
+                          u64 type, u64 chunk_objectid, u64 chunk_offset,
+                          u64 size);
+/* ctree.c */
+int btrfs_previous_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid,
+                       int type);
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, struct btrfs_path *path,
+                           struct btrfs_key *new_key);
+struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+                       struct btrfs_key *key, int lowest_level,
+                       int cache_only, u64 min_trans);
+int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+                        struct btrfs_key *max_key,
+                        struct btrfs_path *path, int cache_only,
+                        u64 min_trans);
+int btrfs_cow_block(struct btrfs_trans_handle *trans,
+                   struct btrfs_root *root, struct extent_buffer *buf,
+                   struct extent_buffer *parent, int parent_slot,
+                   struct extent_buffer **cow_ret, u64 prealloc_dest);
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root,
+                     struct extent_buffer *buf,
+                     struct extent_buffer **cow_ret, u64 new_root_objectid);
+int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_path *path, u32 data_size);
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct btrfs_path *path,
+                       u32 new_size, int from_end);
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_key *key, struct btrfs_path *p, int
+                     ins_len, int cow);
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root, struct extent_buffer *parent,
+                      int start_slot, int cache_only, u64 *last_ret,
+                      struct btrfs_key *progress);
+void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+void btrfs_init_path(struct btrfs_path *p);
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                  struct btrfs_path *path, int slot, int nr);
+
+static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                struct btrfs_path *path)
+{
+       return btrfs_del_items(trans, root, path, path->slots[0], 1);
+}
+
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_key *key, void *data, u32 data_size);
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct btrfs_path *path,
+                            struct btrfs_key *cpu_key, u32 *data_size, int nr);
+
+static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         struct btrfs_path *path,
+                                         struct btrfs_key *key,
+                                         u32 data_size)
+{
+       return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
+}
+
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
+                       *root);
+/* root-item.c */
+int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                  struct btrfs_key *key);
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_key *key, struct btrfs_root_item
+                     *item);
+int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
+                     *root, struct btrfs_key *key, struct btrfs_root_item
+                     *item);
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
+                        btrfs_root_item *item, struct btrfs_key *key);
+int btrfs_search_root(struct btrfs_root *root, u64 search_start,
+                     u64 *found_objectid);
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
+                         struct btrfs_root *latest_root);
+/* dir-item.c */
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+                         *root, const char *name, int name_len, u64 dir,
+                         struct btrfs_key *location, u8 type, u64 index);
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+                                            const char *name, int name_len,
+                                            int mod);
+struct btrfs_dir_item *
+btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dir,
+                           u64 objectid, const char *name, int name_len,
+                           int mod);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+                             struct btrfs_path *path,
+                             const char *name, int name_len);
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct btrfs_path *path,
+                             struct btrfs_dir_item *di);
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, const char *name,
+                           u16 name_len, const void *data, u16 data_len,
+                           u64 dir);
+struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         struct btrfs_path *path, u64 dir,
+                                         const char *name, u16 name_len,
+                                         int mod);
+
+/* orphan.c */
+int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, u64 offset);
+int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root, u64 offset);
+
+/* inode-map.c */
+int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *fs_root,
+                            u64 dirid, u64 *objectid);
+int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
+
+/* inode-item.c */
+int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
+                          const char *name, int name_len,
+                          u64 inode_objectid, u64 ref_objectid, u64 index);
+int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
+                          const char *name, int name_len,
+                          u64 inode_objectid, u64 ref_objectid, u64 *index);
+int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct btrfs_path *path, u64 objectid);
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+                      *root, struct btrfs_path *path,
+                      struct btrfs_key *location, int mod);
+
+/* file-item.c */
+int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
+                         struct bio *bio);
+int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              u64 objectid, u64 pos, u64 disk_offset,
+                              u64 disk_num_bytes,
+                            u64 num_bytes, u64 offset);
+int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct btrfs_path *path, u64 objectid,
+                            u64 bytenr, int mod);
+int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root, struct inode *inode,
+                          struct btrfs_ordered_sum *sums);
+int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
+                      struct bio *bio);
+struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         struct btrfs_path *path,
+                                         u64 objectid, u64 offset,
+                                         int cow);
+int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root, struct btrfs_path *path,
+                       u64 isize);
+/* inode.c */
+
+/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
+#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
+#define ClearPageChecked ClearPageFsMisc
+#define SetPageChecked SetPageFsMisc
+#define PageChecked PageFsMisc
+#endif
+
+int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root,
+                      struct inode *dir, struct inode *inode,
+                      const char *name, int name_len);
+int btrfs_add_link(struct btrfs_trans_handle *trans,
+                  struct inode *parent_inode, struct inode *inode,
+                  const char *name, int name_len, int add_backref, u64 index);
+int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct inode *inode, u64 new_size,
+                              u32 min_type);
+
+int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
+int btrfs_writepages(struct address_space *mapping,
+                    struct writeback_control *wbc);
+int btrfs_create_subvol_root(struct btrfs_root *new_root,
+               struct btrfs_trans_handle *trans, u64 new_dirid,
+               struct btrfs_block_group_cache *block_group);
+
+void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
+                                 int namelen);
+
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
+                        size_t size, struct bio *bio);
+
+static inline void dec_i_blocks(struct inode *inode, u64 dec)
+{
+       dec = dec >> 9;
+       if (dec <= inode->i_blocks)
+               inode->i_blocks -= dec;
+       else
+               inode->i_blocks = 0;
+}
+
+unsigned long btrfs_force_ra(struct address_space *mapping,
+                             struct file_ra_state *ra, struct file *file,
+                             pgoff_t offset, pgoff_t last_index);
+int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
+                          int for_del);
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_readpage(struct file *file, struct page *page);
+void btrfs_delete_inode(struct inode *inode);
+void btrfs_put_inode(struct inode *inode);
+void btrfs_read_locked_inode(struct inode *inode);
+int btrfs_write_inode(struct inode *inode, int wait);
+void btrfs_dirty_inode(struct inode *inode);
+struct inode *btrfs_alloc_inode(struct super_block *sb);
+void btrfs_destroy_inode(struct inode *inode);
+int btrfs_init_cachep(void);
+void btrfs_destroy_cachep(void);
+long btrfs_ioctl_trans_end(struct file *file);
+struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
+                               struct btrfs_root *root);
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+                        struct btrfs_root *root, int *is_new);
+int btrfs_commit_write(struct file *file, struct page *page,
+                      unsigned from, unsigned to);
+struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
+                                   size_t page_offset, u64 start, u64 end,
+                                   int create);
+int btrfs_update_inode(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct inode *inode);
+
+/* ioctl.c */
+long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/* file.c */
+int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end);
+int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
+extern struct file_operations btrfs_file_operations;
+int btrfs_drop_extents(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root, struct inode *inode,
+                      u64 start, u64 end, u64 inline_limit, u64 *hint_block);
+int btrfs_release_file(struct inode *inode, struct file *file);
+
+/* tree-defrag.c */
+int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root, int cache_only);
+
+/* sysfs.c */
+int btrfs_init_sysfs(void);
+void btrfs_exit_sysfs(void);
+int btrfs_sysfs_add_super(struct btrfs_fs_info *fs);
+int btrfs_sysfs_add_root(struct btrfs_root *root);
+void btrfs_sysfs_del_root(struct btrfs_root *root);
+void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
+
+/* xattr.c */
+ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+
+/* super.c */
+u64 btrfs_parse_size(char *str);
+int btrfs_parse_options(struct btrfs_root *root, char *options);
+int btrfs_sync_fs(struct super_block *sb, int wait);
+
+/* acl.c */
+int btrfs_check_acl(struct inode *inode, int mask);
+int btrfs_init_acl(struct inode *inode, struct inode *dir);
+int btrfs_acl_chmod(struct inode *inode);
+
+/* free-space-cache.c */
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+                        u64 bytenr, u64 size);
+int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+                           u64 bytenr, u64 size);
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
+                                  *block_group);
+struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
+                                              *block_group, u64 offset,
+                                              u64 bytes);
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+                          u64 bytes);
+u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
+#endif
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
new file mode 100644 (file)
index 0000000..e4f3009
--- /dev/null
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "hash.h"
+#include "transaction.h"
+
+static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
+                                                  *trans,
+                                                  struct btrfs_root *root,
+                                                  struct btrfs_path *path,
+                                                  struct btrfs_key *cpu_key,
+                                                  u32 data_size,
+                                                  const char *name,
+                                                  int name_len)
+{
+       int ret;
+       char *ptr;
+       struct btrfs_item *item;
+       struct extent_buffer *leaf;
+
+       ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+       if (ret == -EEXIST) {
+               struct btrfs_dir_item *di;
+               di = btrfs_match_dir_item_name(root, path, name, name_len);
+               if (di)
+                       return ERR_PTR(-EEXIST);
+               ret = btrfs_extend_item(trans, root, path, data_size);
+               WARN_ON(ret > 0);
+       }
+       if (ret < 0)
+               return ERR_PTR(ret);
+       WARN_ON(ret > 0);
+       leaf = path->nodes[0];
+       item = btrfs_item_nr(leaf, path->slots[0]);
+       ptr = btrfs_item_ptr(leaf, path->slots[0], char);
+       BUG_ON(data_size > btrfs_item_size(leaf, item));
+       ptr += btrfs_item_size(leaf, item) - data_size;
+       return (struct btrfs_dir_item *)ptr;
+}
+
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, const char *name,
+                           u16 name_len, const void *data, u16 data_len,
+                           u64 dir)
+{
+       int ret = 0;
+       struct btrfs_path *path;
+       struct btrfs_dir_item *dir_item;
+       unsigned long name_ptr, data_ptr;
+       struct btrfs_key key, location;
+       struct btrfs_disk_key disk_key;
+       struct extent_buffer *leaf;
+       u32 data_size;
+
+       key.objectid = dir;
+       btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+       key.offset = btrfs_name_hash(name, name_len);
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+       if (name_len + data_len + sizeof(struct btrfs_dir_item) >
+           BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
+               return -ENOSPC;
+
+       data_size = sizeof(*dir_item) + name_len + data_len;
+       dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+                                       name, name_len);
+       /*
+        * FIXME: at some point we should handle xattr's that are larger than
+        * what we can fit in our leaf.  We set location to NULL b/c we arent
+        * pointing at anything else, that will change if we store the xattr
+        * data in a separate inode.
+        */
+       BUG_ON(IS_ERR(dir_item));
+       memset(&location, 0, sizeof(location));
+
+       leaf = path->nodes[0];
+       btrfs_cpu_key_to_disk(&disk_key, &location);
+       btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+       btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
+       btrfs_set_dir_name_len(leaf, dir_item, name_len);
+       btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+       btrfs_set_dir_data_len(leaf, dir_item, data_len);
+       name_ptr = (unsigned long)(dir_item + 1);
+       data_ptr = (unsigned long)((char *)name_ptr + name_len);
+
+       write_extent_buffer(leaf, name, name_ptr, name_len);
+       write_extent_buffer(leaf, data, data_ptr, data_len);
+       btrfs_mark_buffer_dirty(path->nodes[0]);
+
+       btrfs_free_path(path);
+       return ret;
+}
+
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+                         *root, const char *name, int name_len, u64 dir,
+                         struct btrfs_key *location, u8 type, u64 index)
+{
+       int ret = 0;
+       int ret2 = 0;
+       struct btrfs_path *path;
+       struct btrfs_dir_item *dir_item;
+       struct extent_buffer *leaf;
+       unsigned long name_ptr;
+       struct btrfs_key key;
+       struct btrfs_disk_key disk_key;
+       u32 data_size;
+
+       key.objectid = dir;
+       btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+       key.offset = btrfs_name_hash(name, name_len);
+       path = btrfs_alloc_path();
+       data_size = sizeof(*dir_item) + name_len;
+       dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+                                       name, name_len);
+       if (IS_ERR(dir_item)) {
+               ret = PTR_ERR(dir_item);
+               if (ret == -EEXIST)
+                       goto second_insert;
+               goto out;
+       }
+
+       leaf = path->nodes[0];
+       btrfs_cpu_key_to_disk(&disk_key, location);
+       btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+       btrfs_set_dir_type(leaf, dir_item, type);
+       btrfs_set_dir_data_len(leaf, dir_item, 0);
+       btrfs_set_dir_name_len(leaf, dir_item, name_len);
+       btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+       name_ptr = (unsigned long)(dir_item + 1);
+
+       write_extent_buffer(leaf, name, name_ptr, name_len);
+       btrfs_mark_buffer_dirty(leaf);
+
+second_insert:
+       /* FIXME, use some real flag for selecting the extra index */
+       if (root == root->fs_info->tree_root) {
+               ret = 0;
+               goto out;
+       }
+       btrfs_release_path(root, path);
+
+       btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+       key.offset = index;
+       dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+                                       name, name_len);
+       if (IS_ERR(dir_item)) {
+               ret2 = PTR_ERR(dir_item);
+               goto out;
+       }
+       leaf = path->nodes[0];
+       btrfs_cpu_key_to_disk(&disk_key, location);
+       btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+       btrfs_set_dir_type(leaf, dir_item, type);
+       btrfs_set_dir_data_len(leaf, dir_item, 0);
+       btrfs_set_dir_name_len(leaf, dir_item, name_len);
+       btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+       name_ptr = (unsigned long)(dir_item + 1);
+       write_extent_buffer(leaf, name, name_ptr, name_len);
+       btrfs_mark_buffer_dirty(leaf);
+out:
+       btrfs_free_path(path);
+       if (ret)
+               return ret;
+       if (ret2)
+               return ret2;
+       return 0;
+}
+
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+                                            const char *name, int name_len,
+                                            int mod)
+{
+       int ret;
+       struct btrfs_key key;
+       int ins_len = mod < 0 ? -1 : 0;
+       int cow = mod != 0;
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
+
+       key.objectid = dir;
+       btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+
+       key.offset = btrfs_name_hash(name, name_len);
+
+       ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       if (ret > 0) {
+               if (path->slots[0] == 0)
+                       return NULL;
+               path->slots[0]--;
+       }
+
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+       if (found_key.objectid != dir ||
+           btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
+           found_key.offset != key.offset)
+               return NULL;
+
+       return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+struct btrfs_dir_item *
+btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dir,
+                           u64 objectid, const char *name, int name_len,
+                           int mod)
+{
+       int ret;
+       struct btrfs_key key;
+       int ins_len = mod < 0 ? -1 : 0;
+       int cow = mod != 0;
+
+       key.objectid = dir;
+       btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+       key.offset = objectid;
+
+       ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       if (ret > 0)
+               return ERR_PTR(-ENOENT);
+       return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         struct btrfs_path *path, u64 dir,
+                                         const char *name, u16 name_len,
+                                         int mod)
+{
+       int ret;
+       struct btrfs_key key;
+       int ins_len = mod < 0 ? -1 : 0;
+       int cow = mod != 0;
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
+
+       key.objectid = dir;
+       btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+       key.offset = btrfs_name_hash(name, name_len);
+       ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       if (ret > 0) {
+               if (path->slots[0] == 0)
+                       return NULL;
+               path->slots[0]--;
+       }
+
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+       if (found_key.objectid != dir ||
+           btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
+           found_key.offset != key.offset)
+               return NULL;
+
+       return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+                             struct btrfs_path *path,
+                             const char *name, int name_len)
+{
+       struct btrfs_dir_item *dir_item;
+       unsigned long name_ptr;
+       u32 total_len;
+       u32 cur = 0;
+       u32 this_len;
+       struct extent_buffer *leaf;
+
+       leaf = path->nodes[0];
+       dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+       total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+       while(cur < total_len) {
+               this_len = sizeof(*dir_item) +
+                       btrfs_dir_name_len(leaf, dir_item) +
+                       btrfs_dir_data_len(leaf, dir_item);
+               name_ptr = (unsigned long)(dir_item + 1);
+
+               if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
+                   memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+                       return dir_item;
+
+               cur += this_len;
+               dir_item = (struct btrfs_dir_item *)((char *)dir_item +
+                                                    this_len);
+       }
+       return NULL;
+}
+
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct btrfs_path *path,
+                             struct btrfs_dir_item *di)
+{
+
+       struct extent_buffer *leaf;
+       u32 sub_item_len;
+       u32 item_len;
+       int ret = 0;
+
+       leaf = path->nodes[0];
+       sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
+               btrfs_dir_data_len(leaf, di);
+       item_len = btrfs_item_size_nr(leaf, path->slots[0]);
+       if (sub_item_len == item_len) {
+               ret = btrfs_del_item(trans, root, path);
+       } else {
+               /* MARKER */
+               unsigned long ptr = (unsigned long)di;
+               unsigned long start;
+
+               start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+               memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+                       item_len - (ptr + sub_item_len - start));
+               ret = btrfs_truncate_item(trans, root, path,
+                                         item_len - sub_item_len, 1);
+       }
+       return 0;
+}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
new file mode 100644 (file)
index 0000000..d35ca6a
--- /dev/null
@@ -0,0 +1,2056 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+#include <linux/swap.h>
+#include <linux/radix-tree.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h> // for block_sync_page
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+# include <linux/freezer.h>
+#else
+# include <linux/sched.h>
+#endif
+#include "crc32c.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "volumes.h"
+#include "print-tree.h"
+#include "async-thread.h"
+#include "locking.h"
+#include "ref-cache.h"
+#include "tree-log.h"
+
+#if 0
+static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
+{
+       if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) {
+               printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
+                      (unsigned long long)extent_buffer_blocknr(buf),
+                      (unsigned long long)btrfs_header_blocknr(buf));
+               return 1;
+       }
+       return 0;
+}
+#endif
+
+static struct extent_io_ops btree_extent_io_ops;
+static void end_workqueue_fn(struct btrfs_work *work);
+
+struct end_io_wq {
+       struct bio *bio;
+       bio_end_io_t *end_io;
+       void *private;
+       struct btrfs_fs_info *info;
+       int error;
+       int metadata;
+       struct list_head list;
+       struct btrfs_work work;
+};
+
+struct async_submit_bio {
+       struct inode *inode;
+       struct bio *bio;
+       struct list_head list;
+       extent_submit_bio_hook_t *submit_bio_hook;
+       int rw;
+       int mirror_num;
+       struct btrfs_work work;
+};
+
+struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
+                                   size_t page_offset, u64 start, u64 len,
+                                   int create)
+{
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
+       int ret;
+
+       spin_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, start, len);
+       if (em) {
+               em->bdev =
+                       BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+               spin_unlock(&em_tree->lock);
+               goto out;
+       }
+       spin_unlock(&em_tree->lock);
+
+       em = alloc_extent_map(GFP_NOFS);
+       if (!em) {
+               em = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+       em->start = 0;
+       em->len = (u64)-1;
+       em->block_start = 0;
+       em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+
+       spin_lock(&em_tree->lock);
+       ret = add_extent_mapping(em_tree, em);
+       if (ret == -EEXIST) {
+               u64 failed_start = em->start;
+               u64 failed_len = em->len;
+
+               printk("failed to insert %Lu %Lu -> %Lu into tree\n",
+                      em->start, em->len, em->block_start);
+               free_extent_map(em);
+               em = lookup_extent_mapping(em_tree, start, len);
+               if (em) {
+                       printk("after failing, found %Lu %Lu %Lu\n",
+                              em->start, em->len, em->block_start);
+                       ret = 0;
+               } else {
+                       em = lookup_extent_mapping(em_tree, failed_start,
+                                                  failed_len);
+                       if (em) {
+                               printk("double failure lookup gives us "
+                                      "%Lu %Lu -> %Lu\n", em->start,
+                                      em->len, em->block_start);
+                               free_extent_map(em);
+                       }
+                       ret = -EIO;
+               }
+       } else if (ret) {
+               free_extent_map(em);
+               em = NULL;
+       }
+       spin_unlock(&em_tree->lock);
+
+       if (ret)
+               em = ERR_PTR(ret);
+out:
+       return em;
+}
+
+u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+{
+       return btrfs_crc32c(seed, data, len);
+}
+
+void btrfs_csum_final(u32 crc, char *result)
+{
+       *(__le32 *)result = ~cpu_to_le32(crc);
+}
+
+static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+                          int verify)
+{
+       char result[BTRFS_CRC32_SIZE];
+       unsigned long len;
+       unsigned long cur_len;
+       unsigned long offset = BTRFS_CSUM_SIZE;
+       char *map_token = NULL;
+       char *kaddr;
+       unsigned long map_start;
+       unsigned long map_len;
+       int err;
+       u32 crc = ~(u32)0;
+
+       len = buf->len - offset;
+       while(len > 0) {
+               err = map_private_extent_buffer(buf, offset, 32,
+                                       &map_token, &kaddr,
+                                       &map_start, &map_len, KM_USER0);
+               if (err) {
+                       printk("failed to map extent buffer! %lu\n",
+                              offset);
+                       return 1;
+               }
+               cur_len = min(len, map_len - (offset - map_start));
+               crc = btrfs_csum_data(root, kaddr + offset - map_start,
+                                     crc, cur_len);
+               len -= cur_len;
+               offset += cur_len;
+               unmap_extent_buffer(buf, map_token, KM_USER0);
+       }
+       btrfs_csum_final(crc, result);
+
+       if (verify) {
+               /* FIXME, this is not good */
+               if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
+                       u32 val;
+                       u32 found = 0;
+                       memcpy(&found, result, BTRFS_CRC32_SIZE);
+
+                       read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
+                       printk("btrfs: %s checksum verify failed on %llu "
+                              "wanted %X found %X level %d\n",
+                              root->fs_info->sb->s_id,
+                              buf->start, val, found, btrfs_header_level(buf));
+                       return 1;
+               }
+       } else {
+               write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
+       }
+       return 0;
+}
+
+static int verify_parent_transid(struct extent_io_tree *io_tree,
+                                struct extent_buffer *eb, u64 parent_transid)
+{
+       int ret;
+
+       if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
+               return 0;
+
+       lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS);
+       if (extent_buffer_uptodate(io_tree, eb) &&
+           btrfs_header_generation(eb) == parent_transid) {
+               ret = 0;
+               goto out;
+       }
+       printk("parent transid verify failed on %llu wanted %llu found %llu\n",
+              (unsigned long long)eb->start,
+              (unsigned long long)parent_transid,
+              (unsigned long long)btrfs_header_generation(eb));
+       ret = 1;
+       clear_extent_buffer_uptodate(io_tree, eb);
+out:
+       unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
+                     GFP_NOFS);
+       return ret;
+
+}
+
+static int btree_read_extent_buffer_pages(struct btrfs_root *root,
+                                         struct extent_buffer *eb,
+                                         u64 start, u64 parent_transid)
+{
+       struct extent_io_tree *io_tree;
+       int ret;
+       int num_copies = 0;
+       int mirror_num = 0;
+
+       io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
+       while (1) {
+               ret = read_extent_buffer_pages(io_tree, eb, start, 1,
+                                              btree_get_extent, mirror_num);
+               if (!ret &&
+                   !verify_parent_transid(io_tree, eb, parent_transid))
+                       return ret;
+printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num);
+               num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+                                             eb->start, eb->len);
+               if (num_copies == 1)
+                       return ret;
+
+               mirror_num++;
+               if (mirror_num > num_copies)
+                       return ret;
+       }
+       return -EIO;
+}
+
+int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
+{
+       struct extent_io_tree *tree;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+       u64 found_start;
+       int found_level;
+       unsigned long len;
+       struct extent_buffer *eb;
+       int ret;
+
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+       if (page->private == EXTENT_PAGE_PRIVATE)
+               goto out;
+       if (!page->private)
+               goto out;
+       len = page->private >> 2;
+       if (len == 0) {
+               WARN_ON(1);
+       }
+       eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+       ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
+                                            btrfs_header_generation(eb));
+       BUG_ON(ret);
+       found_start = btrfs_header_bytenr(eb);
+       if (found_start != start) {
+               printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
+                      start, found_start, len);
+               WARN_ON(1);
+               goto err;
+       }
+       if (eb->first_page != page) {
+               printk("bad first page %lu %lu\n", eb->first_page->index,
+                      page->index);
+               WARN_ON(1);
+               goto err;
+       }
+       if (!PageUptodate(page)) {
+               printk("csum not up to date page %lu\n", page->index);
+               WARN_ON(1);
+               goto err;
+       }
+       found_level = btrfs_header_level(eb);
+
+       csum_tree_block(root, eb, 0);
+err:
+       free_extent_buffer(eb);
+out:
+       return 0;
+}
+
+int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
+                              struct extent_state *state)
+{
+       struct extent_io_tree *tree;
+       u64 found_start;
+       int found_level;
+       unsigned long len;
+       struct extent_buffer *eb;
+       struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+       int ret = 0;
+
+       tree = &BTRFS_I(page->mapping->host)->io_tree;
+       if (page->private == EXTENT_PAGE_PRIVATE)
+               goto out;
+       if (!page->private)
+               goto out;
+       len = page->private >> 2;
+       if (len == 0) {
+               WARN_ON(1);
+       }
+       eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+
+       found_start = btrfs_header_bytenr(eb);
+       if (found_start != start) {
+               printk("bad tree block start %llu %llu\n",
+                      (unsigned long long)found_start,
+                      (unsigned long long)eb->start);
+               ret = -EIO;
+               goto err;
+       }
+       if (eb->first_page != page) {
+               printk("bad first page %lu %lu\n", eb->first_page->index,
+                      page->index);
+               WARN_ON(1);
+               ret = -EIO;
+               goto err;
+       }
+       if (memcmp_extent_buffer(eb, root->fs_info->fsid,
+                                (unsigned long)btrfs_header_fsid(eb),
+                                BTRFS_FSID_SIZE)) {
+               printk("bad fsid on block %Lu\n", eb->start);
+               ret = -EIO;
+               goto err;
+       }
+       found_level = btrfs_header_level(eb);
+
+       ret = csum_tree_block(root, eb, 1);
+       if (ret)
+               ret = -EIO;
+
+       end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
+       end = eb->start + end - 1;
+err:
+       free_extent_buffer(eb);
+out:
+       return ret;
+}
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_workqueue_bio(struct bio *bio, int err)
+#else
+static int end_workqueue_bio(struct bio *bio,
+                                  unsigned int bytes_done, int err)
+#endif
+{
+       struct end_io_wq *end_io_wq = bio->bi_private;
+       struct btrfs_fs_info *fs_info;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       if (bio->bi_size)
+               return 1;
+#endif
+
+       fs_info = end_io_wq->info;
+       end_io_wq->error = err;
+       end_io_wq->work.func = end_workqueue_fn;
+       end_io_wq->work.flags = 0;
+       if (bio->bi_rw & (1 << BIO_RW))
+               btrfs_queue_worker(&fs_info->endio_write_workers,
+                                  &end_io_wq->work);
+       else
+               btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+       return 0;
+#endif
+}
+
+int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+                       int metadata)
+{
+       struct end_io_wq *end_io_wq;
+       end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
+       if (!end_io_wq)
+               return -ENOMEM;
+
+       end_io_wq->private = bio->bi_private;
+       end_io_wq->end_io = bio->bi_end_io;
+       end_io_wq->info = info;
+       end_io_wq->error = 0;
+       end_io_wq->bio = bio;
+       end_io_wq->metadata = metadata;
+
+       bio->bi_private = end_