--- /dev/null
+
+ NOTE! This copyright does *not* cover user programs that use kernel
+ services by normal system calls - this is merely considered normal use
+ of the kernel, and does *not* fall under the heading of "derived work".
+ Also note that the GPL below is copyrighted by the Free Software
+ Foundation, but the instance of code that it refers to (the Linux
+ kernel) is copyrighted by me and others who actually wrote it.
+
+ Also note that the only valid version of the GPL as far as the kernel
+ is concerned is _this_ particular version of the license (ie v2, not
+ v2.2 or v3.x or whatever), unless explicitly otherwise stated.
+
+ Linus Torvalds
+
+----------------------------------------
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+\f
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
--- /dev/null
+Install Instructions
+
+Btrfs puts snapshots and subvolumes into the root directory of the FS. This
+directory can only be changed by btrfsctl right now, and normal filesystem
+operations do not work on it. The default subvolume is called 'default',
+and you can create files and directories in mount_point/default
+
+Btrfs uses libcrc32c in the kernel for file and metadata checksums. You need
+to compile the kernel with:
+
+CONFIG_LIBCRC32C=m
+
+libcrc32c can be static as well. Once your kernel is setup, typing make in the
+btrfs module sources will build against the running kernel. When the build is
+complete:
+
+modprobe libcrc32c
+insmod btrfs.ko
+
+The Btrfs utility programs require libuuid to build. This can be found
+in the e2fsprogs sources, and is usually available as libuuid or
+e2fsprogs-devel from various distros.
+
+Building the utilities is just make ; make install. The programs go
+into /usr/local/bin. The commands available are:
+
+mkfs.btrfs: create a filesystem
+
+btrfsctl: control program to create snapshots and subvolumes:
+
+ mount /dev/sda2 /mnt
+ btrfsctl -s new_subvol_name /mnt
+ btrfsctl -s snapshot_of_default /mnt/default
+ btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
+ btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
+ ls /mnt
+ default snapshot_of_a_snapshot snapshot_of_new_subvol
+ new_subvol_name snapshot_of_default
+
+ Snapshots and subvolumes cannot be deleted right now, but you can
+ rm -rf all the files and directories inside them.
+
+btrfsck: do a limited check of the FS extent trees.</li>
+
+debug-tree: print all of the FS metadata in text form. Example:
+
+ debug-tree /dev/sda2 >& big_output_file
+
--- /dev/null
+ifneq ($(KERNELRELEASE),)
+# kbuild part of makefile
+
+obj-m := btrfs.o
+btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+ file-item.o inode-item.o inode-map.o disk-io.o \
+ transaction.o bit-radix.o inode.o file.o tree-defrag.o \
+ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
+ ref-cache.o export.o tree-log.o acl.o free-space-cache.o
+else
+
+# Normal Makefile
+
+KERNELDIR := /lib/modules/`uname -r`/build
+all: version
+ $(MAKE) -C $(KERNELDIR) M=`pwd` modules
+
+version:
+ bash version.sh
+
+modules_install:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
+clean:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` clean
+
+tester:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o
+endif
--- /dev/null
+* cleanup, add more error checking, get rid of BUG_ONs
+* Fix ENOSPC handling
+* Make allocator smarter
+* add a block group to struct inode
+* Do actual block accounting
+* Check compat and incompat flags on the inode
+* Get rid of struct ctree_path, limiting tree levels held at one time
+* Add generation number to key pointer in nodes
+* Add generation number to inode
+* forbid cross subvolume renames and hardlinks
+* Release
+* Do real tree locking
+* Add extent mirroring (backup copies of blocks)
+* Add fancy interface to get access to incremental backups
+* Add fancy striped extents to make big reads faster
+* Use relocation to try and fix write errors
+* Make allocator much smarter
+* xattrs (directory streams for regular files)
+* Scrub & defrag
+
--- /dev/null
+/*
+ * Copyright (C) 2007 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "xattr.h"
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+static void btrfs_update_cached_acl(struct inode *inode,
+ struct posix_acl **p_acl,
+ struct posix_acl *acl)
+{
+ spin_lock(&inode->i_lock);
+ if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
+ posix_acl_release(*p_acl);
+ *p_acl = posix_acl_dup(acl);
+ spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+{
+ int size;
+ const char *name;
+ char *value = NULL;
+ struct posix_acl *acl = NULL, **p_acl;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ p_acl = &BTRFS_I(inode)->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ p_acl = &BTRFS_I(inode)->i_default_acl;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock(&inode->i_lock);
+ if (*p_acl != BTRFS_ACL_NOT_CACHED)
+ acl = posix_acl_dup(*p_acl);
+ spin_unlock(&inode->i_lock);
+
+ if (acl)
+ return acl;
+
+
+ size = __btrfs_getxattr(inode, name, "", 0);
+ if (size > 0) {
+ value = kzalloc(size, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = __btrfs_getxattr(inode, name, value, size);
+ if (size > 0) {
+ acl = posix_acl_from_xattr(value, size);
+ btrfs_update_cached_acl(inode, p_acl, acl);
+ }
+ kfree(value);
+ } else if (size == -ENOENT) {
+ acl = NULL;
+ btrfs_update_cached_acl(inode, p_acl, acl);
+ }
+
+ return acl;
+}
+
+static int btrfs_xattr_get_acl(struct inode *inode, int type,
+ void *value, size_t size)
+{
+ struct posix_acl *acl;
+ int ret = 0;
+
+ acl = btrfs_get_acl(inode, type);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ ret = posix_acl_to_xattr(acl, value, size);
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+/*
+ * Needs to be called with fs_mutex held
+ */
+static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ int ret, size = 0;
+ const char *name;
+ struct posix_acl **p_acl;
+ char *value = NULL;
+ mode_t mode;
+
+ if (acl) {
+ ret = posix_acl_valid(acl);
+ if (ret < 0)
+ return ret;
+ ret = 0;
+ }
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ mode = inode->i_mode;
+ ret = posix_acl_equiv_mode(acl, &mode);
+ if (ret < 0)
+ return ret;
+ ret = 0;
+ inode->i_mode = mode;
+ name = POSIX_ACL_XATTR_ACCESS;
+ p_acl = &BTRFS_I(inode)->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EINVAL : 0;
+ name = POSIX_ACL_XATTR_DEFAULT;
+ p_acl = &BTRFS_I(inode)->i_default_acl;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ size = posix_acl_xattr_size(acl->a_count);
+ value = kmalloc(size, GFP_NOFS);
+ if (!value) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = posix_acl_to_xattr(acl, value, size);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = __btrfs_setxattr(inode, name, value, size, 0);
+
+out:
+ if (value)
+ kfree(value);
+
+ if (!ret)
+ btrfs_update_cached_acl(inode, p_acl, acl);
+
+ return ret;
+}
+
+static int btrfs_xattr_set_acl(struct inode *inode, int type,
+ const void *value, size_t size)
+{
+ int ret = 0;
+ struct posix_acl *acl = NULL;
+
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (acl == NULL) {
+ value = NULL;
+ size = 0;
+ } else if (IS_ERR(acl)) {
+ return PTR_ERR(acl);
+ }
+ }
+
+ ret = btrfs_set_acl(inode, acl, type);
+
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+
+static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+ struct posix_acl *acl;
+ int error = -EAGAIN;
+
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
+
+ return error;
+}
+
+/*
+ * btrfs_init_acl is already generally called under fs_mutex, so the locking
+ * stuff has been fixed to work with that. If the locking stuff changes, we
+ * need to re-evaluate the acl locking stuff.
+ */
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+ struct posix_acl *acl = NULL;
+ int ret = 0;
+
+ /* this happens with subvols */
+ if (!dir)
+ return 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (IS_POSIXACL(dir)) {
+ acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+
+ if (!acl)
+ inode->i_mode &= ~current->fs->umask;
+ }
+
+ if (IS_POSIXACL(dir) && acl) {
+ struct posix_acl *clone;
+ mode_t mode;
+
+ if (S_ISDIR(inode->i_mode)) {
+ ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+ if (ret)
+ goto failed;
+ }
+ clone = posix_acl_clone(acl, GFP_NOFS);
+ ret = -ENOMEM;
+ if (!clone)
+ goto failed;
+
+ mode = inode->i_mode;
+ ret = posix_acl_create_masq(clone, &mode);
+ if (ret >= 0) {
+ inode->i_mode = mode;
+ if (ret > 0) {
+ /* we need an acl */
+ ret = btrfs_set_acl(inode, clone,
+ ACL_TYPE_ACCESS);
+ }
+ }
+ }
+failed:
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+ struct posix_acl *acl, *clone;
+ int ret = 0;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (!IS_POSIXACL(inode))
+ return 0;
+
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+
+ ret = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!ret)
+ ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+
+ posix_acl_release(clone);
+
+ return ret;
+}
+
+struct xattr_handler btrfs_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .get = btrfs_xattr_acl_default_get,
+ .set = btrfs_xattr_acl_default_set,
+};
+
+struct xattr_handler btrfs_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .get = btrfs_xattr_acl_access_get,
+ .set = btrfs_xattr_acl_access_set,
+};
+
+#else /* CONFIG_FS_POSIX_ACL */
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+ return 0;
+}
+
+#endif /* CONFIG_FS_POSIX_ACL */
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+# include <linux/freezer.h>
+#else
+# include <linux/sched.h>
+#endif
+
+#include "async-thread.h"
+
+/*
+ * container for the kthread task pointer and the list of pending work
+ * One of these is allocated per thread.
+ */
+struct btrfs_worker_thread {
+ /* pool we belong to */
+ struct btrfs_workers *workers;
+
+ /* list of struct btrfs_work that are waiting for service */
+ struct list_head pending;
+
+ /* list of worker threads from struct btrfs_workers */
+ struct list_head worker_list;
+
+ /* kthread */
+ struct task_struct *task;
+
+ /* number of things on the pending list */
+ atomic_t num_pending;
+
+ unsigned long sequence;
+
+ /* protects the pending list. */
+ spinlock_t lock;
+
+ /* set to non-zero when this thread is already awake and kicking */
+ int working;
+
+ /* are we currently idle */
+ int idle;
+};
+
+/*
+ * helper function to move a thread onto the idle list after it
+ * has finished some requests.
+ */
+static void check_idle_worker(struct btrfs_worker_thread *worker)
+{
+ if (!worker->idle && atomic_read(&worker->num_pending) <
+ worker->workers->idle_thresh / 2) {
+ unsigned long flags;
+ spin_lock_irqsave(&worker->workers->lock, flags);
+ worker->idle = 1;
+ list_move(&worker->worker_list, &worker->workers->idle_list);
+ spin_unlock_irqrestore(&worker->workers->lock, flags);
+ }
+}
+
+/*
+ * helper function to move a thread off the idle list after new
+ * pending work is added.
+ */
+static void check_busy_worker(struct btrfs_worker_thread *worker)
+{
+ if (worker->idle && atomic_read(&worker->num_pending) >=
+ worker->workers->idle_thresh) {
+ unsigned long flags;
+ spin_lock_irqsave(&worker->workers->lock, flags);
+ worker->idle = 0;
+ list_move_tail(&worker->worker_list,
+ &worker->workers->worker_list);
+ spin_unlock_irqrestore(&worker->workers->lock, flags);
+ }
+}
+
+/*
+ * main loop for servicing work items
+ */
+static int worker_loop(void *arg)
+{
+ struct btrfs_worker_thread *worker = arg;
+ struct list_head *cur;
+ struct btrfs_work *work;
+ do {
+ spin_lock_irq(&worker->lock);
+ while(!list_empty(&worker->pending)) {
+ cur = worker->pending.next;
+ work = list_entry(cur, struct btrfs_work, list);
+ list_del(&work->list);
+ clear_bit(0, &work->flags);
+
+ work->worker = worker;
+ spin_unlock_irq(&worker->lock);
+
+ work->func(work);
+
+ atomic_dec(&worker->num_pending);
+ spin_lock_irq(&worker->lock);
+ check_idle_worker(worker);
+ }
+ worker->working = 0;
+ if (freezing(current)) {
+ refrigerator();
+ } else {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock_irq(&worker->lock);
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ }
+ } while (!kthread_should_stop());
+ return 0;
+}
+
+/*
+ * this will wait for all the worker threads to shutdown
+ */
+int btrfs_stop_workers(struct btrfs_workers *workers)
+{
+ struct list_head *cur;
+ struct btrfs_worker_thread *worker;
+
+ list_splice_init(&workers->idle_list, &workers->worker_list);
+ while(!list_empty(&workers->worker_list)) {
+ cur = workers->worker_list.next;
+ worker = list_entry(cur, struct btrfs_worker_thread,
+ worker_list);
+ kthread_stop(worker->task);
+ list_del(&worker->worker_list);
+ kfree(worker);
+ }
+ return 0;
+}
+
+/*
+ * simple init on struct btrfs_workers
+ */
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
+{
+ workers->num_workers = 0;
+ INIT_LIST_HEAD(&workers->worker_list);
+ INIT_LIST_HEAD(&workers->idle_list);
+ spin_lock_init(&workers->lock);
+ workers->max_workers = max;
+ workers->idle_thresh = 32;
+ workers->name = name;
+}
+
+/*
+ * starts new worker threads. This does not enforce the max worker
+ * count in case you need to temporarily go past it.
+ */
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+{
+ struct btrfs_worker_thread *worker;
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < num_workers; i++) {
+ worker = kzalloc(sizeof(*worker), GFP_NOFS);
+ if (!worker) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ INIT_LIST_HEAD(&worker->pending);
+ INIT_LIST_HEAD(&worker->worker_list);
+ spin_lock_init(&worker->lock);
+ atomic_set(&worker->num_pending, 0);
+ worker->task = kthread_run(worker_loop, worker,
+ "btrfs-%s-%d", workers->name,
+ workers->num_workers + i);
+ worker->workers = workers;
+ if (IS_ERR(worker->task)) {
+ kfree(worker);
+ ret = PTR_ERR(worker->task);
+ goto fail;
+ }
+
+ spin_lock_irq(&workers->lock);
+ list_add_tail(&worker->worker_list, &workers->idle_list);
+ worker->idle = 1;
+ workers->num_workers++;
+ spin_unlock_irq(&workers->lock);
+ }
+ return 0;
+fail:
+ btrfs_stop_workers(workers);
+ return ret;
+}
+
+/*
+ * run through the list and find a worker thread that doesn't have a lot
+ * to do right now. This can return null if we aren't yet at the thread
+ * count limit and all of the threads are busy.
+ */
+static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
+{
+ struct btrfs_worker_thread *worker;
+ struct list_head *next;
+ int enforce_min = workers->num_workers < workers->max_workers;
+
+ /*
+ * if we find an idle thread, don't move it to the end of the
+ * idle list. This improves the chance that the next submission
+ * will reuse the same thread, and maybe catch it while it is still
+ * working
+ */
+ if (!list_empty(&workers->idle_list)) {
+ next = workers->idle_list.next;
+ worker = list_entry(next, struct btrfs_worker_thread,
+ worker_list);
+ return worker;
+ }
+ if (enforce_min || list_empty(&workers->worker_list))
+ return NULL;
+
+ /*
+ * if we pick a busy task, move the task to the end of the list.
+ * hopefully this will keep things somewhat evenly balanced
+ */
+ next = workers->worker_list.next;
+ worker = list_entry(next, struct btrfs_worker_thread, worker_list);
+ atomic_inc(&worker->num_pending);
+ worker->sequence++;
+ if (worker->sequence % workers->idle_thresh == 0)
+ list_move_tail(next, &workers->worker_list);
+ return worker;
+}
+
+static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
+{
+ struct btrfs_worker_thread *worker;
+ unsigned long flags;
+
+again:
+ spin_lock_irqsave(&workers->lock, flags);
+ worker = next_worker(workers);
+ spin_unlock_irqrestore(&workers->lock, flags);
+
+ if (!worker) {
+ spin_lock_irqsave(&workers->lock, flags);
+ if (workers->num_workers >= workers->max_workers) {
+ struct list_head *fallback = NULL;
+ /*
+ * we have failed to find any workers, just
+ * return the force one
+ */
+ if (!list_empty(&workers->worker_list))
+ fallback = workers->worker_list.next;
+ if (!list_empty(&workers->idle_list))
+ fallback = workers->idle_list.next;
+ BUG_ON(!fallback);
+ worker = list_entry(fallback,
+ struct btrfs_worker_thread, worker_list);
+ spin_unlock_irqrestore(&workers->lock, flags);
+ } else {
+ spin_unlock_irqrestore(&workers->lock, flags);
+ /* we're below the limit, start another worker */
+ btrfs_start_workers(workers, 1);
+ goto again;
+ }
+ }
+ return worker;
+}
+
+/*
+ * btrfs_requeue_work just puts the work item back on the tail of the list
+ * it was taken from. It is intended for use with long running work functions
+ * that make some progress and want to give the cpu up for others.
+ */
+int btrfs_requeue_work(struct btrfs_work *work)
+{
+ struct btrfs_worker_thread *worker = work->worker;
+ unsigned long flags;
+
+ if (test_and_set_bit(0, &work->flags))
+ goto out;
+
+ spin_lock_irqsave(&worker->lock, flags);
+ atomic_inc(&worker->num_pending);
+ list_add_tail(&work->list, &worker->pending);
+ check_busy_worker(worker);
+ spin_unlock_irqrestore(&worker->lock, flags);
+out:
+ return 0;
+}
+
+/*
+ * places a struct btrfs_work into the pending queue of one of the kthreads
+ */
+int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+{
+ struct btrfs_worker_thread *worker;
+ unsigned long flags;
+ int wake = 0;
+
+ /* don't requeue something already on a list */
+ if (test_and_set_bit(0, &work->flags))
+ goto out;
+
+ worker = find_worker(workers);
+
+ spin_lock_irqsave(&worker->lock, flags);
+ atomic_inc(&worker->num_pending);
+ check_busy_worker(worker);
+ list_add_tail(&work->list, &worker->pending);
+
+ /*
+ * avoid calling into wake_up_process if this thread has already
+ * been kicked
+ */
+ if (!worker->working)
+ wake = 1;
+ worker->working = 1;
+
+ spin_unlock_irqrestore(&worker->lock, flags);
+
+ if (wake)
+ wake_up_process(worker->task);
+out:
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_ASYNC_THREAD_
+#define __BTRFS_ASYNC_THREAD_
+
+struct btrfs_worker_thread;
+
+/*
+ * This is similar to a workqueue, but it is meant to spread the operations
+ * across all available cpus instead of just the CPU that was used to
+ * queue the work. There is also some batching introduced to try and
+ * cut down on context switches.
+ *
+ * By default threads are added on demand up to 2 * the number of cpus.
+ * Changing struct btrfs_workers->max_workers is one way to prevent
+ * demand creation of kthreads.
+ *
+ * the basic model of these worker threads is to embed a btrfs_work
+ * structure in your own data struct, and use container_of in a
+ * work function to get back to your data struct.
+ */
+struct btrfs_work {
+ /*
+ * only func should be set to the function you want called
+ * your work struct is passed as the only arg
+ */
+ void (*func)(struct btrfs_work *work);
+
+ /*
+ * flags should be set to zero. It is used to make sure the
+ * struct is only inserted once into the list.
+ */
+ unsigned long flags;
+
+ /* don't touch these */
+ struct btrfs_worker_thread *worker;
+ struct list_head list;
+};
+
+struct btrfs_workers {
+ /* current number of running workers */
+ int num_workers;
+
+ /* max number of workers allowed. changed by btrfs_start_workers */
+ int max_workers;
+
+ /* once a worker has this many requests or fewer, it is idle */
+ int idle_thresh;
+
+ /* list with all the work threads */
+ struct list_head worker_list;
+ struct list_head idle_list;
+
+ /* lock for finding the next worker thread to queue on */
+ spinlock_t lock;
+
+ /* extra name for this worker */
+ char *name;
+};
+
+int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
+int btrfs_stop_workers(struct btrfs_workers *workers);
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
+int btrfs_requeue_work(struct btrfs_work *work);
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "bit-radix.h"
+
+#define BIT_ARRAY_BYTES 256
+#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8)
+
+extern struct kmem_cache *btrfs_bit_radix_cachep;
+int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+ unsigned long *bits;
+ unsigned long slot;
+ int bit_slot;
+ int ret;
+
+ slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+ bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+ bits = radix_tree_lookup(radix, slot);
+ if (!bits) {
+ bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS);
+ if (!bits)
+ return -ENOMEM;
+ memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long));
+ bits[0] = slot;
+ ret = radix_tree_insert(radix, slot, bits);
+ if (ret)
+ return ret;
+ }
+ ret = test_and_set_bit(bit_slot, bits + 1);
+ if (ret < 0)
+ ret = 1;
+ return ret;
+}
+
+int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+ unsigned long *bits;
+ unsigned long slot;
+ int bit_slot;
+
+ slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+ bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+ bits = radix_tree_lookup(radix, slot);
+ if (!bits)
+ return 0;
+ return test_bit(bit_slot, bits + 1);
+}
+
+int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+{
+ unsigned long *bits;
+ unsigned long slot;
+ int bit_slot;
+ int i;
+ int empty = 1;
+
+ slot = bit / BIT_RADIX_BITS_PER_ARRAY;
+ bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY;
+
+ bits = radix_tree_lookup(radix, slot);
+ if (!bits)
+ return 0;
+ clear_bit(bit_slot, bits + 1);
+ for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) {
+ if (bits[i]) {
+ empty = 0;
+ break;
+ }
+ }
+ if (empty) {
+ bits = radix_tree_delete(radix, slot);
+ BUG_ON(!bits);
+ kmem_cache_free(btrfs_bit_radix_cachep, bits);
+ }
+ return 0;
+}
+
+int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
+ unsigned long start, int nr)
+{
+ unsigned long *bits;
+ unsigned long *gang[4];
+ int found;
+ int ret;
+ int i;
+ int total_found = 0;
+ unsigned long slot;
+
+ slot = start / BIT_RADIX_BITS_PER_ARRAY;
+ ret = radix_tree_gang_lookup(radix, (void **)gang, slot,
+ ARRAY_SIZE(gang));
+ found = start % BIT_RADIX_BITS_PER_ARRAY;
+ for (i = 0; i < ret && nr > 0; i++) {
+ bits = gang[i];
+ while(nr > 0) {
+ found = find_next_bit(bits + 1,
+ BIT_RADIX_BITS_PER_ARRAY,
+ found);
+ if (found < BIT_RADIX_BITS_PER_ARRAY) {
+ *retbits = bits[0] *
+ BIT_RADIX_BITS_PER_ARRAY + found;
+ retbits++;
+ nr--;
+ total_found++;
+ found++;
+ } else
+ break;
+ }
+ found = 0;
+ }
+ return total_found;
+}
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BIT_RADIX__
+#define __BIT_RADIX__
+#include <linux/radix-tree.h>
+
+int set_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int test_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit);
+int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits,
+ unsigned long start, int nr);
+
+static inline void init_bit_radix(struct radix_tree_root *radix)
+{
+ INIT_RADIX_TREE(radix, GFP_NOFS);
+}
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_I__
+#define __BTRFS_I__
+
+#include "extent_map.h"
+#include "extent_io.h"
+#include "ordered-data.h"
+
+/* in memory btrfs inode */
+struct btrfs_inode {
+ struct btrfs_root *root;
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_key location;
+ struct extent_map_tree extent_tree;
+ struct extent_io_tree io_tree;
+ struct extent_io_tree io_failure_tree;
+ struct mutex csum_mutex;
+ struct mutex extent_mutex;
+ struct mutex log_mutex;
+ struct inode vfs_inode;
+ struct btrfs_ordered_inode_tree ordered_tree;
+
+ struct posix_acl *i_acl;
+ struct posix_acl *i_default_acl;
+
+ /* for keeping track of orphaned inodes */
+ struct list_head i_orphan;
+
+ struct list_head delalloc_inodes;
+
+ /* full 64 bit generation number */
+ u64 generation;
+
+ /*
+ * transid of the trans_handle that last modified this inode
+ */
+ u64 last_trans;
+ /*
+ * transid that last logged this inode
+ */
+ u64 logged_trans;
+
+ /* trans that last made a change that should be fully fsync'd */
+ u64 log_dirty_trans;
+ u64 delalloc_bytes;
+ u64 disk_i_size;
+ u32 flags;
+
+ /*
+ * if this is a directory then index_cnt is the counter for the index
+ * number for new files that are created
+ */
+ u64 index_cnt;
+};
+
+static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
+{
+ return container_of(inode, struct btrfs_inode, vfs_inode);
+}
+
+static inline void btrfs_i_size_write(struct inode *inode, u64 size)
+{
+ inode->i_size = size;
+ BTRFS_I(inode)->disk_i_size = size;
+}
+
+
+#endif
--- /dev/null
+#ifndef _COMPAT_H_
+#define _COMPAT_H_
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26)
+#define trylock_page(page) (!TestSetPageLocked(page))
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
+static inline struct dentry *d_obtain_alias(struct inode *inode)
+{
+ struct dentry *d;
+
+ if (!inode)
+ return NULL;
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ d = d_alloc_anon(inode);
+ if (!d)
+ iput(inode);
+ return d;
+}
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+static inline void btrfs_drop_nlink(struct inode *inode)
+{
+ inode->i_nlink--;
+}
+
+static inline void btrfs_inc_nlink(struct inode *inode)
+{
+ inode->i_nlink++;
+}
+#else
+# define btrfs_drop_nlink(inode) drop_nlink(inode)
+# define btrfs_inc_nlink(inode) inc_nlink(inode)
+#endif
+
+/*
+ * Even if AppArmor isn't enabled, it still has different prototypes.
+ * Add more distro/version pairs here to declare which has AppArmor applied.
+ */
+#if defined(CONFIG_SUSE_KERNEL)
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+# define REMOVE_SUID_PATH 1
+# endif
+#endif
+
+/*
+ * catch any other distros that have patched in apparmor. This isn't
+ * 100% reliable because it won't catch people that hand compile their
+ * own distro kernels without apparmor compiled in. But, it is better
+ * than nothing.
+ */
+#ifdef CONFIG_SECURITY_APPARMOR
+# define REMOVE_SUID_PATH 1
+#endif
+
+#endif /* _COMPAT_H_ */
--- /dev/null
+#ifndef __BTRFS_CRC32C__
+#define __BTRFS_CRC32C__
+#include <asm/byteorder.h>
+#include <linux/crc32c.h>
+#include <linux/version.h>
+
+/* #define CONFIG_BTRFS_HW_SUM 1 */
+
+#ifdef CONFIG_BTRFS_HW_SUM
+#ifdef CONFIG_X86
+/*
+ * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */
+#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
+
+#ifdef CONFIG_X86_64
+#define REX_PRE "0x48, "
+#define SCALE_F 8
+#else
+#define REX_PRE
+#define SCALE_F 4
+#endif
+
+static inline u32 btrfs_crc32c_le_hw_byte(u32 crc, unsigned char const *data,
+ size_t length)
+{
+ while (length--) {
+ __asm__ __volatile__(
+ ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+ :"=S"(crc)
+ :"0"(crc), "c"(*data)
+ );
+ data++;
+ }
+
+ return crc;
+}
+
+static inline u32 __pure btrfs_crc32c_le_hw(u32 crc, unsigned char const *p,
+ size_t len)
+{
+ unsigned int iquotient = len / SCALE_F;
+ unsigned int iremainder = len % SCALE_F;
+#ifdef CONFIG_X86_64
+ u64 *ptmp = (u64 *)p;
+#else
+ u32 *ptmp = (u32 *)p;
+#endif
+
+ while (iquotient--) {
+ __asm__ __volatile__(
+ ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+ :"=S"(crc)
+ :"0"(crc), "c"(*ptmp)
+ );
+ ptmp++;
+ }
+
+ if (iremainder)
+ crc = btrfs_crc32c_le_hw_byte(crc, (unsigned char *)ptmp,
+ iremainder);
+
+ return crc;
+}
+#endif /* CONFIG_BTRFS_HW_SUM */
+
+static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address,
+ size_t len)
+{
+#ifdef CONFIG_BTRFS_HW_SUM
+ if (cpu_has_xmm4_2)
+ return btrfs_crc32c_le_hw(crc, address, len);
+#endif
+ return crc32c_le(crc, address, len);
+}
+
+#else
+
+#define __btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
+
+#endif /* CONFIG_X86 */
+
+/**
+ * implementation of crc32c_le() changed in linux-2.6.23,
+ * has of v0.13 btrfs-progs is using the latest version.
+ * We must workaround older implementations of crc32c_le()
+ * found on older kernel versions.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+#define btrfs_crc32c(seed, data, length) \
+ __cpu_to_le32( __btrfs_crc32c( __le32_to_cpu(seed), \
+ (unsigned char const *)data, length) )
+#else
+#define btrfs_crc32c(seed, data, length) \
+ __btrfs_crc32c(seed, (unsigned char const *)data, length)
+#endif
+#endif
+
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "locking.h"
+
+static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, int level);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *ins_key,
+ struct btrfs_path *path, int data_size, int extend);
+static int push_node_left(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *dst,
+ struct extent_buffer *src, int empty);
+static int balance_node_right(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *dst_buf,
+ struct extent_buffer *src_buf);
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot);
+
+inline void btrfs_init_path(struct btrfs_path *p)
+{
+ memset(p, 0, sizeof(*p));
+}
+
+struct btrfs_path *btrfs_alloc_path(void)
+{
+ struct btrfs_path *path;
+ path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
+ if (path) {
+ btrfs_init_path(path);
+ path->reada = 1;
+ }
+ return path;
+}
+
+void btrfs_free_path(struct btrfs_path *p)
+{
+ btrfs_release_path(NULL, p);
+ kmem_cache_free(btrfs_path_cachep, p);
+}
+
+void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+ p->slots[i] = 0;
+ if (!p->nodes[i])
+ continue;
+ if (p->locks[i]) {
+ btrfs_tree_unlock(p->nodes[i]);
+ p->locks[i] = 0;
+ }
+ free_extent_buffer(p->nodes[i]);
+ p->nodes[i] = NULL;
+ }
+}
+
+struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
+{
+ struct extent_buffer *eb;
+ spin_lock(&root->node_lock);
+ eb = root->node;
+ extent_buffer_get(eb);
+ spin_unlock(&root->node_lock);
+ return eb;
+}
+
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
+{
+ struct extent_buffer *eb;
+
+ while(1) {
+ eb = btrfs_root_node(root);
+ btrfs_tree_lock(eb);
+
+ spin_lock(&root->node_lock);
+ if (eb == root->node) {
+ spin_unlock(&root->node_lock);
+ break;
+ }
+ spin_unlock(&root->node_lock);
+
+ btrfs_tree_unlock(eb);
+ free_extent_buffer(eb);
+ }
+ return eb;
+}
+
+static void add_root_to_dirty_list(struct btrfs_root *root)
+{
+ if (root->track_dirty && list_empty(&root->dirty_list)) {
+ list_add(&root->dirty_list,
+ &root->fs_info->dirty_cowonly_roots);
+ }
+}
+
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer **cow_ret, u64 new_root_objectid)
+{
+ struct extent_buffer *cow;
+ u32 nritems;
+ int ret = 0;
+ int level;
+ struct btrfs_root *new_root;
+
+ new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
+ if (!new_root)
+ return -ENOMEM;
+
+ memcpy(new_root, root, sizeof(*new_root));
+ new_root->root_key.objectid = new_root_objectid;
+
+ WARN_ON(root->ref_cows && trans->transid !=
+ root->fs_info->running_transaction->transid);
+ WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+ level = btrfs_header_level(buf);
+ nritems = btrfs_header_nritems(buf);
+
+ cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
+ new_root_objectid, trans->transid,
+ level, buf->start, 0);
+ if (IS_ERR(cow)) {
+ kfree(new_root);
+ return PTR_ERR(cow);
+ }
+
+ copy_extent_buffer(cow, buf, 0, 0, cow->len);
+ btrfs_set_header_bytenr(cow, cow->start);
+ btrfs_set_header_generation(cow, trans->transid);
+ btrfs_set_header_owner(cow, new_root_objectid);
+ btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+
+ WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
+ kfree(new_root);
+
+ if (ret)
+ return ret;
+
+ btrfs_mark_buffer_dirty(cow);
+ *cow_ret = cow;
+ return 0;
+}
+
+int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer *parent, int parent_slot,
+ struct extent_buffer **cow_ret,
+ u64 search_start, u64 empty_size,
+ u64 prealloc_dest)
+{
+ u64 parent_start;
+ struct extent_buffer *cow;
+ u32 nritems;
+ int ret = 0;
+ int different_trans = 0;
+ int level;
+ int unlock_orig = 0;
+
+ if (*cow_ret == buf)
+ unlock_orig = 1;
+
+ WARN_ON(!btrfs_tree_locked(buf));
+
+ if (parent)
+ parent_start = parent->start;
+ else
+ parent_start = 0;
+
+ WARN_ON(root->ref_cows && trans->transid !=
+ root->fs_info->running_transaction->transid);
+ WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+ level = btrfs_header_level(buf);
+ nritems = btrfs_header_nritems(buf);
+
+ if (prealloc_dest) {
+ struct btrfs_key ins;
+
+ ins.objectid = prealloc_dest;
+ ins.offset = buf->len;
+ ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+ ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
+ root->root_key.objectid,
+ trans->transid, level, 0,
+ &ins);
+ BUG_ON(ret);
+ cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
+ buf->len);
+ } else {
+ cow = btrfs_alloc_free_block(trans, root, buf->len,
+ parent_start,
+ root->root_key.objectid,
+ trans->transid, level,
+ search_start, empty_size);
+ }
+ if (IS_ERR(cow))
+ return PTR_ERR(cow);
+
+ copy_extent_buffer(cow, buf, 0, 0, cow->len);
+ btrfs_set_header_bytenr(cow, cow->start);
+ btrfs_set_header_generation(cow, trans->transid);
+ btrfs_set_header_owner(cow, root->root_key.objectid);
+ btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+
+ WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ if (btrfs_header_generation(buf) != trans->transid) {
+ u32 nr_extents;
+ different_trans = 1;
+ ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
+ if (ret)
+ return ret;
+
+ ret = btrfs_cache_ref(trans, root, buf, nr_extents);
+ WARN_ON(ret);
+ } else {
+ ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
+ if (ret)
+ return ret;
+ clean_tree_block(trans, root, buf);
+ }
+
+ if (buf == root->node) {
+ WARN_ON(parent && parent != buf);
+
+ spin_lock(&root->node_lock);
+ root->node = cow;
+ extent_buffer_get(cow);
+ spin_unlock(&root->node_lock);
+
+ if (buf != root->commit_root) {
+ btrfs_free_extent(trans, root, buf->start,
+ buf->len, buf->start,
+ root->root_key.objectid,
+ btrfs_header_generation(buf),
+ 0, 0, 1);
+ }
+ free_extent_buffer(buf);
+ add_root_to_dirty_list(root);
+ } else {
+ btrfs_set_node_blockptr(parent, parent_slot,
+ cow->start);
+ WARN_ON(trans->transid == 0);
+ btrfs_set_node_ptr_generation(parent, parent_slot,
+ trans->transid);
+ btrfs_mark_buffer_dirty(parent);
+ WARN_ON(btrfs_header_generation(parent) != trans->transid);
+ btrfs_free_extent(trans, root, buf->start, buf->len,
+ parent_start, btrfs_header_owner(parent),
+ btrfs_header_generation(parent), 0, 0, 1);
+ }
+ if (unlock_orig)
+ btrfs_tree_unlock(buf);
+ free_extent_buffer(buf);
+ btrfs_mark_buffer_dirty(cow);
+ *cow_ret = cow;
+ return 0;
+}
+
+int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *parent, int parent_slot,
+ struct extent_buffer **cow_ret, u64 prealloc_dest)
+{
+ u64 search_start;
+ u64 header_trans;
+ int ret;
+
+ if (trans->transaction != root->fs_info->running_transaction) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->running_transaction->transid);
+ WARN_ON(1);
+ }
+ if (trans->transid != root->fs_info->generation) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->generation);
+ WARN_ON(1);
+ }
+
+ header_trans = btrfs_header_generation(buf);
+ spin_lock(&root->fs_info->hash_lock);
+ if (header_trans == trans->transid &&
+ !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+ *cow_ret = buf;
+ spin_unlock(&root->fs_info->hash_lock);
+ WARN_ON(prealloc_dest);
+ return 0;
+ }
+ spin_unlock(&root->fs_info->hash_lock);
+ search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
+ ret = __btrfs_cow_block(trans, root, buf, parent,
+ parent_slot, cow_ret, search_start, 0,
+ prealloc_dest);
+ return ret;
+}
+
+static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
+{
+ if (blocknr < other && other - (blocknr + blocksize) < 32768)
+ return 1;
+ if (blocknr > other && blocknr - (other + blocksize) < 32768)
+ return 1;
+ return 0;
+}
+
+/*
+ * compare two keys in a memcmp fashion
+ */
+static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+{
+ struct btrfs_key k1;
+
+ btrfs_disk_key_to_cpu(&k1, disk);
+
+ if (k1.objectid > k2->objectid)
+ return 1;
+ if (k1.objectid < k2->objectid)
+ return -1;
+ if (k1.type > k2->type)
+ return 1;
+ if (k1.type < k2->type)
+ return -1;
+ if (k1.offset > k2->offset)
+ return 1;
+ if (k1.offset < k2->offset)
+ return -1;
+ return 0;
+}
+
+
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *parent,
+ int start_slot, int cache_only, u64 *last_ret,
+ struct btrfs_key *progress)
+{
+ struct extent_buffer *cur;
+ u64 blocknr;
+ u64 gen;
+ u64 search_start = *last_ret;
+ u64 last_block = 0;
+ u64 other;
+ u32 parent_nritems;
+ int end_slot;
+ int i;
+ int err = 0;
+ int parent_level;
+ int uptodate;
+ u32 blocksize;
+ int progress_passed = 0;
+ struct btrfs_disk_key disk_key;
+
+ parent_level = btrfs_header_level(parent);
+ if (cache_only && parent_level != 1)
+ return 0;
+
+ if (trans->transaction != root->fs_info->running_transaction) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->running_transaction->transid);
+ WARN_ON(1);
+ }
+ if (trans->transid != root->fs_info->generation) {
+ printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+ root->fs_info->generation);
+ WARN_ON(1);
+ }
+
+ parent_nritems = btrfs_header_nritems(parent);
+ blocksize = btrfs_level_size(root, parent_level - 1);
+ end_slot = parent_nritems;
+
+ if (parent_nritems == 1)
+ return 0;
+
+ for (i = start_slot; i < end_slot; i++) {
+ int close = 1;
+
+ if (!parent->map_token) {
+ map_extent_buffer(parent,
+ btrfs_node_key_ptr_offset(i),
+ sizeof(struct btrfs_key_ptr),
+ &parent->map_token, &parent->kaddr,
+ &parent->map_start, &parent->map_len,
+ KM_USER1);
+ }
+ btrfs_node_key(parent, &disk_key, i);
+ if (!progress_passed && comp_keys(&disk_key, progress) < 0)
+ continue;
+
+ progress_passed = 1;
+ blocknr = btrfs_node_blockptr(parent, i);
+ gen = btrfs_node_ptr_generation(parent, i);
+ if (last_block == 0)
+ last_block = blocknr;
+
+ if (i > 0) {
+ other = btrfs_node_blockptr(parent, i - 1);
+ close = close_blocks(blocknr, other, blocksize);
+ }
+ if (!close && i < end_slot - 2) {
+ other = btrfs_node_blockptr(parent, i + 1);
+ close = close_blocks(blocknr, other, blocksize);
+ }
+ if (close) {
+ last_block = blocknr;
+ continue;
+ }
+ if (parent->map_token) {
+ unmap_extent_buffer(parent, parent->map_token,
+ KM_USER1);
+ parent->map_token = NULL;
+ }
+
+ cur = btrfs_find_tree_block(root, blocknr, blocksize);
+ if (cur)
+ uptodate = btrfs_buffer_uptodate(cur, gen);
+ else
+ uptodate = 0;
+ if (!cur || !uptodate) {
+ if (cache_only) {
+ free_extent_buffer(cur);
+ continue;
+ }
+ if (!cur) {
+ cur = read_tree_block(root, blocknr,
+ blocksize, gen);
+ } else if (!uptodate) {
+ btrfs_read_buffer(cur, gen);
+ }
+ }
+ if (search_start == 0)
+ search_start = last_block;
+
+ btrfs_tree_lock(cur);
+ err = __btrfs_cow_block(trans, root, cur, parent, i,
+ &cur, search_start,
+ min(16 * blocksize,
+ (end_slot - i) * blocksize), 0);
+ if (err) {
+ btrfs_tree_unlock(cur);
+ free_extent_buffer(cur);
+ break;
+ }
+ search_start = cur->start;
+ last_block = cur->start;
+ *last_ret = search_start;
+ btrfs_tree_unlock(cur);
+ free_extent_buffer(cur);
+ }
+ if (parent->map_token) {
+ unmap_extent_buffer(parent, parent->map_token,
+ KM_USER1);
+ parent->map_token = NULL;
+ }
+ return err;
+}
+
+/*
+ * The leaf data grows from end-to-front in the node.
+ * this returns the address of the start of the last item,
+ * which is the stop of the leaf data stack
+ */
+static inline unsigned int leaf_data_end(struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ u32 nr = btrfs_header_nritems(leaf);
+ if (nr == 0)
+ return BTRFS_LEAF_DATA_SIZE(root);
+ return btrfs_item_offset_nr(leaf, nr - 1);
+}
+
+static int check_node(struct btrfs_root *root, struct btrfs_path *path,
+ int level)
+{
+ struct extent_buffer *parent = NULL;
+ struct extent_buffer *node = path->nodes[level];
+ struct btrfs_disk_key parent_key;
+ struct btrfs_disk_key node_key;
+ int parent_slot;
+ int slot;
+ struct btrfs_key cpukey;
+ u32 nritems = btrfs_header_nritems(node);
+
+ if (path->nodes[level + 1])
+ parent = path->nodes[level + 1];
+
+ slot = path->slots[level];
+ BUG_ON(nritems == 0);
+ if (parent) {
+ parent_slot = path->slots[level + 1];
+ btrfs_node_key(parent, &parent_key, parent_slot);
+ btrfs_node_key(node, &node_key, 0);
+ BUG_ON(memcmp(&parent_key, &node_key,
+ sizeof(struct btrfs_disk_key)));
+ BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
+ btrfs_header_bytenr(node));
+ }
+ BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
+ if (slot != 0) {
+ btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
+ btrfs_node_key(node, &node_key, slot);
+ BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
+ }
+ if (slot < nritems - 1) {
+ btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
+ btrfs_node_key(node, &node_key, slot);
+ BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
+ }
+ return 0;
+}
+
+static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
+ int level)
+{
+ struct extent_buffer *leaf = path->nodes[level];
+ struct extent_buffer *parent = NULL;
+ int parent_slot;
+ struct btrfs_key cpukey;
+ struct btrfs_disk_key parent_key;
+ struct btrfs_disk_key leaf_key;
+ int slot = path->slots[0];
+
+ u32 nritems = btrfs_header_nritems(leaf);
+
+ if (path->nodes[level + 1])
+ parent = path->nodes[level + 1];
+
+ if (nritems == 0)
+ return 0;
+
+ if (parent) {
+ parent_slot = path->slots[level + 1];
+ btrfs_node_key(parent, &parent_key, parent_slot);
+ btrfs_item_key(leaf, &leaf_key, 0);
+
+ BUG_ON(memcmp(&parent_key, &leaf_key,
+ sizeof(struct btrfs_disk_key)));
+ BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
+ btrfs_header_bytenr(leaf));
+ }
+#if 0
+ for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+ btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
+ btrfs_item_key(leaf, &leaf_key, i);
+ if (comp_keys(&leaf_key, &cpukey) >= 0) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d offset bad key\n", i);
+ BUG_ON(1);
+ }
+ if (btrfs_item_offset_nr(leaf, i) !=
+ btrfs_item_end_nr(leaf, i + 1)) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d offset bad\n", i);
+ BUG_ON(1);
+ }
+ if (i == 0) {
+ if (btrfs_item_offset_nr(leaf, i) +
+ btrfs_item_size_nr(leaf, i) !=
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d first offset bad\n", i);
+ BUG_ON(1);
+ }
+ }
+ }
+ if (nritems > 0) {
+ if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d bad size \n", nritems - 1);
+ BUG_ON(1);
+ }
+ }
+#endif
+ if (slot != 0 && slot < nritems - 1) {
+ btrfs_item_key(leaf, &leaf_key, slot);
+ btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
+ if (comp_keys(&leaf_key, &cpukey) <= 0) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d offset bad key\n", slot);
+ BUG_ON(1);
+ }
+ if (btrfs_item_offset_nr(leaf, slot - 1) !=
+ btrfs_item_end_nr(leaf, slot)) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d offset bad\n", slot);
+ BUG_ON(1);
+ }
+ }
+ if (slot < nritems - 1) {
+ btrfs_item_key(leaf, &leaf_key, slot);
+ btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
+ BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
+ if (btrfs_item_offset_nr(leaf, slot) !=
+ btrfs_item_end_nr(leaf, slot + 1)) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d offset bad\n", slot);
+ BUG_ON(1);
+ }
+ }
+ BUG_ON(btrfs_item_offset_nr(leaf, 0) +
+ btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
+ return 0;
+}
+
+static int noinline check_block(struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ u64 found_start;
+ return 0;
+ if (btrfs_header_level(path->nodes[level]) != level)
+ printk("warning: bad level %Lu wanted %d found %d\n",
+ path->nodes[level]->start, level,
+ btrfs_header_level(path->nodes[level]));
+ found_start = btrfs_header_bytenr(path->nodes[level]);
+ if (found_start != path->nodes[level]->start) {
+ printk("warning: bad bytentr %Lu found %Lu\n",
+ path->nodes[level]->start, found_start);
+ }
+#if 0
+ struct extent_buffer *buf = path->nodes[level];
+
+ if (memcmp_extent_buffer(buf, root->fs_info->fsid,
+ (unsigned long)btrfs_header_fsid(buf),
+ BTRFS_FSID_SIZE)) {
+ printk("warning bad block %Lu\n", buf->start);
+ return 1;
+ }
+#endif
+ if (level == 0)
+ return check_leaf(root, path, level);
+ return check_node(root, path, level);
+}
+
+/*
+ * search for key in the extent_buffer. The items start at offset p,
+ * and they are item_size apart. There are 'max' items in p.
+ *
+ * the slot in the array is returned via slot, and it points to
+ * the place where you would insert key if it is not found in
+ * the array.
+ *
+ * slot may point to max if the key is bigger than all of the keys
+ */
+static noinline int generic_bin_search(struct extent_buffer *eb,
+ unsigned long p,
+ int item_size, struct btrfs_key *key,
+ int max, int *slot)
+{
+ int low = 0;
+ int high = max;
+ int mid;
+ int ret;
+ struct btrfs_disk_key *tmp = NULL;
+ struct btrfs_disk_key unaligned;
+ unsigned long offset;
+ char *map_token = NULL;
+ char *kaddr = NULL;
+ unsigned long map_start = 0;
+ unsigned long map_len = 0;
+ int err;
+
+ while(low < high) {
+ mid = (low + high) / 2;
+ offset = p + mid * item_size;
+
+ if (!map_token || offset < map_start ||
+ (offset + sizeof(struct btrfs_disk_key)) >
+ map_start + map_len) {
+ if (map_token) {
+ unmap_extent_buffer(eb, map_token, KM_USER0);
+ map_token = NULL;
+ }
+ err = map_extent_buffer(eb, offset,
+ sizeof(struct btrfs_disk_key),
+ &map_token, &kaddr,
+ &map_start, &map_len, KM_USER0);
+
+ if (!err) {
+ tmp = (struct btrfs_disk_key *)(kaddr + offset -
+ map_start);
+ } else {
+ read_extent_buffer(eb, &unaligned,
+ offset, sizeof(unaligned));
+ tmp = &unaligned;
+ }
+
+ } else {
+ tmp = (struct btrfs_disk_key *)(kaddr + offset -
+ map_start);
+ }
+ ret = comp_keys(tmp, key);
+
+ if (ret < 0)
+ low = mid + 1;
+ else if (ret > 0)
+ high = mid;
+ else {
+ *slot = mid;
+ if (map_token)
+ unmap_extent_buffer(eb, map_token, KM_USER0);
+ return 0;
+ }
+ }
+ *slot = low;
+ if (map_token)
+ unmap_extent_buffer(eb, map_token, KM_USER0);
+ return 1;
+}
+
+/*
+ * simple bin_search frontend that does the right thing for
+ * leaves vs nodes
+ */
+static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+ int level, int *slot)
+{
+ if (level == 0) {
+ return generic_bin_search(eb,
+ offsetof(struct btrfs_leaf, items),
+ sizeof(struct btrfs_item),
+ key, btrfs_header_nritems(eb),
+ slot);
+ } else {
+ return generic_bin_search(eb,
+ offsetof(struct btrfs_node, ptrs),
+ sizeof(struct btrfs_key_ptr),
+ key, btrfs_header_nritems(eb),
+ slot);
+ }
+ return -1;
+}
+
+static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
+ struct extent_buffer *parent, int slot)
+{
+ int level = btrfs_header_level(parent);
+ if (slot < 0)
+ return NULL;
+ if (slot >= btrfs_header_nritems(parent))
+ return NULL;
+
+ BUG_ON(level == 0);
+
+ return read_tree_block(root, btrfs_node_blockptr(parent, slot),
+ btrfs_level_size(root, level - 1),
+ btrfs_node_ptr_generation(parent, slot));
+}
+
+static noinline int balance_level(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ struct extent_buffer *right = NULL;
+ struct extent_buffer *mid;
+ struct extent_buffer *left = NULL;
+ struct extent_buffer *parent = NULL;
+ int ret = 0;
+ int wret;
+ int pslot;
+ int orig_slot = path->slots[level];
+ int err_on_enospc = 0;
+ u64 orig_ptr;
+
+ if (level == 0)
+ return 0;
+
+ mid = path->nodes[level];
+ WARN_ON(!path->locks[level]);
+ WARN_ON(btrfs_header_generation(mid) != trans->transid);
+
+ orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+ if (level < BTRFS_MAX_LEVEL - 1)
+ parent = path->nodes[level + 1];
+ pslot = path->slots[level + 1];
+
+ /*
+ * deal with the case where there is only one pointer in the root
+ * by promoting the node below to a root
+ */
+ if (!parent) {
+ struct extent_buffer *child;
+
+ if (btrfs_header_nritems(mid) != 1)
+ return 0;
+
+ /* promote the child to a root */
+ child = read_node_slot(root, mid, 0);
+ btrfs_tree_lock(child);
+ BUG_ON(!child);
+ ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+ BUG_ON(ret);
+
+ spin_lock(&root->node_lock);
+ root->node = child;
+ spin_unlock(&root->node_lock);
+
+ ret = btrfs_update_extent_ref(trans, root, child->start,
+ mid->start, child->start,
+ root->root_key.objectid,
+ trans->transid, level - 1, 0);
+ BUG_ON(ret);
+
+ add_root_to_dirty_list(root);
+ btrfs_tree_unlock(child);
+ path->locks[level] = 0;
+ path->nodes[level] = NULL;
+ clean_tree_block(trans, root, mid);
+ btrfs_tree_unlock(mid);
+ /* once for the path */
+ free_extent_buffer(mid);
+ ret = btrfs_free_extent(trans, root, mid->start, mid->len,
+ mid->start, root->root_key.objectid,
+ btrfs_header_generation(mid), 0, 0, 1);
+ /* once for the root ptr */
+ free_extent_buffer(mid);
+ return ret;
+ }
+ if (btrfs_header_nritems(mid) >
+ BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
+ return 0;
+
+ if (btrfs_header_nritems(mid) < 2)
+ err_on_enospc = 1;
+
+ left = read_node_slot(root, parent, pslot - 1);
+ if (left) {
+ btrfs_tree_lock(left);
+ wret = btrfs_cow_block(trans, root, left,
+ parent, pslot - 1, &left, 0);
+ if (wret) {
+ ret = wret;
+ goto enospc;
+ }
+ }
+ right = read_node_slot(root, parent, pslot + 1);
+ if (right) {
+ btrfs_tree_lock(right);
+ wret = btrfs_cow_block(trans, root, right,
+ parent, pslot + 1, &right, 0);
+ if (wret) {
+ ret = wret;
+ goto enospc;
+ }
+ }
+
+ /* first, try to make some room in the middle buffer */
+ if (left) {
+ orig_slot += btrfs_header_nritems(left);
+ wret = push_node_left(trans, root, left, mid, 1);
+ if (wret < 0)
+ ret = wret;
+ if (btrfs_header_nritems(mid) < 2)
+ err_on_enospc = 1;
+ }
+
+ /*
+ * then try to empty the right most buffer into the middle
+ */
+ if (right) {
+ wret = push_node_left(trans, root, mid, right, 1);
+ if (wret < 0 && wret != -ENOSPC)
+ ret = wret;
+ if (btrfs_header_nritems(right) == 0) {
+ u64 bytenr = right->start;
+ u64 generation = btrfs_header_generation(parent);
+ u32 blocksize = right->len;
+
+ clean_tree_block(trans, root, right);
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ right = NULL;
+ wret = del_ptr(trans, root, path, level + 1, pslot +
+ 1);
+ if (wret)
+ ret = wret;
+ wret = btrfs_free_extent(trans, root, bytenr,
+ blocksize, parent->start,
+ btrfs_header_owner(parent),
+ generation, 0, 0, 1);
+ if (wret)
+ ret = wret;
+ } else {
+ struct btrfs_disk_key right_key;
+ btrfs_node_key(right, &right_key, 0);
+ btrfs_set_node_key(parent, &right_key, pslot + 1);
+ btrfs_mark_buffer_dirty(parent);
+ }
+ }
+ if (btrfs_header_nritems(mid) == 1) {
+ /*
+ * we're not allowed to leave a node with one item in the
+ * tree during a delete. A deletion from lower in the tree
+ * could try to delete the only pointer in this node.
+ * So, pull some keys from the left.
+ * There has to be a left pointer at this point because
+ * otherwise we would have pulled some pointers from the
+ * right
+ */
+ BUG_ON(!left);
+ wret = balance_node_right(trans, root, mid, left);
+ if (wret < 0) {
+ ret = wret;
+ goto enospc;
+ }
+ if (wret == 1) {
+ wret = push_node_left(trans, root, left, mid, 1);
+ if (wret < 0)
+ ret = wret;
+ }
+ BUG_ON(wret == 1);
+ }
+ if (btrfs_header_nritems(mid) == 0) {
+ /* we've managed to empty the middle node, drop it */
+ u64 root_gen = btrfs_header_generation(parent);
+ u64 bytenr = mid->start;
+ u32 blocksize = mid->len;
+
+ clean_tree_block(trans, root, mid);
+ btrfs_tree_unlock(mid);
+ free_extent_buffer(mid);
+ mid = NULL;
+ wret = del_ptr(trans, root, path, level + 1, pslot);
+ if (wret)
+ ret = wret;
+ wret = btrfs_free_extent(trans, root, bytenr, blocksize,
+ parent->start,
+ btrfs_header_owner(parent),
+ root_gen, 0, 0, 1);
+ if (wret)
+ ret = wret;
+ } else {
+ /* update the parent key to reflect our changes */
+ struct btrfs_disk_key mid_key;
+ btrfs_node_key(mid, &mid_key, 0);
+ btrfs_set_node_key(parent, &mid_key, pslot);
+ btrfs_mark_buffer_dirty(parent);
+ }
+
+ /* update the path */
+ if (left) {
+ if (btrfs_header_nritems(left) > orig_slot) {
+ extent_buffer_get(left);
+ /* left was locked after cow */
+ path->nodes[level] = left;
+ path->slots[level + 1] -= 1;
+ path->slots[level] = orig_slot;
+ if (mid) {
+ btrfs_tree_unlock(mid);
+ free_extent_buffer(mid);
+ }
+ } else {
+ orig_slot -= btrfs_header_nritems(left);
+ path->slots[level] = orig_slot;
+ }
+ }
+ /* double check we haven't messed things up */
+ check_block(root, path, level);
+ if (orig_ptr !=
+ btrfs_node_blockptr(path->nodes[level], path->slots[level]))
+ BUG();
+enospc:
+ if (right) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ }
+ if (left) {
+ if (path->nodes[level] != left)
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ }
+ return ret;
+}
+
+/* returns zero if the push worked, non-zero otherwise */
+static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ struct extent_buffer *right = NULL;
+ struct extent_buffer *mid;
+ struct extent_buffer *left = NULL;
+ struct extent_buffer *parent = NULL;
+ int ret = 0;
+ int wret;
+ int pslot;
+ int orig_slot = path->slots[level];
+ u64 orig_ptr;
+
+ if (level == 0)
+ return 1;
+
+ mid = path->nodes[level];
+ WARN_ON(btrfs_header_generation(mid) != trans->transid);
+ orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+ if (level < BTRFS_MAX_LEVEL - 1)
+ parent = path->nodes[level + 1];
+ pslot = path->slots[level + 1];
+
+ if (!parent)
+ return 1;
+
+ left = read_node_slot(root, parent, pslot - 1);
+
+ /* first, try to make some room in the middle buffer */
+ if (left) {
+ u32 left_nr;
+
+ btrfs_tree_lock(left);
+ left_nr = btrfs_header_nritems(left);
+ if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+ wret = 1;
+ } else {
+ ret = btrfs_cow_block(trans, root, left, parent,
+ pslot - 1, &left, 0);
+ if (ret)
+ wret = 1;
+ else {
+ wret = push_node_left(trans, root,
+ left, mid, 0);
+ }
+ }
+ if (wret < 0)
+ ret = wret;
+ if (wret == 0) {
+ struct btrfs_disk_key disk_key;
+ orig_slot += left_nr;
+ btrfs_node_key(mid, &disk_key, 0);
+ btrfs_set_node_key(parent, &disk_key, pslot);
+ btrfs_mark_buffer_dirty(parent);
+ if (btrfs_header_nritems(left) > orig_slot) {
+ path->nodes[level] = left;
+ path->slots[level + 1] -= 1;
+ path->slots[level] = orig_slot;
+ btrfs_tree_unlock(mid);
+ free_extent_buffer(mid);
+ } else {
+ orig_slot -=
+ btrfs_header_nritems(left);
+ path->slots[level] = orig_slot;
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ }
+ return 0;
+ }
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ }
+ right = read_node_slot(root, parent, pslot + 1);
+
+ /*
+ * then try to empty the right most buffer into the middle
+ */
+ if (right) {
+ u32 right_nr;
+ btrfs_tree_lock(right);
+ right_nr = btrfs_header_nritems(right);
+ if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+ wret = 1;
+ } else {
+ ret = btrfs_cow_block(trans, root, right,
+ parent, pslot + 1,
+ &right, 0);
+ if (ret)
+ wret = 1;
+ else {
+ wret = balance_node_right(trans, root,
+ right, mid);
+ }
+ }
+ if (wret < 0)
+ ret = wret;
+ if (wret == 0) {
+ struct btrfs_disk_key disk_key;
+
+ btrfs_node_key(right, &disk_key, 0);
+ btrfs_set_node_key(parent, &disk_key, pslot + 1);
+ btrfs_mark_buffer_dirty(parent);
+
+ if (btrfs_header_nritems(mid) <= orig_slot) {
+ path->nodes[level] = right;
+ path->slots[level + 1] += 1;
+ path->slots[level] = orig_slot -
+ btrfs_header_nritems(mid);
+ btrfs_tree_unlock(mid);
+ free_extent_buffer(mid);
+ } else {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ }
+ return 0;
+ }
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ }
+ return 1;
+}
+
+/*
+ * readahead one full node of leaves
+ */
+static noinline void reada_for_search(struct btrfs_root *root,
+ struct btrfs_path *path,
+ int level, int slot, u64 objectid)
+{
+ struct extent_buffer *node;
+ struct btrfs_disk_key disk_key;
+ u32 nritems;
+ u64 search;
+ u64 lowest_read;
+ u64 highest_read;
+ u64 nread = 0;
+ int direction = path->reada;
+ struct extent_buffer *eb;
+ u32 nr;
+ u32 blocksize;
+ u32 nscan = 0;
+
+ if (level != 1)
+ return;
+
+ if (!path->nodes[level])
+ return;
+
+ node = path->nodes[level];
+
+ search = btrfs_node_blockptr(node, slot);
+ blocksize = btrfs_level_size(root, level - 1);
+ eb = btrfs_find_tree_block(root, search, blocksize);
+ if (eb) {
+ free_extent_buffer(eb);
+ return;
+ }
+
+ highest_read = search;
+ lowest_read = search;
+
+ nritems = btrfs_header_nritems(node);
+ nr = slot;
+ while(1) {
+ if (direction < 0) {
+ if (nr == 0)
+ break;
+ nr--;
+ } else if (direction > 0) {
+ nr++;
+ if (nr >= nritems)
+ break;
+ }
+ if (path->reada < 0 && objectid) {
+ btrfs_node_key(node, &disk_key, nr);
+ if (btrfs_disk_key_objectid(&disk_key) != objectid)
+ break;
+ }
+ search = btrfs_node_blockptr(node, nr);
+ if ((search >= lowest_read && search <= highest_read) ||
+ (search < lowest_read && lowest_read - search <= 32768) ||
+ (search > highest_read && search - highest_read <= 32768)) {
+ readahead_tree_block(root, search, blocksize,
+ btrfs_node_ptr_generation(node, nr));
+ nread += blocksize;
+ }
+ nscan++;
+ if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
+ break;
+ if(nread > (1024 * 1024) || nscan > 128)
+ break;
+
+ if (search < lowest_read)
+ lowest_read = search;
+ if (search > highest_read)
+ highest_read = search;
+ }
+}
+
+static noinline void unlock_up(struct btrfs_path *path, int level,
+ int lowest_unlock)
+{
+ int i;
+ int skip_level = level;
+ int no_skips = 0;
+ struct extent_buffer *t;
+
+ for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+ if (!path->nodes[i])
+ break;
+ if (!path->locks[i])
+ break;
+ if (!no_skips && path->slots[i] == 0) {
+ skip_level = i + 1;
+ continue;
+ }
+ if (!no_skips && path->keep_locks) {
+ u32 nritems;
+ t = path->nodes[i];
+ nritems = btrfs_header_nritems(t);
+ if (nritems < 1 || path->slots[i] >= nritems - 1) {
+ skip_level = i + 1;
+ continue;
+ }
+ }
+ if (skip_level < i && i >= lowest_unlock)
+ no_skips = 1;
+
+ t = path->nodes[i];
+ if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
+ btrfs_tree_unlock(t);
+ path->locks[i] = 0;
+ }
+ }
+}
+
+/*
+ * look for key in the tree. path is filled in with nodes along the way
+ * if key is found, we return zero and you can find the item in the leaf
+ * level of the path (level 0)
+ *
+ * If the key isn't found, the path points to the slot where it should
+ * be inserted, and 1 is returned. If there are other errors during the
+ * search a negative error number is returned.
+ *
+ * if ins_len > 0, nodes and leaves will be split as we walk down the
+ * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
+ * possible)
+ */
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_path *p, int
+ ins_len, int cow)
+{
+ struct extent_buffer *b;
+ struct extent_buffer *tmp;
+ int slot;
+ int ret;
+ int level;
+ int should_reada = p->reada;
+ int lowest_unlock = 1;
+ int blocksize;
+ u8 lowest_level = 0;
+ u64 blocknr;
+ u64 gen;
+ struct btrfs_key prealloc_block;
+
+ lowest_level = p->lowest_level;
+ WARN_ON(lowest_level && ins_len);
+ WARN_ON(p->nodes[0] != NULL);
+ WARN_ON(cow && root == root->fs_info->extent_root &&
+ !mutex_is_locked(&root->fs_info->alloc_mutex));
+ if (ins_len < 0)
+ lowest_unlock = 2;
+
+ prealloc_block.objectid = 0;
+
+again:
+ if (p->skip_locking)
+ b = btrfs_root_node(root);
+ else
+ b = btrfs_lock_root_node(root);
+
+ while (b) {
+ level = btrfs_header_level(b);
+
+ /*
+ * setup the path here so we can release it under lock
+ * contention with the cow code
+ */
+ p->nodes[level] = b;
+ if (!p->skip_locking)
+ p->locks[level] = 1;
+
+ if (cow) {
+ int wret;
+
+ /* is a cow on this block not required */
+ spin_lock(&root->fs_info->hash_lock);
+ if (btrfs_header_generation(b) == trans->transid &&
+ !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
+ spin_unlock(&root->fs_info->hash_lock);
+ goto cow_done;
+ }
+ spin_unlock(&root->fs_info->hash_lock);
+
+ /* ok, we have to cow, is our old prealloc the right
+ * size?
+ */
+ if (prealloc_block.objectid &&
+ prealloc_block.offset != b->len) {
+ btrfs_free_reserved_extent(root,
+ prealloc_block.objectid,
+ prealloc_block.offset);
+ prealloc_block.objectid = 0;
+ }
+
+ /*
+ * for higher level blocks, try not to allocate blocks
+ * with the block and the parent locks held.
+ */
+ if (level > 1 && !prealloc_block.objectid &&
+ btrfs_path_lock_waiting(p, level)) {
+ u32 size = b->len;
+ u64 hint = b->start;
+
+ btrfs_release_path(root, p);
+ ret = btrfs_reserve_extent(trans, root,
+ size, size, 0,
+ hint, (u64)-1,
+ &prealloc_block, 0);
+ BUG_ON(ret);
+ goto again;
+ }
+
+ wret = btrfs_cow_block(trans, root, b,
+ p->nodes[level + 1],
+ p->slots[level + 1],
+ &b, prealloc_block.objectid);
+ prealloc_block.objectid = 0;
+ if (wret) {
+ free_extent_buffer(b);
+ ret = wret;
+ goto done;
+ }
+ }
+cow_done:
+ BUG_ON(!cow && ins_len);
+ if (level != btrfs_header_level(b))
+ WARN_ON(1);
+ level = btrfs_header_level(b);
+
+ p->nodes[level] = b;
+ if (!p->skip_locking)
+ p->locks[level] = 1;
+
+ ret = check_block(root, p, level);
+ if (ret) {
+ ret = -1;
+ goto done;
+ }
+
+ ret = bin_search(b, key, level, &slot);
+ if (level != 0) {
+ if (ret && slot > 0)
+ slot -= 1;
+ p->slots[level] = slot;
+ if (ins_len > 0 && btrfs_header_nritems(b) >=
+ BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
+ int sret = split_node(trans, root, p, level);
+ BUG_ON(sret > 0);
+ if (sret) {
+ ret = sret;
+ goto done;
+ }
+ b = p->nodes[level];
+ slot = p->slots[level];
+ } else if (ins_len < 0) {
+ int sret = balance_level(trans, root, p,
+ level);
+ if (sret) {
+ ret = sret;
+ goto done;
+ }
+ b = p->nodes[level];
+ if (!b) {
+ btrfs_release_path(NULL, p);
+ goto again;
+ }
+ slot = p->slots[level];
+ BUG_ON(btrfs_header_nritems(b) == 1);
+ }
+ unlock_up(p, level, lowest_unlock);
+
+ /* this is only true while dropping a snapshot */
+ if (level == lowest_level) {
+ break;
+ }
+
+ blocknr = btrfs_node_blockptr(b, slot);
+ gen = btrfs_node_ptr_generation(b, slot);
+ blocksize = btrfs_level_size(root, level - 1);
+
+ tmp = btrfs_find_tree_block(root, blocknr, blocksize);
+ if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ b = tmp;
+ } else {
+ /*
+ * reduce lock contention at high levels
+ * of the btree by dropping locks before
+ * we read.
+ */
+ if (level > 1) {
+ btrfs_release_path(NULL, p);
+ if (tmp)
+ free_extent_buffer(tmp);
+ if (should_reada)
+ reada_for_search(root, p,
+ level, slot,
+ key->objectid);
+
+ tmp = read_tree_block(root, blocknr,
+ blocksize, gen);
+ if (tmp)
+ free_extent_buffer(tmp);
+ goto again;
+ } else {
+ if (tmp)
+ free_extent_buffer(tmp);
+ if (should_reada)
+ reada_for_search(root, p,
+ level, slot,
+ key->objectid);
+ b = read_node_slot(root, b, slot);
+ }
+ }
+ if (!p->skip_locking)
+ btrfs_tree_lock(b);
+ } else {
+ p->slots[level] = slot;
+ if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
+ sizeof(struct btrfs_item) + ins_len) {
+ int sret = split_leaf(trans, root, key,
+ p, ins_len, ret == 0);
+ BUG_ON(sret > 0);
+ if (sret) {
+ ret = sret;
+ goto done;
+ }
+ }
+ unlock_up(p, level, lowest_unlock);
+ goto done;
+ }
+ }
+ ret = 1;
+done:
+ if (prealloc_block.objectid) {
+ btrfs_free_reserved_extent(root,
+ prealloc_block.objectid,
+ prealloc_block.offset);
+ }
+
+ return ret;
+}
+
+/*
+ * adjust the pointers going up the tree, starting at level
+ * making sure the right key of each node is points to 'key'.
+ * This is used after shifting pointers to the left, so it stops
+ * fixing up pointers when a given leaf/node is not in slot 0 of the
+ * higher levels
+ *
+ * If this fails to write a tree block, it returns -1, but continues
+ * fixing up the blocks in ram so the tree is consistent.
+ */
+static int fixup_low_keys(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_disk_key *key, int level)
+{
+ int i;
+ int ret = 0;
+ struct extent_buffer *t;
+
+ for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+ int tslot = path->slots[i];
+ if (!path->nodes[i])
+ break;
+ t = path->nodes[i];
+ btrfs_set_node_key(t, key, tslot);
+ btrfs_mark_buffer_dirty(path->nodes[i]);
+ if (tslot != 0)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * update item key.
+ *
+ * This function isn't completely safe. It's the caller's responsibility
+ * that the new key won't break the order
+ */
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key)
+{
+ struct btrfs_disk_key disk_key;
+ struct extent_buffer *eb;
+ int slot;
+
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ if (slot > 0) {
+ btrfs_item_key(eb, &disk_key, slot - 1);
+ if (comp_keys(&disk_key, new_key) >= 0)
+ return -1;
+ }
+ if (slot < btrfs_header_nritems(eb) - 1) {
+ btrfs_item_key(eb, &disk_key, slot + 1);
+ if (comp_keys(&disk_key, new_key) <= 0)
+ return -1;
+ }
+
+ btrfs_cpu_key_to_disk(&disk_key, new_key);
+ btrfs_set_item_key(eb, &disk_key, slot);
+ btrfs_mark_buffer_dirty(eb);
+ if (slot == 0)
+ fixup_low_keys(trans, root, path, &disk_key, 1);
+ return 0;
+}
+
+/*
+ * try to push data from one node into the next node left in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the left hand block.
+ */
+static int push_node_left(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *dst,
+ struct extent_buffer *src, int empty)
+{
+ int push_items = 0;
+ int src_nritems;
+ int dst_nritems;
+ int ret = 0;
+
+ src_nritems = btrfs_header_nritems(src);
+ dst_nritems = btrfs_header_nritems(dst);
+ push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
+ WARN_ON(btrfs_header_generation(src) != trans->transid);
+ WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+ if (!empty && src_nritems <= 8)
+ return 1;
+
+ if (push_items <= 0) {
+ return 1;
+ }
+
+ if (empty) {
+ push_items = min(src_nritems, push_items);
+ if (push_items < src_nritems) {
+ /* leave at least 8 pointers in the node if
+ * we aren't going to empty it
+ */
+ if (src_nritems - push_items < 8) {
+ if (push_items <= 8)
+ return 1;
+ push_items -= 8;
+ }
+ }
+ } else
+ push_items = min(src_nritems - 8, push_items);
+
+ copy_extent_buffer(dst, src,
+ btrfs_node_key_ptr_offset(dst_nritems),
+ btrfs_node_key_ptr_offset(0),
+ push_items * sizeof(struct btrfs_key_ptr));
+
+ if (push_items < src_nritems) {
+ memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
+ btrfs_node_key_ptr_offset(push_items),
+ (src_nritems - push_items) *
+ sizeof(struct btrfs_key_ptr));
+ }
+ btrfs_set_header_nritems(src, src_nritems - push_items);
+ btrfs_set_header_nritems(dst, dst_nritems + push_items);
+ btrfs_mark_buffer_dirty(src);
+ btrfs_mark_buffer_dirty(dst);
+
+ ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
+ BUG_ON(ret);
+
+ return ret;
+}
+
+/*
+ * try to push data from one node into the next node right in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the right hand block.
+ *
+ * this will only push up to 1/2 the contents of the left node over
+ */
+static int balance_node_right(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *dst,
+ struct extent_buffer *src)
+{
+ int push_items = 0;
+ int max_push;
+ int src_nritems;
+ int dst_nritems;
+ int ret = 0;
+
+ WARN_ON(btrfs_header_generation(src) != trans->transid);
+ WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+ src_nritems = btrfs_header_nritems(src);
+ dst_nritems = btrfs_header_nritems(dst);
+ push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
+ if (push_items <= 0) {
+ return 1;
+ }
+
+ if (src_nritems < 4) {
+ return 1;
+ }
+
+ max_push = src_nritems / 2 + 1;
+ /* don't try to empty the node */
+ if (max_push >= src_nritems) {
+ return 1;
+ }
+
+ if (max_push < push_items)
+ push_items = max_push;
+
+ memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
+ btrfs_node_key_ptr_offset(0),
+ (dst_nritems) *
+ sizeof(struct btrfs_key_ptr));
+
+ copy_extent_buffer(dst, src,
+ btrfs_node_key_ptr_offset(0),
+ btrfs_node_key_ptr_offset(src_nritems - push_items),
+ push_items * sizeof(struct btrfs_key_ptr));
+
+ btrfs_set_header_nritems(src, src_nritems - push_items);
+ btrfs_set_header_nritems(dst, dst_nritems + push_items);
+
+ btrfs_mark_buffer_dirty(src);
+ btrfs_mark_buffer_dirty(dst);
+
+ ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
+ BUG_ON(ret);
+
+ return ret;
+}
+
+/*
+ * helper function to insert a new root level in the tree.
+ * A new node is allocated, and a single item is inserted to
+ * point to the existing root
+ *
+ * returns zero on success or < 0 on failure.
+ */
+static int noinline insert_new_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ u64 lower_gen;
+ struct extent_buffer *lower;
+ struct extent_buffer *c;
+ struct extent_buffer *old;
+ struct btrfs_disk_key lower_key;
+ int ret;
+
+ BUG_ON(path->nodes[level]);
+ BUG_ON(path->nodes[level-1] != root->node);
+
+ lower = path->nodes[level-1];
+ if (level == 1)
+ btrfs_item_key(lower, &lower_key, 0);
+ else
+ btrfs_node_key(lower, &lower_key, 0);
+
+ c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
+ root->root_key.objectid, trans->transid,
+ level, root->node->start, 0);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ memset_extent_buffer(c, 0, 0, root->nodesize);
+ btrfs_set_header_nritems(c, 1);
+ btrfs_set_header_level(c, level);
+ btrfs_set_header_bytenr(c, c->start);
+ btrfs_set_header_generation(c, trans->transid);
+ btrfs_set_header_owner(c, root->root_key.objectid);
+
+ write_extent_buffer(c, root->fs_info->fsid,
+ (unsigned long)btrfs_header_fsid(c),
+ BTRFS_FSID_SIZE);
+
+ write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_header_chunk_tree_uuid(c),
+ BTRFS_UUID_SIZE);
+
+ btrfs_set_node_key(c, &lower_key, 0);
+ btrfs_set_node_blockptr(c, 0, lower->start);
+ lower_gen = btrfs_header_generation(lower);
+ WARN_ON(lower_gen != trans->transid);
+
+ btrfs_set_node_ptr_generation(c, 0, lower_gen);
+
+ btrfs_mark_buffer_dirty(c);
+
+ spin_lock(&root->node_lock);
+ old = root->node;
+ root->node = c;
+ spin_unlock(&root->node_lock);
+
+ ret = btrfs_update_extent_ref(trans, root, lower->start,
+ lower->start, c->start,
+ root->root_key.objectid,
+ trans->transid, level - 1, 0);
+ BUG_ON(ret);
+
+ /* the super has an extra ref to root->node */
+ free_extent_buffer(old);
+
+ add_root_to_dirty_list(root);
+ extent_buffer_get(c);
+ path->nodes[level] = c;
+ path->locks[level] = 1;
+ path->slots[level] = 0;
+ return 0;
+}
+
+/*
+ * worker function to insert a single pointer in a node.
+ * the node should have enough room for the pointer already
+ *
+ * slot and level indicate where you want the key to go, and
+ * blocknr is the block the key points to.
+ *
+ * returns zero on success and < 0 on any error
+ */
+static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, struct btrfs_disk_key
+ *key, u64 bytenr, int slot, int level)
+{
+ struct extent_buffer *lower;
+ int nritems;
+
+ BUG_ON(!path->nodes[level]);
+ lower = path->nodes[level];
+ nritems = btrfs_header_nritems(lower);
+ if (slot > nritems)
+ BUG();
+ if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
+ BUG();
+ if (slot != nritems) {
+ memmove_extent_buffer(lower,
+ btrfs_node_key_ptr_offset(slot + 1),
+ btrfs_node_key_ptr_offset(slot),
+ (nritems - slot) * sizeof(struct btrfs_key_ptr));
+ }
+ btrfs_set_node_key(lower, key, slot);
+ btrfs_set_node_blockptr(lower, slot, bytenr);
+ WARN_ON(trans->transid == 0);
+ btrfs_set_node_ptr_generation(lower, slot, trans->transid);
+ btrfs_set_header_nritems(lower, nritems + 1);
+ btrfs_mark_buffer_dirty(lower);
+ return 0;
+}
+
+/*
+ * split the node at the specified level in path in two.
+ * The path is corrected to point to the appropriate node after the split
+ *
+ * Before splitting this tries to make some room in the node by pushing
+ * left and right, if either one works, it returns right away.
+ *
+ * returns 0 on success and < 0 on failure
+ */
+static noinline int split_node(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ struct extent_buffer *c;
+ struct extent_buffer *split;
+ struct btrfs_disk_key disk_key;
+ int mid;
+ int ret;
+ int wret;
+ u32 c_nritems;
+
+ c = path->nodes[level];
+ WARN_ON(btrfs_header_generation(c) != trans->transid);
+ if (c == root->node) {
+ /* trying to split the root, lets make a new one */
+ ret = insert_new_root(trans, root, path, level + 1);
+ if (ret)
+ return ret;
+ } else {
+ ret = push_nodes_for_insert(trans, root, path, level);
+ c = path->nodes[level];
+ if (!ret && btrfs_header_nritems(c) <
+ BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
+ return 0;
+ if (ret < 0)
+ return ret;
+ }
+
+ c_nritems = btrfs_header_nritems(c);
+
+ split = btrfs_alloc_free_block(trans, root, root->nodesize,
+ path->nodes[level + 1]->start,
+ root->root_key.objectid,
+ trans->transid, level, c->start, 0);
+ if (IS_ERR(split))
+ return PTR_ERR(split);
+
+ btrfs_set_header_flags(split, btrfs_header_flags(c));
+ btrfs_set_header_level(split, btrfs_header_level(c));
+ btrfs_set_header_bytenr(split, split->start);
+ btrfs_set_header_generation(split, trans->transid);
+ btrfs_set_header_owner(split, root->root_key.objectid);
+ btrfs_set_header_flags(split, 0);
+ write_extent_buffer(split, root->fs_info->fsid,
+ (unsigned long)btrfs_header_fsid(split),
+ BTRFS_FSID_SIZE);
+ write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_header_chunk_tree_uuid(split),
+ BTRFS_UUID_SIZE);
+
+ mid = (c_nritems + 1) / 2;
+
+ copy_extent_buffer(split, c,
+ btrfs_node_key_ptr_offset(0),
+ btrfs_node_key_ptr_offset(mid),
+ (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
+ btrfs_set_header_nritems(split, c_nritems - mid);
+ btrfs_set_header_nritems(c, mid);
+ ret = 0;
+
+ btrfs_mark_buffer_dirty(c);
+ btrfs_mark_buffer_dirty(split);
+
+ btrfs_node_key(split, &disk_key, 0);
+ wret = insert_ptr(trans, root, path, &disk_key, split->start,
+ path->slots[level + 1] + 1,
+ level + 1);
+ if (wret)
+ ret = wret;
+
+ ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
+ BUG_ON(ret);
+
+ if (path->slots[level] >= mid) {
+ path->slots[level] -= mid;
+ btrfs_tree_unlock(c);
+ free_extent_buffer(c);
+ path->nodes[level] = split;
+ path->slots[level + 1] += 1;
+ } else {
+ btrfs_tree_unlock(split);
+ free_extent_buffer(split);
+ }
+ return ret;
+}
+
+/*
+ * how many bytes are required to store the items in a leaf. start
+ * and nr indicate which items in the leaf to check. This totals up the
+ * space used both by the item structs and the item data
+ */
+static int leaf_space_used(struct extent_buffer *l, int start, int nr)
+{
+ int data_len;
+ int nritems = btrfs_header_nritems(l);
+ int end = min(nritems, start + nr) - 1;
+
+ if (!nr)
+ return 0;
+ data_len = btrfs_item_end_nr(l, start);
+ data_len = data_len - btrfs_item_offset_nr(l, end);
+ data_len += sizeof(struct btrfs_item) * nr;
+ WARN_ON(data_len < 0);
+ return data_len;
+}
+
+/*
+ * The space between the end of the leaf items and
+ * the start of the leaf data. IOW, how much room
+ * the leaf has left for both items and data
+ */
+int noinline btrfs_leaf_free_space(struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ int nritems = btrfs_header_nritems(leaf);
+ int ret;
+ ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
+ if (ret < 0) {
+ printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
+ ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
+ leaf_space_used(leaf, 0, nritems), nritems);
+ }
+ return ret;
+}
+
+/*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, int data_size,
+ int empty)
+{
+ struct extent_buffer *left = path->nodes[0];
+ struct extent_buffer *right;
+ struct extent_buffer *upper;
+ struct btrfs_disk_key disk_key;
+ int slot;
+ u32 i;
+ int free_space;
+ int push_space = 0;
+ int push_items = 0;
+ struct btrfs_item *item;
+ u32 left_nritems;
+ u32 nr;
+ u32 right_nritems;
+ u32 data_end;
+ u32 this_item_size;
+ int ret;
+
+ slot = path->slots[1];
+ if (!path->nodes[1]) {
+ return 1;
+ }
+ upper = path->nodes[1];
+ if (slot >= btrfs_header_nritems(upper) - 1)
+ return 1;
+
+ WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
+ right = read_node_slot(root, upper, slot + 1);
+ btrfs_tree_lock(right);
+ free_space = btrfs_leaf_free_space(root, right);
+ if (free_space < data_size + sizeof(struct btrfs_item))
+ goto out_unlock;
+
+ /* cow and double check */
+ ret = btrfs_cow_block(trans, root, right, upper,
+ slot + 1, &right, 0);
+ if (ret)
+ goto out_unlock;
+
+ free_space = btrfs_leaf_free_space(root, right);
+ if (free_space < data_size + sizeof(struct btrfs_item))
+ goto out_unlock;
+
+ left_nritems = btrfs_header_nritems(left);
+ if (left_nritems == 0)
+ goto out_unlock;
+
+ if (empty)
+ nr = 0;
+ else
+ nr = 1;
+
+ if (path->slots[0] >= left_nritems)
+ push_space += data_size + sizeof(*item);
+
+ i = left_nritems - 1;
+ while (i >= nr) {
+ item = btrfs_item_nr(left, i);
+
+ if (!empty && push_items > 0) {
+ if (path->slots[0] > i)
+ break;
+ if (path->slots[0] == i) {
+ int space = btrfs_leaf_free_space(root, left);
+ if (space + push_space * 2 > free_space)
+ break;
+ }
+ }
+
+ if (path->slots[0] == i)
+ push_space += data_size + sizeof(*item);
+
+ if (!left->map_token) {
+ map_extent_buffer(left, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &left->map_token, &left->kaddr,
+ &left->map_start, &left->map_len,
+ KM_USER1);
+ }
+
+ this_item_size = btrfs_item_size(left, item);
+ if (this_item_size + sizeof(*item) + push_space > free_space)
+ break;
+
+ push_items++;
+ push_space += this_item_size + sizeof(*item);
+ if (i == 0)
+ break;
+ i--;
+ }
+ if (left->map_token) {
+ unmap_extent_buffer(left, left->map_token, KM_USER1);
+ left->map_token = NULL;
+ }
+
+ if (push_items == 0)
+ goto out_unlock;
+
+ if (!empty && push_items == left_nritems)
+ WARN_ON(1);
+
+ /* push left to right */
+ right_nritems = btrfs_header_nritems(right);
+
+ push_space = btrfs_item_end_nr(left, left_nritems - push_items);
+ push_space -= leaf_data_end(root, left);
+
+ /* make room in the right data area */
+ data_end = leaf_data_end(root, right);
+ memmove_extent_buffer(right,
+ btrfs_leaf_data(right) + data_end - push_space,
+ btrfs_leaf_data(right) + data_end,
+ BTRFS_LEAF_DATA_SIZE(root) - data_end);
+
+ /* copy from the left data area */
+ copy_extent_buffer(right, left, btrfs_leaf_data(right) +
+ BTRFS_LEAF_DATA_SIZE(root) - push_space,
+ btrfs_leaf_data(left) + leaf_data_end(root, left),
+ push_space);
+
+ memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
+ btrfs_item_nr_offset(0),
+ right_nritems * sizeof(struct btrfs_item));
+
+ /* copy the items from left to right */
+ copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
+ btrfs_item_nr_offset(left_nritems - push_items),
+ push_items * sizeof(struct btrfs_item));
+
+ /* update the item pointers */
+ right_nritems += push_items;
+ btrfs_set_header_nritems(right, right_nritems);
+ push_space = BTRFS_LEAF_DATA_SIZE(root);
+ for (i = 0; i < right_nritems; i++) {
+ item = btrfs_item_nr(right, i);
+ if (!right->map_token) {
+ map_extent_buffer(right, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &right->map_token, &right->kaddr,
+ &right->map_start, &right->map_len,
+ KM_USER1);
+ }
+ push_space -= btrfs_item_size(right, item);
+ btrfs_set_item_offset(right, item, push_space);
+ }
+
+ if (right->map_token) {
+ unmap_extent_buffer(right, right->map_token, KM_USER1);
+ right->map_token = NULL;
+ }
+ left_nritems -= push_items;
+ btrfs_set_header_nritems(left, left_nritems);
+
+ if (left_nritems)
+ btrfs_mark_buffer_dirty(left);
+ btrfs_mark_buffer_dirty(right);
+
+ ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
+ BUG_ON(ret);
+
+ btrfs_item_key(right, &disk_key, 0);
+ btrfs_set_node_key(upper, &disk_key, slot + 1);
+ btrfs_mark_buffer_dirty(upper);
+
+ /* then fixup the leaf pointer in the path */
+ if (path->slots[0] >= left_nritems) {
+ path->slots[0] -= left_nritems;
+ if (btrfs_header_nritems(path->nodes[0]) == 0)
+ clean_tree_block(trans, root, path->nodes[0]);
+ btrfs_tree_unlock(path->nodes[0]);
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[1] += 1;
+ } else {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ }
+ return 0;
+
+out_unlock:
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return 1;
+}
+
+/*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes. returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, int data_size,
+ int empty)
+{
+ struct btrfs_disk_key disk_key;
+ struct extent_buffer *right = path->nodes[0];
+ struct extent_buffer *left;
+ int slot;
+ int i;
+ int free_space;
+ int push_space = 0;
+ int push_items = 0;
+ struct btrfs_item *item;
+ u32 old_left_nritems;
+ u32 right_nritems;
+ u32 nr;
+ int ret = 0;
+ int wret;
+ u32 this_item_size;
+ u32 old_left_item_size;
+
+ slot = path->slots[1];
+ if (slot == 0)
+ return 1;
+ if (!path->nodes[1])
+ return 1;
+
+ right_nritems = btrfs_header_nritems(right);
+ if (right_nritems == 0) {
+ return 1;
+ }
+
+ WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
+ left = read_node_slot(root, path->nodes[1], slot - 1);
+ btrfs_tree_lock(left);
+ free_space = btrfs_leaf_free_space(root, left);
+ if (free_space < data_size + sizeof(struct btrfs_item)) {
+ ret = 1;
+ goto out;
+ }
+
+ /* cow and double check */
+ ret = btrfs_cow_block(trans, root, left,
+ path->nodes[1], slot - 1, &left, 0);
+ if (ret) {
+ /* we hit -ENOSPC, but it isn't fatal here */
+ ret = 1;
+ goto out;
+ }
+
+ free_space = btrfs_leaf_free_space(root, left);
+ if (free_space < data_size + sizeof(struct btrfs_item)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (empty)
+ nr = right_nritems;
+ else
+ nr = right_nritems - 1;
+
+ for (i = 0; i < nr; i++) {
+ item = btrfs_item_nr(right, i);
+ if (!right->map_token) {
+ map_extent_buffer(right, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &right->map_token, &right->kaddr,
+ &right->map_start, &right->map_len,
+ KM_USER1);
+ }
+
+ if (!empty && push_items > 0) {
+ if (path->slots[0] < i)
+ break;
+ if (path->slots[0] == i) {
+ int space = btrfs_leaf_free_space(root, right);
+ if (space + push_space * 2 > free_space)
+ break;
+ }
+ }
+
+ if (path->slots[0] == i)
+ push_space += data_size + sizeof(*item);
+
+ this_item_size = btrfs_item_size(right, item);
+ if (this_item_size + sizeof(*item) + push_space > free_space)
+ break;
+
+ push_items++;
+ push_space += this_item_size + sizeof(*item);
+ }
+
+ if (right->map_token) {
+ unmap_extent_buffer(right, right->map_token, KM_USER1);
+ right->map_token = NULL;
+ }
+
+ if (push_items == 0) {
+ ret = 1;
+ goto out;
+ }
+ if (!empty && push_items == btrfs_header_nritems(right))
+ WARN_ON(1);
+
+ /* push data from right to left */
+ copy_extent_buffer(left, right,
+ btrfs_item_nr_offset(btrfs_header_nritems(left)),
+ btrfs_item_nr_offset(0),
+ push_items * sizeof(struct btrfs_item));
+
+ push_space = BTRFS_LEAF_DATA_SIZE(root) -
+ btrfs_item_offset_nr(right, push_items -1);
+
+ copy_extent_buffer(left, right, btrfs_leaf_data(left) +
+ leaf_data_end(root, left) - push_space,
+ btrfs_leaf_data(right) +
+ btrfs_item_offset_nr(right, push_items - 1),
+ push_space);
+ old_left_nritems = btrfs_header_nritems(left);
+ BUG_ON(old_left_nritems < 0);
+
+ old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
+ for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
+ u32 ioff;
+
+ item = btrfs_item_nr(left, i);
+ if (!left->map_token) {
+ map_extent_buffer(left, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &left->map_token, &left->kaddr,
+ &left->map_start, &left->map_len,
+ KM_USER1);
+ }
+
+ ioff = btrfs_item_offset(left, item);
+ btrfs_set_item_offset(left, item,
+ ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+ }
+ btrfs_set_header_nritems(left, old_left_nritems + push_items);
+ if (left->map_token) {
+ unmap_extent_buffer(left, left->map_token, KM_USER1);
+ left->map_token = NULL;
+ }
+
+ /* fixup right node */
+ if (push_items > right_nritems) {
+ printk("push items %d nr %u\n", push_items, right_nritems);
+ WARN_ON(1);
+ }
+
+ if (push_items < right_nritems) {
+ push_space = btrfs_item_offset_nr(right, push_items - 1) -
+ leaf_data_end(root, right);
+ memmove_extent_buffer(right, btrfs_leaf_data(right) +
+ BTRFS_LEAF_DATA_SIZE(root) - push_space,
+ btrfs_leaf_data(right) +
+ leaf_data_end(root, right), push_space);
+
+ memmove_extent_buffer(right, btrfs_item_nr_offset(0),
+ btrfs_item_nr_offset(push_items),
+ (btrfs_header_nritems(right) - push_items) *
+ sizeof(struct btrfs_item));
+ }
+ right_nritems -= push_items;
+ btrfs_set_header_nritems(right, right_nritems);
+ push_space = BTRFS_LEAF_DATA_SIZE(root);
+ for (i = 0; i < right_nritems; i++) {
+ item = btrfs_item_nr(right, i);
+
+ if (!right->map_token) {
+ map_extent_buffer(right, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &right->map_token, &right->kaddr,
+ &right->map_start, &right->map_len,
+ KM_USER1);
+ }
+
+ push_space = push_space - btrfs_item_size(right, item);
+ btrfs_set_item_offset(right, item, push_space);
+ }
+ if (right->map_token) {
+ unmap_extent_buffer(right, right->map_token, KM_USER1);
+ right->map_token = NULL;
+ }
+
+ btrfs_mark_buffer_dirty(left);
+ if (right_nritems)
+ btrfs_mark_buffer_dirty(right);
+
+ ret = btrfs_update_ref(trans, root, right, left,
+ old_left_nritems, push_items);
+ BUG_ON(ret);
+
+ btrfs_item_key(right, &disk_key, 0);
+ wret = fixup_low_keys(trans, root, path, &disk_key, 1);
+ if (wret)
+ ret = wret;
+
+ /* then fixup the leaf pointer in the path */
+ if (path->slots[0] < push_items) {
+ path->slots[0] += old_left_nritems;
+ if (btrfs_header_nritems(path->nodes[0]) == 0)
+ clean_tree_block(trans, root, path->nodes[0]);
+ btrfs_tree_unlock(path->nodes[0]);
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = left;
+ path->slots[1] -= 1;
+ } else {
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ path->slots[0] -= push_items;
+ }
+ BUG_ON(path->slots[0] < 0);
+ return ret;
+out:
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int split_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *ins_key,
+ struct btrfs_path *path, int data_size,
+ int extend)
+{
+ struct extent_buffer *l;
+ u32 nritems;
+ int mid;
+ int slot;
+ struct extent_buffer *right;
+ int space_needed = data_size + sizeof(struct btrfs_item);
+ int data_copy_size;
+ int rt_data_off;
+ int i;
+ int ret = 0;
+ int wret;
+ int double_split;
+ int num_doubles = 0;
+ struct btrfs_disk_key disk_key;
+
+ if (extend)
+ space_needed = data_size;
+
+ /* first try to make some room by pushing left and right */
+ if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
+ wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (wret < 0) {
+ return wret;
+ }
+ if (wret) {
+ wret = push_leaf_left(trans, root, path, data_size, 0);
+ if (wret < 0)
+ return wret;
+ }
+ l = path->nodes[0];
+
+ /* did the pushes work? */
+ if (btrfs_leaf_free_space(root, l) >= space_needed)
+ return 0;
+ }
+
+ if (!path->nodes[1]) {
+ ret = insert_new_root(trans, root, path, 1);
+ if (ret)
+ return ret;
+ }
+again:
+ double_split = 0;
+ l = path->nodes[0];
+ slot = path->slots[0];
+ nritems = btrfs_header_nritems(l);
+ mid = (nritems + 1)/ 2;
+
+ right = btrfs_alloc_free_block(trans, root, root->leafsize,
+ path->nodes[1]->start,
+ root->root_key.objectid,
+ trans->transid, 0, l->start, 0);
+ if (IS_ERR(right)) {
+ BUG_ON(1);
+ return PTR_ERR(right);
+ }
+
+ memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
+ btrfs_set_header_bytenr(right, right->start);
+ btrfs_set_header_generation(right, trans->transid);
+ btrfs_set_header_owner(right, root->root_key.objectid);
+ btrfs_set_header_level(right, 0);
+ write_extent_buffer(right, root->fs_info->fsid,
+ (unsigned long)btrfs_header_fsid(right),
+ BTRFS_FSID_SIZE);
+
+ write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_header_chunk_tree_uuid(right),
+ BTRFS_UUID_SIZE);
+ if (mid <= slot) {
+ if (nritems == 1 ||
+ leaf_space_used(l, mid, nritems - mid) + space_needed >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ if (slot >= nritems) {
+ btrfs_cpu_key_to_disk(&disk_key, ins_key);
+ btrfs_set_header_nritems(right, 0);
+ wret = insert_ptr(trans, root, path,
+ &disk_key, right->start,
+ path->slots[1] + 1, 1);
+ if (wret)
+ ret = wret;
+
+ btrfs_tree_unlock(path->nodes[0]);
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[0] = 0;
+ path->slots[1] += 1;
+ btrfs_mark_buffer_dirty(right);
+ return ret;
+ }
+ mid = slot;
+ if (mid != nritems &&
+ leaf_space_used(l, mid, nritems - mid) +
+ space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
+ double_split = 1;
+ }
+ }
+ } else {
+ if (leaf_space_used(l, 0, mid + 1) + space_needed >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ if (!extend && slot == 0) {
+ btrfs_cpu_key_to_disk(&disk_key, ins_key);
+ btrfs_set_header_nritems(right, 0);
+ wret = insert_ptr(trans, root, path,
+ &disk_key,
+ right->start,
+ path->slots[1], 1);
+ if (wret)
+ ret = wret;
+ btrfs_tree_unlock(path->nodes[0]);
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[0] = 0;
+ if (path->slots[1] == 0) {
+ wret = fixup_low_keys(trans, root,
+ path, &disk_key, 1);
+ if (wret)
+ ret = wret;
+ }
+ btrfs_mark_buffer_dirty(right);
+ return ret;
+ } else if (extend && slot == 0) {
+ mid = 1;
+ } else {
+ mid = slot;
+ if (mid != nritems &&
+ leaf_space_used(l, mid, nritems - mid) +
+ space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
+ double_split = 1;
+ }
+ }
+ }
+ }
+ nritems = nritems - mid;
+ btrfs_set_header_nritems(right, nritems);
+ data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+ copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+ btrfs_item_nr_offset(mid),
+ nritems * sizeof(struct btrfs_item));
+
+ copy_extent_buffer(right, l,
+ btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+ data_copy_size, btrfs_leaf_data(l) +
+ leaf_data_end(root, l), data_copy_size);
+
+ rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+ btrfs_item_end_nr(l, mid);
+
+ for (i = 0; i < nritems; i++) {
+ struct btrfs_item *item = btrfs_item_nr(right, i);
+ u32 ioff;
+
+ if (!right->map_token) {
+ map_extent_buffer(right, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &right->map_token, &right->kaddr,
+ &right->map_start, &right->map_len,
+ KM_USER1);
+ }
+
+ ioff = btrfs_item_offset(right, item);
+ btrfs_set_item_offset(right, item, ioff + rt_data_off);
+ }
+
+ if (right->map_token) {
+ unmap_extent_buffer(right, right->map_token, KM_USER1);
+ right->map_token = NULL;
+ }
+
+ btrfs_set_header_nritems(l, mid);
+ ret = 0;
+ btrfs_item_key(right, &disk_key, 0);
+ wret = insert_ptr(trans, root, path, &disk_key, right->start,
+ path->slots[1] + 1, 1);
+ if (wret)
+ ret = wret;
+
+ btrfs_mark_buffer_dirty(right);
+ btrfs_mark_buffer_dirty(l);
+ BUG_ON(path->slots[0] != slot);
+
+ ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+ BUG_ON(ret);
+
+ if (mid <= slot) {
+ btrfs_tree_unlock(path->nodes[0]);
+ free_extent_buffer(path->nodes[0]);
+ path->nodes[0] = right;
+ path->slots[0] -= mid;
+ path->slots[1] += 1;
+ } else {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ }
+
+ BUG_ON(path->slots[0] < 0);
+
+ if (double_split) {
+ BUG_ON(num_doubles != 0);
+ num_doubles++;
+ goto again;
+ }
+ return ret;
+}
+
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u32 new_size, int from_end)
+{
+ int ret = 0;
+ int slot;
+ int slot_orig;
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ u32 nritems;
+ unsigned int data_end;
+ unsigned int old_data_start;
+ unsigned int old_size;
+ unsigned int size_diff;
+ int i;
+
+ slot_orig = path->slots[0];
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ old_size = btrfs_item_size_nr(leaf, slot);
+ if (old_size == new_size)
+ return 0;
+
+ nritems = btrfs_header_nritems(leaf);
+ data_end = leaf_data_end(root, leaf);
+
+ old_data_start = btrfs_item_offset_nr(leaf, slot);
+
+ size_diff = old_size - new_size;
+
+ BUG_ON(slot < 0);
+ BUG_ON(slot >= nritems);
+
+ /*
+ * item0..itemN ... dataN.offset..dataN.size .. data0.size
+ */
+ /* first correct the data pointers */
+ for (i = slot; i < nritems; i++) {
+ u32 ioff;
+ item = btrfs_item_nr(leaf, i);
+
+ if (!leaf->map_token) {
+ map_extent_buffer(leaf, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &leaf->map_token, &leaf->kaddr,
+ &leaf->map_start, &leaf->map_len,
+ KM_USER1);
+ }
+
+ ioff = btrfs_item_offset(leaf, item);
+ btrfs_set_item_offset(leaf, item, ioff + size_diff);
+ }
+
+ if (leaf->map_token) {
+ unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+ leaf->map_token = NULL;
+ }
+
+ /* shift the data */
+ if (from_end) {
+ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ data_end + size_diff, btrfs_leaf_data(leaf) +
+ data_end, old_data_start + new_size - data_end);
+ } else {
+ struct btrfs_disk_key disk_key;
+ u64 offset;
+
+ btrfs_item_key(leaf, &disk_key, slot);
+
+ if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
+ unsigned long ptr;
+ struct btrfs_file_extent_item *fi;
+
+ fi = btrfs_item_ptr(leaf, slot,
+ struct btrfs_file_extent_item);
+ fi = (struct btrfs_file_extent_item *)(
+ (unsigned long)fi - size_diff);
+
+ if (btrfs_file_extent_type(leaf, fi) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ ptr = btrfs_item_ptr_offset(leaf, slot);
+ memmove_extent_buffer(leaf, ptr,
+ (unsigned long)fi,
+ offsetof(struct btrfs_file_extent_item,
+ disk_bytenr));
+ }
+ }
+
+ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ data_end + size_diff, btrfs_leaf_data(leaf) +
+ data_end, old_data_start - data_end);
+
+ offset = btrfs_disk_key_offset(&disk_key);
+ btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
+ btrfs_set_item_key(leaf, &disk_key, slot);
+ if (slot == 0)
+ fixup_low_keys(trans, root, path, &disk_key, 1);
+ }
+
+ item = btrfs_item_nr(leaf, slot);
+ btrfs_set_item_size(leaf, item, new_size);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = 0;
+ if (btrfs_leaf_free_space(root, leaf) < 0) {
+ btrfs_print_leaf(root, leaf);
+ BUG();
+ }
+ return ret;
+}
+
+int btrfs_extend_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u32 data_size)
+{
+ int ret = 0;
+ int slot;
+ int slot_orig;
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ u32 nritems;
+ unsigned int data_end;
+ unsigned int old_data;
+ unsigned int old_size;
+ int i;
+
+ slot_orig = path->slots[0];
+ leaf = path->nodes[0];
+
+ nritems = btrfs_header_nritems(leaf);
+ data_end = leaf_data_end(root, leaf);
+
+ if (btrfs_leaf_free_space(root, leaf) < data_size) {
+ btrfs_print_leaf(root, leaf);
+ BUG();
+ }
+ slot = path->slots[0];
+ old_data = btrfs_item_end_nr(leaf, slot);
+
+ BUG_ON(slot < 0);
+ if (slot >= nritems) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d too large, nritems %d\n", slot, nritems);
+ BUG_ON(1);
+ }
+
+ /*
+ * item0..itemN ... dataN.offset..dataN.size .. data0.size
+ */
+ /* first correct the data pointers */
+ for (i = slot; i < nritems; i++) {
+ u32 ioff;
+ item = btrfs_item_nr(leaf, i);
+
+ if (!leaf->map_token) {
+ map_extent_buffer(leaf, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &leaf->map_token, &leaf->kaddr,
+ &leaf->map_start, &leaf->map_len,
+ KM_USER1);
+ }
+ ioff = btrfs_item_offset(leaf, item);
+ btrfs_set_item_offset(leaf, item, ioff - data_size);
+ }
+
+ if (leaf->map_token) {
+ unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+ leaf->map_token = NULL;
+ }
+
+ /* shift the data */
+ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ data_end - data_size, btrfs_leaf_data(leaf) +
+ data_end, old_data - data_end);
+
+ data_end = old_data;
+ old_size = btrfs_item_size_nr(leaf, slot);
+ item = btrfs_item_nr(leaf, slot);
+ btrfs_set_item_size(leaf, item, old_size + data_size);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = 0;
+ if (btrfs_leaf_free_space(root, leaf) < 0) {
+ btrfs_print_leaf(root, leaf);
+ BUG();
+ }
+ return ret;
+}
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size,
+ int nr)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ int ret = 0;
+ int slot;
+ int slot_orig;
+ int i;
+ u32 nritems;
+ u32 total_size = 0;
+ u32 total_data = 0;
+ unsigned int data_end;
+ struct btrfs_disk_key disk_key;
+
+ for (i = 0; i < nr; i++) {
+ total_data += data_size[i];
+ }
+
+ total_size = total_data + (nr * sizeof(struct btrfs_item));
+ ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+ if (ret == 0)
+ return -EEXIST;
+ if (ret < 0)
+ goto out;
+
+ slot_orig = path->slots[0];
+ leaf = path->nodes[0];
+
+ nritems = btrfs_header_nritems(leaf);
+ data_end = leaf_data_end(root, leaf);
+
+ if (btrfs_leaf_free_space(root, leaf) < total_size) {
+ btrfs_print_leaf(root, leaf);
+ printk("not enough freespace need %u have %d\n",
+ total_size, btrfs_leaf_free_space(root, leaf));
+ BUG();
+ }
+
+ slot = path->slots[0];
+ BUG_ON(slot < 0);
+
+ if (slot != nritems) {
+ unsigned int old_data = btrfs_item_end_nr(leaf, slot);
+
+ if (old_data < data_end) {
+ btrfs_print_leaf(root, leaf);
+ printk("slot %d old_data %d data_end %d\n",
+ slot, old_data, data_end);
+ BUG_ON(1);
+ }
+ /*
+ * item0..itemN ... dataN.offset..dataN.size .. data0.size
+ */
+ /* first correct the data pointers */
+ WARN_ON(leaf->map_token);
+ for (i = slot; i < nritems; i++) {
+ u32 ioff;
+
+ item = btrfs_item_nr(leaf, i);
+ if (!leaf->map_token) {
+ map_extent_buffer(leaf, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &leaf->map_token, &leaf->kaddr,
+ &leaf->map_start, &leaf->map_len,
+ KM_USER1);
+ }
+
+ ioff = btrfs_item_offset(leaf, item);
+ btrfs_set_item_offset(leaf, item, ioff - total_data);
+ }
+ if (leaf->map_token) {
+ unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+ leaf->map_token = NULL;
+ }
+
+ /* shift the items */
+ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
+ btrfs_item_nr_offset(slot),
+ (nritems - slot) * sizeof(struct btrfs_item));
+
+ /* shift the data */
+ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ data_end - total_data, btrfs_leaf_data(leaf) +
+ data_end, old_data - data_end);
+ data_end = old_data;
+ }
+
+ /* setup the item for the new data */
+ for (i = 0; i < nr; i++) {
+ btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
+ btrfs_set_item_key(leaf, &disk_key, slot + i);
+ item = btrfs_item_nr(leaf, slot + i);
+ btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+ data_end -= data_size[i];
+ btrfs_set_item_size(leaf, item, data_size[i]);
+ }
+ btrfs_set_header_nritems(leaf, nritems + nr);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = 0;
+ if (slot == 0) {
+ btrfs_cpu_key_to_disk(&disk_key, cpu_key);
+ ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+ }
+
+ if (btrfs_leaf_free_space(root, leaf) < 0) {
+ btrfs_print_leaf(root, leaf);
+ BUG();
+ }
+out:
+ return ret;
+}
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *cpu_key, void *data, u32
+ data_size)
+{
+ int ret = 0;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ unsigned long ptr;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+ if (!ret) {
+ leaf = path->nodes[0];
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ write_extent_buffer(leaf, data, ptr, data_size);
+ btrfs_mark_buffer_dirty(leaf);
+ }
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * delete the pointer from a given node.
+ *
+ * If the delete empties a node, the node is removed from the tree,
+ * continuing all the way the root if required. The root is converted into
+ * a leaf if all the nodes are emptied.
+ */
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot)
+{
+ struct extent_buffer *parent = path->nodes[level];
+ u32 nritems;
+ int ret = 0;
+ int wret;
+
+ nritems = btrfs_header_nritems(parent);
+ if (slot != nritems -1) {
+ memmove_extent_buffer(parent,
+ btrfs_node_key_ptr_offset(slot),
+ btrfs_node_key_ptr_offset(slot + 1),
+ sizeof(struct btrfs_key_ptr) *
+ (nritems - slot - 1));
+ }
+ nritems--;
+ btrfs_set_header_nritems(parent, nritems);
+ if (nritems == 0 && parent == root->node) {
+ BUG_ON(btrfs_header_level(root->node) != 1);
+ /* just turn the root into a leaf and break */
+ btrfs_set_header_level(root->node, 0);
+ } else if (slot == 0) {
+ struct btrfs_disk_key disk_key;
+
+ btrfs_node_key(parent, &disk_key, 0);
+ wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
+ if (wret)
+ ret = wret;
+ }
+ btrfs_mark_buffer_dirty(parent);
+ return ret;
+}
+
+/*
+ * delete the item at the leaf level in path. If that empties
+ * the leaf, remove it from the tree
+ */
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int slot, int nr)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ int last_off;
+ int dsize = 0;
+ int ret = 0;
+ int wret;
+ int i;
+ u32 nritems;
+
+ leaf = path->nodes[0];
+ last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
+
+ for (i = 0; i < nr; i++)
+ dsize += btrfs_item_size_nr(leaf, slot + i);
+
+ nritems = btrfs_header_nritems(leaf);
+
+ if (slot + nr != nritems) {
+ int data_end = leaf_data_end(root, leaf);
+
+ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+ data_end + dsize,
+ btrfs_leaf_data(leaf) + data_end,
+ last_off - data_end);
+
+ for (i = slot + nr; i < nritems; i++) {
+ u32 ioff;
+
+ item = btrfs_item_nr(leaf, i);
+ if (!leaf->map_token) {
+ map_extent_buffer(leaf, (unsigned long)item,
+ sizeof(struct btrfs_item),
+ &leaf->map_token, &leaf->kaddr,
+ &leaf->map_start, &leaf->map_len,
+ KM_USER1);
+ }
+ ioff = btrfs_item_offset(leaf, item);
+ btrfs_set_item_offset(leaf, item, ioff + dsize);
+ }
+
+ if (leaf->map_token) {
+ unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+ leaf->map_token = NULL;
+ }
+
+ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
+ btrfs_item_nr_offset(slot + nr),
+ sizeof(struct btrfs_item) *
+ (nritems - slot - nr));
+ }
+ btrfs_set_header_nritems(leaf, nritems - nr);
+ nritems -= nr;
+
+ /* delete the leaf if we've emptied it */
+ if (nritems == 0) {
+ if (leaf == root->node) {
+ btrfs_set_header_level(leaf, 0);
+ } else {
+ u64 root_gen = btrfs_header_generation(path->nodes[1]);
+ wret = del_ptr(trans, root, path, 1, path->slots[1]);
+ if (wret)
+ ret = wret;
+ wret = btrfs_free_extent(trans, root,
+ leaf->start, leaf->len,
+ path->nodes[1]->start,
+ btrfs_header_owner(path->nodes[1]),
+ root_gen, 0, 0, 1);
+ if (wret)
+ ret = wret;
+ }
+ } else {
+ int used = leaf_space_used(leaf, 0, nritems);
+ if (slot == 0) {
+ struct btrfs_disk_key disk_key;
+
+ btrfs_item_key(leaf, &disk_key, 0);
+ wret = fixup_low_keys(trans, root, path,
+ &disk_key, 1);
+ if (wret)
+ ret = wret;
+ }
+
+ /* delete the leaf if it is mostly empty */
+ if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
+ /* push_leaf_left fixes the path.
+ * make sure the path still points to our leaf
+ * for possible call to del_ptr below
+ */
+ slot = path->slots[1];
+ extent_buffer_get(leaf);
+
+ wret = push_leaf_left(trans, root, path, 1, 1);
+ if (wret < 0 && wret != -ENOSPC)
+ ret = wret;
+
+ if (path->nodes[0] == leaf &&
+ btrfs_header_nritems(leaf)) {
+ wret = push_leaf_right(trans, root, path, 1, 1);
+ if (wret < 0 && wret != -ENOSPC)
+ ret = wret;
+ }
+
+ if (btrfs_header_nritems(leaf) == 0) {
+ u64 root_gen;
+ u64 bytenr = leaf->start;
+ u32 blocksize = leaf->len;
+
+ root_gen = btrfs_header_generation(
+ path->nodes[1]);
+
+ wret = del_ptr(trans, root, path, 1, slot);
+ if (wret)
+ ret = wret;
+
+ free_extent_buffer(leaf);
+ wret = btrfs_free_extent(trans, root, bytenr,
+ blocksize, path->nodes[1]->start,
+ btrfs_header_owner(path->nodes[1]),
+ root_gen, 0, 0, 1);
+ if (wret)
+ ret = wret;
+ } else {
+ /* if we're still in the path, make sure
+ * we're dirty. Otherwise, one of the
+ * push_leaf functions must have already
+ * dirtied this buffer
+ */
+ if (path->nodes[0] == leaf)
+ btrfs_mark_buffer_dirty(leaf);
+ free_extent_buffer(leaf);
+ }
+ } else {
+ btrfs_mark_buffer_dirty(leaf);
+ }
+ }
+ return ret;
+}
+
+/*
+ * search the tree again to find a leaf with lesser keys
+ * returns 0 if it found something or 1 if there are no lesser leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_disk_key found_key;
+ int ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+
+ if (key.offset > 0)
+ key.offset--;
+ else if (key.type > 0)
+ key.type--;
+ else if (key.objectid > 0)
+ key.objectid--;
+ else
+ return 1;
+
+ btrfs_release_path(root, path);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+ btrfs_item_key(path->nodes[0], &found_key, 0);
+ ret = comp_keys(&found_key, &key);
+ if (ret < 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * A helper function to walk down the tree starting at min_key, and looking
+ * for nodes or leaves that are either in cache or have a minimum
+ * transaction id. This is used by the btree defrag code, but could
+ * also be used to search for blocks that have changed since a given
+ * transaction id.
+ *
+ * This does not cow, but it does stuff the starting key it finds back
+ * into min_key, so you can call btrfs_search_slot with cow=1 on the
+ * key and get a writable path.
+ *
+ * This does lock as it descends, and path->keep_locks should be set
+ * to 1 by the caller.
+ *
+ * This honors path->lowest_level to prevent descent past a given level
+ * of the tree.
+ *
+ * returns zero if something useful was found, < 0 on error and 1 if there
+ * was nothing in the tree that matched the search criteria.
+ */
+int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+ struct btrfs_key *max_key,
+ struct btrfs_path *path, int cache_only,
+ u64 min_trans)
+{
+ struct extent_buffer *cur;
+ struct btrfs_key found_key;
+ int slot;
+ int sret;
+ u32 nritems;
+ int level;
+ int ret = 1;
+
+again:
+ cur = btrfs_lock_root_node(root);
+ level = btrfs_header_level(cur);
+ WARN_ON(path->nodes[level]);
+ path->nodes[level] = cur;
+ path->locks[level] = 1;
+
+ if (btrfs_header_generation(cur) < min_trans) {
+ ret = 1;
+ goto out;
+ }
+ while(1) {
+ nritems = btrfs_header_nritems(cur);
+ level = btrfs_header_level(cur);
+ sret = bin_search(cur, min_key, level, &slot);
+
+ /* at level = 0, we're done, setup the path and exit */
+ if (level == 0) {
+ if (slot >= nritems)
+ goto find_next_key;
+ ret = 0;
+ path->slots[level] = slot;
+ btrfs_item_key_to_cpu(cur, &found_key, slot);
+ goto out;
+ }
+ if (sret && slot > 0)
+ slot--;
+ /*
+ * check this node pointer against the cache_only and
+ * min_trans parameters. If it isn't in cache or is too
+ * old, skip to the next one.
+ */
+ while(slot < nritems) {
+ u64 blockptr;
+ u64 gen;
+ struct extent_buffer *tmp;
+ struct btrfs_disk_key disk_key;
+
+ blockptr = btrfs_node_blockptr(cur, slot);
+ gen = btrfs_node_ptr_generation(cur, slot);
+ if (gen < min_trans) {
+ slot++;
+ continue;
+ }
+ if (!cache_only)
+ break;
+
+ if (max_key) {
+ btrfs_node_key(cur, &disk_key, slot);
+ if (comp_keys(&disk_key, max_key) >= 0) {
+ ret = 1;
+ goto out;
+ }
+ }
+
+ tmp = btrfs_find_tree_block(root, blockptr,
+ btrfs_level_size(root, level - 1));
+
+ if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ free_extent_buffer(tmp);
+ break;
+ }
+ if (tmp)
+ free_extent_buffer(tmp);
+ slot++;
+ }
+find_next_key:
+ /*
+ * we didn't find a candidate key in this node, walk forward
+ * and find another one
+ */
+ if (slot >= nritems) {
+ path->slots[level] = slot;
+ sret = btrfs_find_next_key(root, path, min_key, level,
+ cache_only, min_trans);
+ if (sret == 0) {
+ btrfs_release_path(root, path);
+ goto again;
+ } else {
+ goto out;
+ }
+ }
+ /* save our key for returning back */
+ btrfs_node_key_to_cpu(cur, &found_key, slot);
+ path->slots[level] = slot;
+ if (level == path->lowest_level) {
+ ret = 0;
+ unlock_up(path, level, 1);
+ goto out;
+ }
+ cur = read_node_slot(root, cur, slot);
+
+ btrfs_tree_lock(cur);
+ path->locks[level - 1] = 1;
+ path->nodes[level - 1] = cur;
+ unlock_up(path, level, 1);
+ }
+out:
+ if (ret == 0)
+ memcpy(min_key, &found_key, sizeof(found_key));
+ return ret;
+}
+
+/*
+ * this is similar to btrfs_next_leaf, but does not try to preserve
+ * and fixup the path. It looks for and returns the next key in the
+ * tree based on the current path and the cache_only and min_trans
+ * parameters.
+ *
+ * 0 is returned if another key is found, < 0 if there are any errors
+ * and 1 is returned if there are no higher keys in the tree
+ *
+ * path->keep_locks should be set to 1 on the search made before
+ * calling this function.
+ */
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *key, int lowest_level,
+ int cache_only, u64 min_trans)
+{
+ int level = lowest_level;
+ int slot;
+ struct extent_buffer *c;
+
+ while(level < BTRFS_MAX_LEVEL) {
+ if (!path->nodes[level])
+ return 1;
+
+ slot = path->slots[level] + 1;
+ c = path->nodes[level];
+next:
+ if (slot >= btrfs_header_nritems(c)) {
+ level++;
+ if (level == BTRFS_MAX_LEVEL) {
+ return 1;
+ }
+ continue;
+ }
+ if (level == 0)
+ btrfs_item_key_to_cpu(c, key, slot);
+ else {
+ u64 blockptr = btrfs_node_blockptr(c, slot);
+ u64 gen = btrfs_node_ptr_generation(c, slot);
+
+ if (cache_only) {
+ struct extent_buffer *cur;
+ cur = btrfs_find_tree_block(root, blockptr,
+ btrfs_level_size(root, level - 1));
+ if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
+ slot++;
+ if (cur)
+ free_extent_buffer(cur);
+ goto next;
+ }
+ free_extent_buffer(cur);
+ }
+ if (gen < min_trans) {
+ slot++;
+ goto next;
+ }
+ btrfs_node_key_to_cpu(c, key, slot);
+ }
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * search the tree again to find a leaf with greater keys
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ int slot;
+ int level = 1;
+ struct extent_buffer *c;
+ struct extent_buffer *next = NULL;
+ struct btrfs_key key;
+ u32 nritems;
+ int ret;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ if (nritems == 0) {
+ return 1;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
+
+ btrfs_release_path(root, path);
+ path->keep_locks = 1;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ path->keep_locks = 0;
+
+ if (ret < 0)
+ return ret;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * by releasing the path above we dropped all our locks. A balance
+ * could have added more items next to the key that used to be
+ * at the very end of the block. So, check again here and
+ * advance the path if there are now more items available.
+ */
+ if (nritems > 0 && path->slots[0] < nritems - 1) {
+ path->slots[0]++;
+ goto done;
+ }
+
+ while(level < BTRFS_MAX_LEVEL) {
+ if (!path->nodes[level])
+ return 1;
+
+ slot = path->slots[level] + 1;
+ c = path->nodes[level];
+ if (slot >= btrfs_header_nritems(c)) {
+ level++;
+ if (level == BTRFS_MAX_LEVEL) {
+ return 1;
+ }
+ continue;
+ }
+
+ if (next) {
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ }
+
+ if (level == 1 && (path->locks[1] || path->skip_locking) &&
+ path->reada)
+ reada_for_search(root, path, level, slot, 0);
+
+ next = read_node_slot(root, c, slot);
+ if (!path->skip_locking) {
+ WARN_ON(!btrfs_tree_locked(c));
+ btrfs_tree_lock(next);
+ }
+ break;
+ }
+ path->slots[level] = slot;
+ while(1) {
+ level--;
+ c = path->nodes[level];
+ if (path->locks[level])
+ btrfs_tree_unlock(c);
+ free_extent_buffer(c);
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ if (!path->skip_locking)
+ path->locks[level] = 1;
+ if (!level)
+ break;
+ if (level == 1 && path->locks[1] && path->reada)
+ reada_for_search(root, path, level, slot, 0);
+ next = read_node_slot(root, next, 0);
+ if (!path->skip_locking) {
+ WARN_ON(!btrfs_tree_locked(path->nodes[level]));
+ btrfs_tree_lock(next);
+ }
+ }
+done:
+ unlock_up(path, 0, 1);
+ return 0;
+}
+
+/*
+ * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
+ * searching until it gets past min_objectid or finds an item of 'type'
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid,
+ int type)
+{
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int ret;
+
+ while(1) {
+ if (path->slots[0] == 0) {
+ ret = btrfs_prev_leaf(root, path);
+ if (ret != 0)
+ return ret;
+ } else {
+ path->slots[0]--;
+ }
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (nritems == 0)
+ return 1;
+ if (path->slots[0] == nritems)
+ path->slots[0]--;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.type == type)
+ return 0;
+ if (found_key.objectid < min_objectid)
+ break;
+ if (found_key.objectid == min_objectid &&
+ found_key.type < type)
+ break;
+ }
+ return 1;
+}
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_CTREE__
+#define __BTRFS_CTREE__
+
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/fs.h>
+#include <linux/completion.h>
+#include <linux/backing-dev.h>
+#include <linux/wait.h>
+#include <asm/kmap_types.h>
+#include "bit-radix.h"
+#include "extent_io.h"
+#include "extent_map.h"
+#include "async-thread.h"
+
+struct btrfs_trans_handle;
+struct btrfs_transaction;
+extern struct kmem_cache *btrfs_trans_handle_cachep;
+extern struct kmem_cache *btrfs_transaction_cachep;
+extern struct kmem_cache *btrfs_bit_radix_cachep;
+extern struct kmem_cache *btrfs_path_cachep;
+struct btrfs_ordered_sum;
+
+#define BTRFS_MAGIC "_B9RfS_M"
+
+#define BTRFS_ACL_NOT_CACHED ((void *)-1)
+
+#ifdef CONFIG_LOCKDEP
+# define BTRFS_MAX_LEVEL 7
+#else
+# define BTRFS_MAX_LEVEL 8
+#endif
+
+/* holds pointers to all of the tree roots */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/* stores information about which extents are in use, and reference counts */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * chunk tree stores translations from logical -> physical block numbering
+ * the super block points to the chunk tree
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * stores information about which areas of a given device are in use.
+ * one per device. The tree of tree roots points to the device tree
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* one per subvolume, storing files and directories */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* directory objectid inside the root tree */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+
+/* orhpan objectid for tracking unlinked/truncated files */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* does write ahead logging to speed up fsyncs */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* dummy objectid represents multiple objectids */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/*
+ * All files have objectids in this range.
+ */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * the device items go into the chunk tree. The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+/*
+ * we can actually store much bigger names, but lets not confuse the rest
+ * of linux
+ */
+#define BTRFS_NAME_LEN 255
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+/* four bytes for CRC32 */
+#define BTRFS_CRC32_SIZE 4
+#define BTRFS_EMPTY_DIR_SIZE 0
+
+#define BTRFS_FT_UNKNOWN 0
+#define BTRFS_FT_REG_FILE 1
+#define BTRFS_FT_DIR 2
+#define BTRFS_FT_CHRDEV 3
+#define BTRFS_FT_BLKDEV 4
+#define BTRFS_FT_FIFO 5
+#define BTRFS_FT_SOCK 6
+#define BTRFS_FT_SYMLINK 7
+#define BTRFS_FT_XATTR 8
+#define BTRFS_FT_MAX 9
+
+/*
+ * the key defines the order in the tree, and so it also defines (optimal)
+ * block layout. objectid corresonds to the inode number. The flags
+ * tells us things about the object, and is a kind of stream selector.
+ * so for a given inode, keys with flags of 1 might refer to the inode
+ * data, flags of 2 may point to file data in the btree and flags == 3
+ * may point to extents.
+ *
+ * offset is the starting byte offset for this key in the stream.
+ *
+ * btrfs_disk_key is in disk byte order. struct btrfs_key is always
+ * in cpu native order. Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+ __le64 objectid;
+ u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+ u64 objectid;
+ u8 type;
+ u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_mapping_tree {
+ struct extent_map_tree map_tree;
+};
+
+#define BTRFS_UUID_SIZE 16
+struct btrfs_dev_item {
+ /* the internal btrfs device id */
+ __le64 devid;
+
+ /* size of the device */
+ __le64 total_bytes;
+
+ /* bytes used */
+ __le64 bytes_used;
+
+ /* optimal io alignment for this device */
+ __le32 io_align;
+
+ /* optimal io width for this device */
+ __le32 io_width;
+
+ /* minimal io size for this device */
+ __le32 sector_size;
+
+ /* type and info about this device */
+ __le64 type;
+
+ /* grouping information for allocation decisions */
+ __le32 dev_group;
+
+ /* seek speed 0-100 where 100 is fastest */
+ u8 seek_speed;
+
+ /* bandwidth 0-100 where 100 is fastest */
+ u8 bandwidth;
+
+ /* btrfs generated uuid for this device */
+ u8 uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+ __le64 devid;
+ __le64 offset;
+ u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+ /* size of this chunk in bytes */
+ __le64 length;
+
+ /* objectid of the root referencing this chunk */
+ __le64 owner;
+
+ __le64 stripe_len;
+ __le64 type;
+
+ /* optimal io alignment for this chunk */
+ __le32 io_align;
+
+ /* optimal io width for this chunk */
+ __le32 io_width;
+
+ /* minimal io size for this chunk */
+ __le32 sector_size;
+
+ /* 2^16 stripes is quite a lot, a second limit is the size of a single
+ * item in the btree
+ */
+ __le16 num_stripes;
+
+ /* sub stripes only matter for raid10 */
+ __le16 sub_stripes;
+ struct btrfs_stripe stripe;
+ /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+ BUG_ON(num_stripes == 0);
+ return sizeof(struct btrfs_chunk) +
+ sizeof(struct btrfs_stripe) * (num_stripes - 1);
+}
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0)
+
+/*
+ * every tree block (leaf or node) starts with this header.
+ */
+struct btrfs_header {
+ /* these first four must match the super block */
+ u8 csum[BTRFS_CSUM_SIZE];
+ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ __le64 bytenr; /* which block this node is supposed to live in */
+ __le64 flags;
+
+ /* allowed to be different from the super from here on down */
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+ __le64 generation;
+ __le64 owner;
+ __le32 nritems;
+ u8 level;
+} __attribute__ ((__packed__));
+
+#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
+ sizeof(struct btrfs_header)) / \
+ sizeof(struct btrfs_key_ptr))
+#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
+#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize))
+#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+ sizeof(struct btrfs_item) - \
+ sizeof(struct btrfs_file_extent_item))
+
+
+/*
+ * this is a very generous portion of the super block, giving us
+ * room to translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+#define BTRFS_LABEL_SIZE 256
+
+/*
+ * the super block basically lists the main trees of the FS
+ * it currently lacks any block count etc etc
+ */
+struct btrfs_super_block {
+ u8 csum[BTRFS_CSUM_SIZE];
+ /* the first 4 fields must match struct btrfs_header */
+ u8 fsid[16]; /* FS specific uuid */
+ __le64 bytenr; /* this block number */
+ __le64 flags;
+
+ /* allowed to be different from the btrfs_header from here own down */
+ __le64 magic;
+ __le64 generation;
+ __le64 root;
+ __le64 chunk_root;
+ __le64 log_root;
+ __le64 total_bytes;
+ __le64 bytes_used;
+ __le64 root_dir_objectid;
+ __le64 num_devices;
+ __le32 sectorsize;
+ __le32 nodesize;
+ __le32 leafsize;
+ __le32 stripesize;
+ __le32 sys_chunk_array_size;
+ u8 root_level;
+ u8 chunk_root_level;
+ u8 log_root_level;
+ struct btrfs_dev_item dev_item;
+ char label[BTRFS_LABEL_SIZE];
+ u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+} __attribute__ ((__packed__));
+
+/*
+ * A leaf is full of items. offset and size tell us where to find
+ * the item in the leaf (relative to the start of the data area)
+ */
+struct btrfs_item {
+ struct btrfs_disk_key key;
+ __le32 offset;
+ __le32 size;
+} __attribute__ ((__packed__));
+
+/*
+ * leaves have an item area and a data area:
+ * [item0, item1....itemN] [free space] [dataN...data1, data0]
+ *
+ * The data is separate from the items to get the keys closer together
+ * during searches.
+ */
+struct btrfs_leaf {
+ struct btrfs_header header;
+ struct btrfs_item items[];
+} __attribute__ ((__packed__));
+
+/*
+ * all non-leaf blocks are nodes, they hold only keys and pointers to
+ * other blocks
+ */
+struct btrfs_key_ptr {
+ struct btrfs_disk_key key;
+ __le64 blockptr;
+ __le64 generation;
+} __attribute__ ((__packed__));
+
+struct btrfs_node {
+ struct btrfs_header header;
+ struct btrfs_key_ptr ptrs[];
+} __attribute__ ((__packed__));
+
+/*
+ * btrfs_paths remember the path taken from the root down to the leaf.
+ * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
+ * to any other levels that are present.
+ *
+ * The slots array records the index of the item or block pointer
+ * used while walking the tree.
+ */
+struct btrfs_path {
+ struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
+ int slots[BTRFS_MAX_LEVEL];
+ /* if there is real range locking, this locks field will change */
+ int locks[BTRFS_MAX_LEVEL];
+ int reada;
+ /* keep some upper locks as we walk down */
+ int keep_locks;
+ int skip_locking;
+ int lowest_level;
+};
+
+/*
+ * items in the extent btree are used to record the objectid of the
+ * owner of the block and the number of references
+ */
+struct btrfs_extent_item {
+ __le32 refs;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_ref {
+ __le64 root;
+ __le64 generation;
+ __le64 objectid;
+ __le64 offset;
+ __le32 num_refs;
+} __attribute__ ((__packed__));
+
+/* dev extents record free space on individual devices. The owner
+ * field points back to the chunk allocation mapping tree that allocated
+ * the extent. The chunk tree uuid field is a way to double check the owner
+ */
+struct btrfs_dev_extent {
+ __le64 chunk_tree;
+ __le64 chunk_objectid;
+ __le64 chunk_offset;
+ __le64 length;
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+ __le64 index;
+ __le16 name_len;
+ /* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+ __le64 sec;
+ __le32 nsec;
+} __attribute__ ((__packed__));
+
+/*
+ * there is no padding here on purpose. If you want to extent the inode,
+ * make a new item type
+ */
+struct btrfs_inode_item {
+ /* nfs style generation number */
+ __le64 generation;
+ /* transid that last touched this inode */
+ __le64 transid;
+ __le64 size;
+ __le64 nblocks;
+ __le64 block_group;
+ __le32 nlink;
+ __le32 uid;
+ __le32 gid;
+ __le32 mode;
+ __le64 rdev;
+ __le16 flags;
+ __le16 compat_flags;
+ struct btrfs_timespec atime;
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec mtime;
+ struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+ __le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+ struct btrfs_disk_key location;
+ __le64 transid;
+ __le16 data_len;
+ __le16 name_len;
+ u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_root_item {
+ struct btrfs_inode_item inode;
+ __le64 root_dirid;
+ __le64 bytenr;
+ __le64 byte_limit;
+ __le64 bytes_used;
+ __le32 flags;
+ __le32 refs;
+ struct btrfs_disk_key drop_progress;
+ u8 drop_level;
+ u8 level;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FILE_EXTENT_REG 0
+#define BTRFS_FILE_EXTENT_INLINE 1
+
+struct btrfs_file_extent_item {
+ __le64 generation;
+ u8 type;
+ /*
+ * disk space consumed by the extent, checksum blocks are included
+ * in these numbers
+ */
+ __le64 disk_bytenr;
+ __le64 disk_num_bytes;
+ /*
+ * the logical offset in file blocks (no csums)
+ * this extent record is for. This allows a file extent to point
+ * into the middle of an existing extent on disk, sharing it
+ * between two snapshots (useful if some bytes in the middle of the
+ * extent have changed
+ */
+ __le64 offset;
+ /*
+ * the logical number of file blocks (no csums included)
+ */
+ __le64 num_bytes;
+} __attribute__ ((__packed__));
+
+struct btrfs_csum_item {
+ u8 csum;
+} __attribute__ ((__packed__));
+
+/* different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
+#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
+#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
+#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
+#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
+
+struct btrfs_block_group_item {
+ __le64 used;
+ __le64 chunk_objectid;
+ __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_space_info {
+ u64 flags;
+ u64 total_bytes;
+ u64 bytes_used;
+ u64 bytes_pinned;
+ int full;
+ int force_alloc;
+ struct list_head list;
+
+ /* for block groups in our same type */
+ struct list_head block_groups;
+ spinlock_t lock;
+};
+
+struct btrfs_free_space {
+ struct rb_node bytes_index;
+ struct rb_node offset_index;
+ u64 offset;
+ u64 bytes;
+};
+
+struct btrfs_block_group_cache {
+ struct btrfs_key key;
+ struct btrfs_block_group_item item;
+ spinlock_t lock;
+ u64 pinned;
+ u64 flags;
+ int cached;
+ int ro;
+ int dirty;
+
+ struct btrfs_space_info *space_info;
+
+ /* free space cache stuff */
+ struct rb_root free_space_bytes;
+ struct rb_root free_space_offset;
+
+ /* block group cache stuff */
+ struct rb_node cache_node;
+
+ /* for block groups in the same raid type */
+ struct list_head list;
+};
+
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info {
+ u8 fsid[BTRFS_FSID_SIZE];
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+ struct btrfs_root *extent_root;
+ struct btrfs_root *tree_root;
+ struct btrfs_root *chunk_root;
+ struct btrfs_root *dev_root;
+
+ /* the log root tree is a directory of all the other log roots */
+ struct btrfs_root *log_root_tree;
+ struct radix_tree_root fs_roots_radix;
+
+ /* block group cache stuff */
+ spinlock_t block_group_cache_lock;
+ struct rb_root block_group_cache_tree;
+
+ struct extent_io_tree pinned_extents;
+ struct extent_io_tree pending_del;
+ struct extent_io_tree extent_ins;
+
+ /* logical->physical extent mapping */
+ struct btrfs_mapping_tree mapping_tree;
+
+ u64 generation;
+ u64 last_trans_committed;
+ u64 last_trans_new_blockgroup;
+ u64 open_ioctl_trans;
+ unsigned long mount_opt;
+ u64 max_extent;
+ u64 max_inline;
+ u64 alloc_start;
+ struct btrfs_transaction *running_transaction;
+ wait_queue_head_t transaction_throttle;
+ wait_queue_head_t transaction_wait;
+ wait_queue_head_t async_submit_wait;
+
+ wait_queue_head_t tree_log_wait;
+
+ struct btrfs_super_block super_copy;
+ struct btrfs_super_block super_for_commit;
+ struct block_device *__bdev;
+ struct super_block *sb;
+ struct inode *btree_inode;
+ struct backing_dev_info bdi;
+ spinlock_t hash_lock;
+ struct mutex trans_mutex;
+ struct mutex tree_log_mutex;
+ struct mutex transaction_kthread_mutex;
+ struct mutex cleaner_mutex;
+ struct mutex alloc_mutex;
+ struct mutex chunk_mutex;
+ struct mutex drop_mutex;
+ struct mutex volume_mutex;
+ struct list_head trans_list;
+ struct list_head hashers;
+ struct list_head dead_roots;
+
+ atomic_t nr_async_submits;
+ atomic_t nr_async_bios;
+ atomic_t tree_log_writers;
+ atomic_t tree_log_commit;
+ unsigned long tree_log_batch;
+ u64 tree_log_transid;
+
+ /*
+ * this is used by the balancing code to wait for all the pending
+ * ordered extents
+ */
+ spinlock_t ordered_extent_lock;
+ struct list_head ordered_extents;
+ struct list_head delalloc_inodes;
+
+ /*
+ * there is a pool of worker threads for checksumming during writes
+ * and a pool for checksumming after reads. This is because readers
+ * can run with FS locks held, and the writers may be waiting for
+ * those locks. We don't want ordering in the pending list to cause
+ * deadlocks, and so the two are serviced separately.
+ *
+ * A third pool does submit_bio to avoid deadlocking with the other
+ * two
+ */
+ struct btrfs_workers workers;
+ struct btrfs_workers endio_workers;
+ struct btrfs_workers endio_write_workers;
+ struct btrfs_workers submit_workers;
+ /*
+ * fixup workers take dirty pages that didn't properly go through
+ * the cow mechanism and make them safe to write. It happens
+ * for the sys_munmap function call path
+ */
+ struct btrfs_workers fixup_workers;
+ struct task_struct *transaction_kthread;
+ struct task_struct *cleaner_kthread;
+ int thread_pool_size;
+
+ struct kobject super_kobj;
+ struct completion kobj_unregister;
+ int do_barriers;
+ int closing;
+ int log_root_recovering;
+ atomic_t throttles;
+ atomic_t throttle_gen;
+
+ u64 total_pinned;
+ struct list_head dirty_cowonly_roots;
+
+ struct btrfs_fs_devices *fs_devices;
+ struct list_head space_info;
+ spinlock_t delalloc_lock;
+ spinlock_t new_trans_lock;
+ u64 delalloc_bytes;
+ u64 last_alloc;
+ u64 last_data_alloc;
+ u64 last_log_alloc;
+
+ spinlock_t ref_cache_lock;
+ u64 total_ref_cache_size;
+
+ u64 avail_data_alloc_bits;
+ u64 avail_metadata_alloc_bits;
+ u64 avail_system_alloc_bits;
+ u64 data_alloc_profile;
+ u64 metadata_alloc_profile;
+ u64 system_alloc_profile;
+
+ void *bdev_holder;
+};
+
+struct btrfs_leaf_ref_tree {
+ struct rb_root root;
+ struct btrfs_leaf_ref *last;
+ struct list_head list;
+ spinlock_t lock;
+};
+
+/*
+ * in ram representation of the tree. extent_root is used for all allocations
+ * and for the extent tree extent_root root.
+ */
+struct btrfs_dirty_root;
+struct btrfs_root {
+ struct extent_buffer *node;
+
+ /* the node lock is held while changing the node pointer */
+ spinlock_t node_lock;
+
+ struct extent_buffer *commit_root;
+ struct btrfs_leaf_ref_tree *ref_tree;
+ struct btrfs_leaf_ref_tree ref_tree_struct;
+ struct btrfs_dirty_root *dirty_root;
+ struct btrfs_root *log_root;
+
+ struct btrfs_root_item root_item;
+ struct btrfs_key root_key;
+ struct btrfs_fs_info *fs_info;
+ struct inode *inode;
+ struct extent_io_tree dirty_log_pages;
+
+ struct kobject root_kobj;
+ struct completion kobj_unregister;
+ struct mutex objectid_mutex;
+ struct mutex log_mutex;
+
+ u64 objectid;
+ u64 last_trans;
+
+ /* data allocations are done in sectorsize units */
+ u32 sectorsize;
+
+ /* node allocations are done in nodesize units */
+ u32 nodesize;
+
+ /* leaf allocations are done in leafsize units */
+ u32 leafsize;
+
+ u32 stripesize;
+
+ u32 type;
+ u64 highest_inode;
+ u64 last_inode_alloc;
+ int ref_cows;
+ int track_dirty;
+ u64 defrag_trans_start;
+ struct btrfs_key defrag_progress;
+ struct btrfs_key defrag_max;
+ int defrag_running;
+ int defrag_level;
+ char *name;
+ int in_sysfs;
+
+ /* the dirty list is only used by non-reference counted roots */
+ struct list_head dirty_list;
+
+ spinlock_t list_lock;
+ struct list_head dead_list;
+ struct list_head orphan_list;
+};
+
+/*
+
+ * inode items have the data typically returned from stat and store other
+ * info about object characteristics. There is one for every file and dir in
+ * the FS
+ */
+#define BTRFS_INODE_ITEM_KEY 1
+#define BTRFS_INODE_REF_KEY 2
+#define BTRFS_XATTR_ITEM_KEY 8
+#define BTRFS_ORPHAN_ITEM_KEY 9
+/* reserve 2-15 close to the inode for later flexibility */
+
+/*
+ * dir items are the name -> inode pointers in a directory. There is one
+ * for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY 14
+#define BTRFS_DIR_LOG_INDEX_KEY 15
+#define BTRFS_DIR_ITEM_KEY 16
+#define BTRFS_DIR_INDEX_KEY 17
+/*
+ * extent data is for file data
+ */
+#define BTRFS_EXTENT_DATA_KEY 18
+/*
+ * csum items have the checksums for data in the extents
+ */
+#define BTRFS_CSUM_ITEM_KEY 19
+
+
+/* reserve 21-31 for other file/dir stuff */
+
+/*
+ * root items point to tree roots. There are typically in the root
+ * tree used by the super block to find all the other trees
+ */
+#define BTRFS_ROOT_ITEM_KEY 32
+/*
+ * extent items are in the extent map tree. These record which blocks
+ * are used, and how many references there are to each block
+ */
+#define BTRFS_EXTENT_ITEM_KEY 33
+#define BTRFS_EXTENT_REF_KEY 34
+
+/*
+ * block groups give us hints into the extent allocation trees. Which
+ * blocks are free etc etc
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 50
+
+#define BTRFS_DEV_EXTENT_KEY 75
+#define BTRFS_DEV_ITEM_KEY 76
+#define BTRFS_CHUNK_ITEM_KEY 77
+
+/*
+ * string items are for debugging. They just store a short string of
+ * data in the FS
+ */
+#define BTRFS_STRING_ITEM_KEY 253
+
+#define BTRFS_MOUNT_NODATASUM (1 << 0)
+#define BTRFS_MOUNT_NODATACOW (1 << 1)
+#define BTRFS_MOUNT_NOBARRIER (1 << 2)
+#define BTRFS_MOUNT_SSD (1 << 3)
+#define BTRFS_MOUNT_DEGRADED (1 << 4)
+
+#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
+#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
+#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
+ BTRFS_MOUNT_##opt)
+/*
+ * Inode flags
+ */
+#define BTRFS_INODE_NODATASUM (1 << 0)
+#define BTRFS_INODE_NODATACOW (1 << 1)
+#define BTRFS_INODE_READONLY (1 << 2)
+#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \
+ ~BTRFS_INODE_##flag)
+#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \
+ BTRFS_INODE_##flag)
+#define btrfs_test_flag(inode, flag) (BTRFS_I(inode)->flags & \
+ BTRFS_INODE_##flag)
+/* some macros to generate set/get funcs for the struct fields. This
+ * assumes there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#define read_eb_member(eb, ptr, type, member, result) ( \
+ read_extent_buffer(eb, (char *)(result), \
+ ((unsigned long)(ptr)) + \
+ offsetof(type, member), \
+ sizeof(((type *)0)->member)))
+
+#define write_eb_member(eb, ptr, type, member, result) ( \
+ write_extent_buffer(eb, (char *)(result), \
+ ((unsigned long)(ptr)) + \
+ offsetof(type, member), \
+ sizeof(((type *)0)->member)))
+
+#ifndef BTRFS_SETGET_FUNCS
+#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
+u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
+void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
+#endif
+
+#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(struct extent_buffer *eb) \
+{ \
+ type *p = kmap_atomic(eb->first_page, KM_USER0); \
+ u##bits res = le##bits##_to_cpu(p->member); \
+ kunmap_atomic(p, KM_USER0); \
+ return res; \
+} \
+static inline void btrfs_set_##name(struct extent_buffer *eb, \
+ u##bits val) \
+{ \
+ type *p = kmap_atomic(eb->first_page, KM_USER0); \
+ p->member = cpu_to_le##bits(val); \
+ kunmap_atomic(p, KM_USER0); \
+}
+
+#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(type *s) \
+{ \
+ return le##bits##_to_cpu(s->member); \
+} \
+static inline void btrfs_set_##name(type *s, u##bits val) \
+{ \
+ s->member = cpu_to_le##bits(val); \
+}
+
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
+BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
+BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
+
+BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
+ io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
+ io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
+ sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
+ dev_group, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
+ seek_speed, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
+ bandwidth, 8);
+
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+ return (char *)d + offsetof(struct btrfs_dev_item, uuid);
+}
+
+BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
+{
+ return (char *)s + offsetof(struct btrfs_stripe, dev_uuid);
+}
+
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+ stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+ io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+ io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+ sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+ num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
+ sub_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+ int nr)
+{
+ unsigned long offset = (unsigned long)c;
+ offset += offsetof(struct btrfs_chunk, stripe);
+ offset += nr * sizeof(struct btrfs_stripe);
+ return (struct btrfs_stripe *)offset;
+}
+
+static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
+{
+ return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
+}
+
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr)
+{
+ return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr,
+ u64 val)
+{
+ btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr)
+{
+ return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr,
+ u64 val)
+{
+ btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+/* struct btrfs_block_group_item */
+BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+ used, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+ used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
+
+/* struct btrfs_inode_ref */
+BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+
+/* struct btrfs_inode_item */
+BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64);
+BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16);
+BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item,
+ compat_flags, 16);
+
+static inline struct btrfs_timespec *
+btrfs_inode_atime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, atime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, mtime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, ctime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_otime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, otime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
+ chunk_tree, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
+ chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
+ chunk_offset, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
+static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+{
+ unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
+ return (u8 *)((unsigned long)dev + ptr);
+}
+
+/* struct btrfs_extent_ref */
+BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64);
+BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
+
+BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref,
+ objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref,
+ offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref,
+ num_refs, 32);
+
+/* struct btrfs_extent_item */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item,
+ refs, 32);
+
+/* struct btrfs_node */
+BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
+BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+
+static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+ int nr, u64 val)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+ int nr, u64 val)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline unsigned long btrfs_node_key_ptr_offset(int nr)
+{
+ return offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+}
+
+void btrfs_node_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr);
+
+static inline void btrfs_set_node_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr)
+{
+ unsigned long ptr;
+ ptr = btrfs_node_key_ptr_offset(nr);
+ write_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+ struct btrfs_key_ptr, key, disk_key);
+}
+
+/* struct btrfs_item */
+BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+
+static inline unsigned long btrfs_item_nr_offset(int nr)
+{
+ return offsetof(struct btrfs_leaf, items) +
+ sizeof(struct btrfs_item) * nr;
+}
+
+static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
+ int nr)
+{
+ return (struct btrfs_item *)btrfs_item_nr_offset(nr);
+}
+
+static inline u32 btrfs_item_end(struct extent_buffer *eb,
+ struct btrfs_item *item)
+{
+ return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
+}
+
+static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+{
+ return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
+{
+ return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+{
+ return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline void btrfs_item_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr)
+{
+ struct btrfs_item *item = btrfs_item_nr(eb, nr);
+ read_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+static inline void btrfs_set_item_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr)
+{
+ struct btrfs_item *item = btrfs_item_nr(eb, nr);
+ write_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
+
+/* struct btrfs_dir_item */
+BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
+
+static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+ struct btrfs_dir_item *item,
+ struct btrfs_disk_key *key)
+{
+ read_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+ struct btrfs_dir_item *item,
+ struct btrfs_disk_key *key)
+{
+ write_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+/* struct btrfs_disk_key */
+BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
+ objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
+
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
+ struct btrfs_disk_key *disk)
+{
+ cpu->offset = le64_to_cpu(disk->offset);
+ cpu->type = disk->type;
+ cpu->objectid = le64_to_cpu(disk->objectid);
+}
+
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
+ struct btrfs_key *cpu)
+{
+ disk->offset = cpu_to_le64(cpu->offset);
+ disk->type = cpu->type;
+ disk->objectid = cpu_to_le64(cpu->objectid);
+}
+
+static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
+{
+ struct btrfs_disk_key disk_key;
+ btrfs_node_key(eb, &disk_key, nr);
+ btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
+{
+ struct btrfs_disk_key disk_key;
+ btrfs_item_key(eb, &disk_key, nr);
+ btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+ struct btrfs_dir_item *item,
+ struct btrfs_key *key)
+{
+ struct btrfs_disk_key disk_key;
+ btrfs_dir_item_key(eb, item, &disk_key);
+ btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+
+static inline u8 btrfs_key_type(struct btrfs_key *key)
+{
+ return key->type;
+}
+
+static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val)
+{
+ key->type = val;
+}
+
+/* struct btrfs_header */
+BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
+ generation, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
+BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+
+static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+{
+ return (btrfs_header_flags(eb) & flag) == flag;
+}
+
+static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+{
+ u64 flags = btrfs_header_flags(eb);
+ btrfs_set_header_flags(eb, flags | flag);
+ return (flags & flag) == flag;
+}
+
+static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+{
+ u64 flags = btrfs_header_flags(eb);
+ btrfs_set_header_flags(eb, flags & ~flag);
+ return (flags & flag) == flag;
+}
+
+static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
+{
+ unsigned long ptr = offsetof(struct btrfs_header, fsid);
+ return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+{
+ unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid);
+ return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
+{
+ unsigned long ptr = offsetof(struct btrfs_super_block, fsid);
+ return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
+{
+ unsigned long ptr = offsetof(struct btrfs_header, csum);
+ return (u8 *)ptr;
+}
+
+static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
+{
+ return NULL;
+}
+
+static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
+{
+ return NULL;
+}
+
+static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
+{
+ return NULL;
+}
+
+static inline int btrfs_is_leaf(struct extent_buffer *eb)
+{
+ return (btrfs_header_level(eb) == 0);
+}
+
+/* struct btrfs_root_item */
+BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
+BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32);
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+
+/* struct btrfs_super_block */
+BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+ struct btrfs_super_block, sys_chunk_array_size, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
+ root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+ chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+ chunk_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
+ log_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
+ log_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
+ sectorsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
+ nodesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block,
+ leafsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
+ stripesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
+ root_dir_objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
+ num_devices, 64);
+
+static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
+{
+ return offsetof(struct btrfs_leaf, items);
+}
+
+/* struct btrfs_file_extent_item */
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+
+static inline unsigned long btrfs_file_extent_inline_start(struct
+ btrfs_file_extent_item *e)
+{
+ unsigned long offset = (unsigned long)e;
+ offset += offsetof(struct btrfs_file_extent_item, disk_bytenr);
+ return offset;
+}
+
+static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
+{
+ return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
+}
+
+static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+ struct btrfs_item *e)
+{
+ unsigned long offset;
+ offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
+ return btrfs_item_size(eb, e) - offset;
+}
+
+BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
+ disk_bytenr, 64);
+BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
+ disk_num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
+ offset, 64);
+BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
+ num_bytes, 64);
+
+static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+static inline int btrfs_set_root_name(struct btrfs_root *root,
+ const char *name, int len)
+{
+ /* if we already have a name just free it */
+ if (root->name)
+ kfree(root->name);
+
+ root->name = kmalloc(len+1, GFP_KERNEL);
+ if (!root->name)
+ return -ENOMEM;
+
+ memcpy(root->name, name, len);
+ root->name[len] ='\0';
+
+ return 0;
+}
+
+static inline u32 btrfs_level_size(struct btrfs_root *root, int level) {
+ if (level == 0)
+ return root->leafsize;
+ return root->nodesize;
+}
+
+/* helper function to cast into the data area of the leaf. */
+#define btrfs_item_ptr(leaf, slot, type) \
+ ((type *)(btrfs_leaf_data(leaf) + \
+ btrfs_item_offset_nr(leaf, slot)))
+
+#define btrfs_item_ptr_offset(leaf, slot) \
+ ((unsigned long)(btrfs_leaf_data(leaf) + \
+ btrfs_item_offset_nr(leaf, slot)))
+
+static inline struct dentry *fdentry(struct file *file) {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+ return file->f_dentry;
+#else
+ return file->f_path.dentry;
+#endif
+}
+
+/* extent-tree.c */
+int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_update_pinned_extents(struct btrfs_root *root,
+ u64 bytenr, u64 num, int pin);
+int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_key *key, u64 bytenr);
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
+struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
+ btrfs_fs_info *info,
+ u64 bytenr);
+struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
+ struct btrfs_block_group_cache
+ *hint, u64 search_start,
+ int data, int owner);
+struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u32 blocksize, u64 parent,
+ u64 root_objectid,
+ u64 ref_generation,
+ int level,
+ u64 hint,
+ u64 empty_size);
+struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u32 blocksize);
+int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size);
+int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset);
+int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 parent, u64 min_bytes,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins, u64 data);
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset,
+ struct btrfs_key *ins);
+int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset,
+ struct btrfs_key *ins);
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data);
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct extent_buffer *orig_buf, struct extent_buffer *buf,
+ u32 *nr_extents);
+int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct extent_buffer *buf, u32 nr_extents);
+int btrfs_update_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *orig_buf,
+ struct extent_buffer *buf, int start_slot, int nr);
+int btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner_objectid, u64 owner_offset, int pin);
+int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_io_tree *unpin);
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset);
+int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 orig_parent, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+int btrfs_free_block_groups(struct btrfs_fs_info *info);
+int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytes_used,
+ u64 type, u64 chunk_objectid, u64 chunk_offset,
+ u64 size);
+/* ctree.c */
+int btrfs_previous_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid,
+ int type);
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key);
+struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *key, int lowest_level,
+ int cache_only, u64 min_trans);
+int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+ struct btrfs_key *max_key,
+ struct btrfs_path *path, int cache_only,
+ u64 min_trans);
+int btrfs_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *parent, int parent_slot,
+ struct extent_buffer **cow_ret, u64 prealloc_dest);
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *buf,
+ struct extent_buffer **cow_ret, u64 new_root_objectid);
+int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, u32 data_size);
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u32 new_size, int from_end);
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_path *p, int
+ ins_len, int cow);
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *parent,
+ int start_slot, int cache_only, u64 *last_ret,
+ struct btrfs_key *progress);
+void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+void btrfs_init_path(struct btrfs_path *p);
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int slot, int nr);
+
+static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path)
+{
+ return btrfs_del_items(trans, root, path, path->slots[0], 1);
+}
+
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, void *data, u32 data_size);
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size, int nr);
+
+static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *key,
+ u32 data_size)
+{
+ return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
+}
+
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root);
+/* root-item.c */
+int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_key *key);
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_root_item
+ *item);
+int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_root_item
+ *item);
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
+ btrfs_root_item *item, struct btrfs_key *key);
+int btrfs_search_root(struct btrfs_root *root, u64 search_start,
+ u64 *found_objectid);
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
+ struct btrfs_root *latest_root);
+/* dir-item.c */
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, const char *name, int name_len, u64 dir,
+ struct btrfs_key *location, u8 type, u64 index);
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ int mod);
+struct btrfs_dir_item *
+btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ u64 objectid, const char *name, int name_len,
+ int mod);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len);
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_dir_item *di);
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const char *name,
+ u16 name_len, const void *data, u16 data_len,
+ u64 dir);
+struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, u16 name_len,
+ int mod);
+
+/* orphan.c */
+int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 offset);
+int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 offset);
+
+/* inode-map.c */
+int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
+ struct btrfs_root *fs_root,
+ u64 dirid, u64 *objectid);
+int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
+
+/* inode-item.c */
+int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, u64 index);
+int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, u64 *index);
+int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 objectid);
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path,
+ struct btrfs_key *location, int mod);
+
+/* file-item.c */
+int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
+ struct bio *bio);
+int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 objectid, u64 pos, u64 disk_offset,
+ u64 disk_num_bytes,
+ u64 num_bytes, u64 offset);
+int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 objectid,
+ u64 bytenr, int mod);
+int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode,
+ struct btrfs_ordered_sum *sums);
+int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
+ struct bio *bio);
+struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 objectid, u64 offset,
+ int cow);
+int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u64 isize);
+/* inode.c */
+
+/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
+#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
+#define ClearPageChecked ClearPageFsMisc
+#define SetPageChecked SetPageFsMisc
+#define PageChecked PageFsMisc
+#endif
+
+int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *dir, struct inode *inode,
+ const char *name, int name_len);
+int btrfs_add_link(struct btrfs_trans_handle *trans,
+ struct inode *parent_inode, struct inode *inode,
+ const char *name, int name_len, int add_backref, u64 index);
+int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode, u64 new_size,
+ u32 min_type);
+
+int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
+int btrfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
+int btrfs_create_subvol_root(struct btrfs_root *new_root,
+ struct btrfs_trans_handle *trans, u64 new_dirid,
+ struct btrfs_block_group_cache *block_group);
+
+void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
+ int namelen);
+
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
+ size_t size, struct bio *bio);
+
+static inline void dec_i_blocks(struct inode *inode, u64 dec)
+{
+ dec = dec >> 9;
+ if (dec <= inode->i_blocks)
+ inode->i_blocks -= dec;
+ else
+ inode->i_blocks = 0;
+}
+
+unsigned long btrfs_force_ra(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *file,
+ pgoff_t offset, pgoff_t last_index);
+int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
+ int for_del);
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_readpage(struct file *file, struct page *page);
+void btrfs_delete_inode(struct inode *inode);
+void btrfs_put_inode(struct inode *inode);
+void btrfs_read_locked_inode(struct inode *inode);
+int btrfs_write_inode(struct inode *inode, int wait);
+void btrfs_dirty_inode(struct inode *inode);
+struct inode *btrfs_alloc_inode(struct super_block *sb);
+void btrfs_destroy_inode(struct inode *inode);
+int btrfs_init_cachep(void);
+void btrfs_destroy_cachep(void);
+long btrfs_ioctl_trans_end(struct file *file);
+struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
+ struct btrfs_root *root);
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *is_new);
+int btrfs_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to);
+struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
+ size_t page_offset, u64 start, u64 end,
+ int create);
+int btrfs_update_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode);
+
+/* ioctl.c */
+long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/* file.c */
+int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end);
+int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
+extern struct file_operations btrfs_file_operations;
+int btrfs_drop_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode,
+ u64 start, u64 end, u64 inline_limit, u64 *hint_block);
+int btrfs_release_file(struct inode *inode, struct file *file);
+
+/* tree-defrag.c */
+int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int cache_only);
+
+/* sysfs.c */
+int btrfs_init_sysfs(void);
+void btrfs_exit_sysfs(void);
+int btrfs_sysfs_add_super(struct btrfs_fs_info *fs);
+int btrfs_sysfs_add_root(struct btrfs_root *root);
+void btrfs_sysfs_del_root(struct btrfs_root *root);
+void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
+
+/* xattr.c */
+ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+
+/* super.c */
+u64 btrfs_parse_size(char *str);
+int btrfs_parse_options(struct btrfs_root *root, char *options);
+int btrfs_sync_fs(struct super_block *sb, int wait);
+
+/* acl.c */
+int btrfs_check_acl(struct inode *inode, int mask);
+int btrfs_init_acl(struct inode *inode, struct inode *dir);
+int btrfs_acl_chmod(struct inode *inode);
+
+/* free-space-cache.c */
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+ u64 bytenr, u64 size);
+int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+ u64 bytenr, u64 size);
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
+ *block_group);
+struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
+ *block_group, u64 offset,
+ u64 bytes);
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+ u64 bytes);
+u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "hash.h"
+#include "transaction.h"
+
+static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
+ *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *cpu_key,
+ u32 data_size,
+ const char *name,
+ int name_len)
+{
+ int ret;
+ char *ptr;
+ struct btrfs_item *item;
+ struct extent_buffer *leaf;
+
+ ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+ if (ret == -EEXIST) {
+ struct btrfs_dir_item *di;
+ di = btrfs_match_dir_item_name(root, path, name, name_len);
+ if (di)
+ return ERR_PTR(-EEXIST);
+ ret = btrfs_extend_item(trans, root, path, data_size);
+ WARN_ON(ret > 0);
+ }
+ if (ret < 0)
+ return ERR_PTR(ret);
+ WARN_ON(ret > 0);
+ leaf = path->nodes[0];
+ item = btrfs_item_nr(leaf, path->slots[0]);
+ ptr = btrfs_item_ptr(leaf, path->slots[0], char);
+ BUG_ON(data_size > btrfs_item_size(leaf, item));
+ ptr += btrfs_item_size(leaf, item) - data_size;
+ return (struct btrfs_dir_item *)ptr;
+}
+
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const char *name,
+ u16 name_len, const void *data, u16 data_len,
+ u64 dir)
+{
+ int ret = 0;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *dir_item;
+ unsigned long name_ptr, data_ptr;
+ struct btrfs_key key, location;
+ struct btrfs_disk_key disk_key;
+ struct extent_buffer *leaf;
+ u32 data_size;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+ key.offset = btrfs_name_hash(name, name_len);
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ if (name_len + data_len + sizeof(struct btrfs_dir_item) >
+ BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
+ return -ENOSPC;
+
+ data_size = sizeof(*dir_item) + name_len + data_len;
+ dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+ name, name_len);
+ /*
+ * FIXME: at some point we should handle xattr's that are larger than
+ * what we can fit in our leaf. We set location to NULL b/c we arent
+ * pointing at anything else, that will change if we store the xattr
+ * data in a separate inode.
+ */
+ BUG_ON(IS_ERR(dir_item));
+ memset(&location, 0, sizeof(location));
+
+ leaf = path->nodes[0];
+ btrfs_cpu_key_to_disk(&disk_key, &location);
+ btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+ btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
+ btrfs_set_dir_name_len(leaf, dir_item, name_len);
+ btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+ btrfs_set_dir_data_len(leaf, dir_item, data_len);
+ name_ptr = (unsigned long)(dir_item + 1);
+ data_ptr = (unsigned long)((char *)name_ptr + name_len);
+
+ write_extent_buffer(leaf, name, name_ptr, name_len);
+ write_extent_buffer(leaf, data, data_ptr, data_len);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, const char *name, int name_len, u64 dir,
+ struct btrfs_key *location, u8 type, u64 index)
+{
+ int ret = 0;
+ int ret2 = 0;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *dir_item;
+ struct extent_buffer *leaf;
+ unsigned long name_ptr;
+ struct btrfs_key key;
+ struct btrfs_disk_key disk_key;
+ u32 data_size;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+ key.offset = btrfs_name_hash(name, name_len);
+ path = btrfs_alloc_path();
+ data_size = sizeof(*dir_item) + name_len;
+ dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+ name, name_len);
+ if (IS_ERR(dir_item)) {
+ ret = PTR_ERR(dir_item);
+ if (ret == -EEXIST)
+ goto second_insert;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ btrfs_cpu_key_to_disk(&disk_key, location);
+ btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+ btrfs_set_dir_type(leaf, dir_item, type);
+ btrfs_set_dir_data_len(leaf, dir_item, 0);
+ btrfs_set_dir_name_len(leaf, dir_item, name_len);
+ btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+ name_ptr = (unsigned long)(dir_item + 1);
+
+ write_extent_buffer(leaf, name, name_ptr, name_len);
+ btrfs_mark_buffer_dirty(leaf);
+
+second_insert:
+ /* FIXME, use some real flag for selecting the extra index */
+ if (root == root->fs_info->tree_root) {
+ ret = 0;
+ goto out;
+ }
+ btrfs_release_path(root, path);
+
+ btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+ key.offset = index;
+ dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+ name, name_len);
+ if (IS_ERR(dir_item)) {
+ ret2 = PTR_ERR(dir_item);
+ goto out;
+ }
+ leaf = path->nodes[0];
+ btrfs_cpu_key_to_disk(&disk_key, location);
+ btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+ btrfs_set_dir_type(leaf, dir_item, type);
+ btrfs_set_dir_data_len(leaf, dir_item, 0);
+ btrfs_set_dir_name_len(leaf, dir_item, name_len);
+ btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+ name_ptr = (unsigned long)(dir_item + 1);
+ write_extent_buffer(leaf, name, name_ptr, name_len);
+ btrfs_mark_buffer_dirty(leaf);
+out:
+ btrfs_free_path(path);
+ if (ret)
+ return ret;
+ if (ret2)
+ return ret2;
+ return 0;
+}
+
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ int mod)
+{
+ int ret;
+ struct btrfs_key key;
+ int ins_len = mod < 0 ? -1 : 0;
+ int cow = mod != 0;
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+
+ key.offset = btrfs_name_hash(name, name_len);
+
+ ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) {
+ if (path->slots[0] == 0)
+ return NULL;
+ path->slots[0]--;
+ }
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != dir ||
+ btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
+ found_key.offset != key.offset)
+ return NULL;
+
+ return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+struct btrfs_dir_item *
+btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ u64 objectid, const char *name, int name_len,
+ int mod)
+{
+ int ret;
+ struct btrfs_key key;
+ int ins_len = mod < 0 ? -1 : 0;
+ int cow = mod != 0;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+ key.offset = objectid;
+
+ ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0)
+ return ERR_PTR(-ENOENT);
+ return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, u16 name_len,
+ int mod)
+{
+ int ret;
+ struct btrfs_key key;
+ int ins_len = mod < 0 ? -1 : 0;
+ int cow = mod != 0;
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+
+ key.objectid = dir;
+ btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+ key.offset = btrfs_name_hash(name, name_len);
+ ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) {
+ if (path->slots[0] == 0)
+ return NULL;
+ path->slots[0]--;
+ }
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != dir ||
+ btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
+ found_key.offset != key.offset)
+ return NULL;
+
+ return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len)
+{
+ struct btrfs_dir_item *dir_item;
+ unsigned long name_ptr;
+ u32 total_len;
+ u32 cur = 0;
+ u32 this_len;
+ struct extent_buffer *leaf;
+
+ leaf = path->nodes[0];
+ dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+ total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+ while(cur < total_len) {
+ this_len = sizeof(*dir_item) +
+ btrfs_dir_name_len(leaf, dir_item) +
+ btrfs_dir_data_len(leaf, dir_item);
+ name_ptr = (unsigned long)(dir_item + 1);
+
+ if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
+ memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+ return dir_item;
+
+ cur += this_len;
+ dir_item = (struct btrfs_dir_item *)((char *)dir_item +
+ this_len);
+ }
+ return NULL;
+}
+
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_dir_item *di)
+{
+
+ struct extent_buffer *leaf;
+ u32 sub_item_len;
+ u32 item_len;
+ int ret = 0;
+
+ leaf = path->nodes[0];
+ sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
+ btrfs_dir_data_len(leaf, di);
+ item_len = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (sub_item_len == item_len) {
+ ret = btrfs_del_item(trans, root, path);
+ } else {
+ /* MARKER */
+ unsigned long ptr = (unsigned long)di;
+ unsigned long start;
+
+ start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+ item_len - (ptr + sub_item_len - start));
+ ret = btrfs_truncate_item(trans, root, path,
+ item_len - sub_item_len, 1);
+ }
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+#include <linux/swap.h>
+#include <linux/radix-tree.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h> // for block_sync_page
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+# include <linux/freezer.h>
+#else
+# include <linux/sched.h>
+#endif
+#include "crc32c.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "volumes.h"
+#include "print-tree.h"
+#include "async-thread.h"
+#include "locking.h"
+#include "ref-cache.h"
+#include "tree-log.h"
+
+#if 0
+static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
+{
+ if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) {
+ printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
+ (unsigned long long)extent_buffer_blocknr(buf),
+ (unsigned long long)btrfs_header_blocknr(buf));
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+static struct extent_io_ops btree_extent_io_ops;
+static void end_workqueue_fn(struct btrfs_work *work);
+
+struct end_io_wq {
+ struct bio *bio;
+ bio_end_io_t *end_io;
+ void *private;
+ struct btrfs_fs_info *info;
+ int error;
+ int metadata;
+ struct list_head list;
+ struct btrfs_work work;
+};
+
+struct async_submit_bio {
+ struct inode *inode;
+ struct bio *bio;
+ struct list_head list;
+ extent_submit_bio_hook_t *submit_bio_hook;
+ int rw;
+ int mirror_num;
+ struct btrfs_work work;
+};
+
+struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
+ size_t page_offset, u64 start, u64 len,
+ int create)
+{
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
+ int ret;
+
+ spin_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, start, len);
+ if (em) {
+ em->bdev =
+ BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+ spin_unlock(&em_tree->lock);
+ goto out;
+ }
+ spin_unlock(&em_tree->lock);
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em) {
+ em = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ em->start = 0;
+ em->len = (u64)-1;
+ em->block_start = 0;
+ em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+
+ spin_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em);
+ if (ret == -EEXIST) {
+ u64 failed_start = em->start;
+ u64 failed_len = em->len;
+
+ printk("failed to insert %Lu %Lu -> %Lu into tree\n",
+ em->start, em->len, em->block_start);
+ free_extent_map(em);
+ em = lookup_extent_mapping(em_tree, start, len);
+ if (em) {
+ printk("after failing, found %Lu %Lu %Lu\n",
+ em->start, em->len, em->block_start);
+ ret = 0;
+ } else {
+ em = lookup_extent_mapping(em_tree, failed_start,
+ failed_len);
+ if (em) {
+ printk("double failure lookup gives us "
+ "%Lu %Lu -> %Lu\n", em->start,
+ em->len, em->block_start);
+ free_extent_map(em);
+ }
+ ret = -EIO;
+ }
+ } else if (ret) {
+ free_extent_map(em);
+ em = NULL;
+ }
+ spin_unlock(&em_tree->lock);
+
+ if (ret)
+ em = ERR_PTR(ret);
+out:
+ return em;
+}
+
+u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+{
+ return btrfs_crc32c(seed, data, len);
+}
+
+void btrfs_csum_final(u32 crc, char *result)
+{
+ *(__le32 *)result = ~cpu_to_le32(crc);
+}
+
+static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+ int verify)
+{
+ char result[BTRFS_CRC32_SIZE];
+ unsigned long len;
+ unsigned long cur_len;
+ unsigned long offset = BTRFS_CSUM_SIZE;
+ char *map_token = NULL;
+ char *kaddr;
+ unsigned long map_start;
+ unsigned long map_len;
+ int err;
+ u32 crc = ~(u32)0;
+
+ len = buf->len - offset;
+ while(len > 0) {
+ err = map_private_extent_buffer(buf, offset, 32,
+ &map_token, &kaddr,
+ &map_start, &map_len, KM_USER0);
+ if (err) {
+ printk("failed to map extent buffer! %lu\n",
+ offset);
+ return 1;
+ }
+ cur_len = min(len, map_len - (offset - map_start));
+ crc = btrfs_csum_data(root, kaddr + offset - map_start,
+ crc, cur_len);
+ len -= cur_len;
+ offset += cur_len;
+ unmap_extent_buffer(buf, map_token, KM_USER0);
+ }
+ btrfs_csum_final(crc, result);
+
+ if (verify) {
+ /* FIXME, this is not good */
+ if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
+ u32 val;
+ u32 found = 0;
+ memcpy(&found, result, BTRFS_CRC32_SIZE);
+
+ read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
+ printk("btrfs: %s checksum verify failed on %llu "
+ "wanted %X found %X level %d\n",
+ root->fs_info->sb->s_id,
+ buf->start, val, found, btrfs_header_level(buf));
+ return 1;
+ }
+ } else {
+ write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
+ }
+ return 0;
+}
+
+static int verify_parent_transid(struct extent_io_tree *io_tree,
+ struct extent_buffer *eb, u64 parent_transid)
+{
+ int ret;
+
+ if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
+ return 0;
+
+ lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS);
+ if (extent_buffer_uptodate(io_tree, eb) &&
+ btrfs_header_generation(eb) == parent_transid) {
+ ret = 0;
+ goto out;
+ }
+ printk("parent transid verify failed on %llu wanted %llu found %llu\n",
+ (unsigned long long)eb->start,
+ (unsigned long long)parent_transid,
+ (unsigned long long)btrfs_header_generation(eb));
+ ret = 1;
+ clear_extent_buffer_uptodate(io_tree, eb);
+out:
+ unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
+ GFP_NOFS);
+ return ret;
+
+}
+
+static int btree_read_extent_buffer_pages(struct btrfs_root *root,
+ struct extent_buffer *eb,
+ u64 start, u64 parent_transid)
+{
+ struct extent_io_tree *io_tree;
+ int ret;
+ int num_copies = 0;
+ int mirror_num = 0;
+
+ io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
+ while (1) {
+ ret = read_extent_buffer_pages(io_tree, eb, start, 1,
+ btree_get_extent, mirror_num);
+ if (!ret &&
+ !verify_parent_transid(io_tree, eb, parent_transid))
+ return ret;
+printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num);
+ num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+ eb->start, eb->len);
+ if (num_copies == 1)
+ return ret;
+
+ mirror_num++;
+ if (mirror_num > num_copies)
+ return ret;
+ }
+ return -EIO;
+}
+
+int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
+{
+ struct extent_io_tree *tree;
+ u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 found_start;
+ int found_level;
+ unsigned long len;
+ struct extent_buffer *eb;
+ int ret;
+
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+ if (page->private == EXTENT_PAGE_PRIVATE)
+ goto out;
+ if (!page->private)
+ goto out;
+ len = page->private >> 2;
+ if (len == 0) {
+ WARN_ON(1);
+ }
+ eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+ ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
+ btrfs_header_generation(eb));
+ BUG_ON(ret);
+ found_start = btrfs_header_bytenr(eb);
+ if (found_start != start) {
+ printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
+ start, found_start, len);
+ WARN_ON(1);
+ goto err;
+ }
+ if (eb->first_page != page) {
+ printk("bad first page %lu %lu\n", eb->first_page->index,
+ page->index);
+ WARN_ON(1);
+ goto err;
+ }
+ if (!PageUptodate(page)) {
+ printk("csum not up to date page %lu\n", page->index);
+ WARN_ON(1);
+ goto err;
+ }
+ found_level = btrfs_header_level(eb);
+
+ csum_tree_block(root, eb, 0);
+err:
+ free_extent_buffer(eb);
+out:
+ return 0;
+}
+
+int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
+ struct extent_state *state)
+{
+ struct extent_io_tree *tree;
+ u64 found_start;
+ int found_level;
+ unsigned long len;
+ struct extent_buffer *eb;
+ struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+ int ret = 0;
+
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+ if (page->private == EXTENT_PAGE_PRIVATE)
+ goto out;
+ if (!page->private)
+ goto out;
+ len = page->private >> 2;
+ if (len == 0) {
+ WARN_ON(1);
+ }
+ eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+
+ found_start = btrfs_header_bytenr(eb);
+ if (found_start != start) {
+ printk("bad tree block start %llu %llu\n",
+ (unsigned long long)found_start,
+ (unsigned long long)eb->start);
+ ret = -EIO;
+ goto err;
+ }
+ if (eb->first_page != page) {
+ printk("bad first page %lu %lu\n", eb->first_page->index,
+ page->index);
+ WARN_ON(1);
+ ret = -EIO;
+ goto err;
+ }
+ if (memcmp_extent_buffer(eb, root->fs_info->fsid,
+ (unsigned long)btrfs_header_fsid(eb),
+ BTRFS_FSID_SIZE)) {
+ printk("bad fsid on block %Lu\n", eb->start);
+ ret = -EIO;
+ goto err;
+ }
+ found_level = btrfs_header_level(eb);
+
+ ret = csum_tree_block(root, eb, 1);
+ if (ret)
+ ret = -EIO;
+
+ end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
+ end = eb->start + end - 1;
+err:
+ free_extent_buffer(eb);
+out:
+ return ret;
+}
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_workqueue_bio(struct bio *bio, int err)
+#else
+static int end_workqueue_bio(struct bio *bio,
+ unsigned int bytes_done, int err)
+#endif
+{
+ struct end_io_wq *end_io_wq = bio->bi_private;
+ struct btrfs_fs_info *fs_info;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ if (bio->bi_size)
+ return 1;
+#endif
+
+ fs_info = end_io_wq->info;
+ end_io_wq->error = err;
+ end_io_wq->work.func = end_workqueue_fn;
+ end_io_wq->work.flags = 0;
+ if (bio->bi_rw & (1 << BIO_RW))
+ btrfs_queue_worker(&fs_info->endio_write_workers,
+ &end_io_wq->work);
+ else
+ btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ return 0;
+#endif
+}
+
+int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+ int metadata)
+{
+ struct end_io_wq *end_io_wq;
+ end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
+ if (!end_io_wq)
+ return -ENOMEM;
+
+ end_io_wq->private = bio->bi_private;
+ end_io_wq->end_io = bio->bi_end_io;
+ end_io_wq->info = info;
+ end_io_wq->error = 0;
+ end_io_wq->bio = bio;
+ end_io_wq->metadata = metadata;
+
+ bio->bi_private = end_io_wq;
+ bio->bi_end_io = end_workqueue_bio;
+ return 0;
+}
+
+unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
+{
+ unsigned long limit = min_t(unsigned long,
+ info->workers.max_workers,
+ info->fs_devices->open_devices);
+ return 256 * limit;
+}
+
+int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
+{
+ return atomic_read(&info->nr_async