Merge tag 'for-4.17-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
[muen/linux.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/sched/signal.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/sort.h>
24 #include <linux/rcupdate.h>
25 #include <linux/kthread.h>
26 #include <linux/slab.h>
27 #include <linux/ratelimit.h>
28 #include <linux/percpu_counter.h>
29 #include <linux/lockdep.h>
30 #include <linux/crc32c.h>
31 #include "tree-log.h"
32 #include "disk-io.h"
33 #include "print-tree.h"
34 #include "volumes.h"
35 #include "raid56.h"
36 #include "locking.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "math.h"
40 #include "sysfs.h"
41 #include "qgroup.h"
42 #include "ref-verify.h"
43
44 #undef SCRAMBLE_DELAYED_REFS
45
/*
 * Values accepted by the 'force' argument of do_chunk_alloc():
 *
 * CHUNK_ALLOC_NO_FORCE - allocate a chunk only if one is really needed.
 *
 * CHUNK_ALLOC_LIMITED  - try to allocate a chunk only when very few chunks
 *                        have been allocated so far.  The clustering code
 *                        uses this to keep a good pool of storage to cluster
 *                        in without filling the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE    - an allocation must be attempted.
 */
enum {
	CHUNK_ALLOC_NO_FORCE,	/* 0 */
	CHUNK_ALLOC_LIMITED,	/* 1 */
	CHUNK_ALLOC_FORCE,	/* 2 */
};
65
66 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
67                                struct btrfs_fs_info *fs_info,
68                                 struct btrfs_delayed_ref_node *node, u64 parent,
69                                 u64 root_objectid, u64 owner_objectid,
70                                 u64 owner_offset, int refs_to_drop,
71                                 struct btrfs_delayed_extent_op *extra_op);
72 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
73                                     struct extent_buffer *leaf,
74                                     struct btrfs_extent_item *ei);
75 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
76                                       struct btrfs_fs_info *fs_info,
77                                       u64 parent, u64 root_objectid,
78                                       u64 flags, u64 owner, u64 offset,
79                                       struct btrfs_key *ins, int ref_mod);
80 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
81                                      struct btrfs_fs_info *fs_info,
82                                      u64 parent, u64 root_objectid,
83                                      u64 flags, struct btrfs_disk_key *key,
84                                      int level, struct btrfs_key *ins);
85 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
86                           struct btrfs_fs_info *fs_info, u64 flags,
87                           int force);
88 static int find_next_key(struct btrfs_path *path, int level,
89                          struct btrfs_key *key);
90 static void dump_space_info(struct btrfs_fs_info *fs_info,
91                             struct btrfs_space_info *info, u64 bytes,
92                             int dump_block_groups);
93 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
94                                u64 num_bytes);
95 static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
96                                      struct btrfs_space_info *space_info,
97                                      u64 num_bytes);
98 static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
99                                      struct btrfs_space_info *space_info,
100                                      u64 num_bytes);
101
102 static noinline int
103 block_group_cache_done(struct btrfs_block_group_cache *cache)
104 {
105         smp_mb();
106         return cache->cached == BTRFS_CACHE_FINISHED ||
107                 cache->cached == BTRFS_CACHE_ERROR;
108 }
109
110 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
111 {
112         return (cache->flags & bits) == bits;
113 }
114
115 void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
116 {
117         atomic_inc(&cache->count);
118 }
119
120 void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
121 {
122         if (atomic_dec_and_test(&cache->count)) {
123                 WARN_ON(cache->pinned > 0);
124                 WARN_ON(cache->reserved > 0);
125
126                 /*
127                  * If not empty, someone is still holding mutex of
128                  * full_stripe_lock, which can only be released by caller.
129                  * And it will definitely cause use-after-free when caller
130                  * tries to release full stripe lock.
131                  *
132                  * No better way to resolve, but only to warn.
133                  */
134                 WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
135                 kfree(cache->free_space_ctl);
136                 kfree(cache);
137         }
138 }
139
140 /*
141  * this adds the block group to the fs_info rb tree for the block group
142  * cache
143  */
144 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
145                                 struct btrfs_block_group_cache *block_group)
146 {
147         struct rb_node **p;
148         struct rb_node *parent = NULL;
149         struct btrfs_block_group_cache *cache;
150
151         spin_lock(&info->block_group_cache_lock);
152         p = &info->block_group_cache_tree.rb_node;
153
154         while (*p) {
155                 parent = *p;
156                 cache = rb_entry(parent, struct btrfs_block_group_cache,
157                                  cache_node);
158                 if (block_group->key.objectid < cache->key.objectid) {
159                         p = &(*p)->rb_left;
160                 } else if (block_group->key.objectid > cache->key.objectid) {
161                         p = &(*p)->rb_right;
162                 } else {
163                         spin_unlock(&info->block_group_cache_lock);
164                         return -EEXIST;
165                 }
166         }
167
168         rb_link_node(&block_group->cache_node, parent, p);
169         rb_insert_color(&block_group->cache_node,
170                         &info->block_group_cache_tree);
171
172         if (info->first_logical_byte > block_group->key.objectid)
173                 info->first_logical_byte = block_group->key.objectid;
174
175         spin_unlock(&info->block_group_cache_lock);
176
177         return 0;
178 }
179
180 /*
181  * This will return the block group at or after bytenr if contains is 0, else
182  * it will return the block group that contains the bytenr
183  */
184 static struct btrfs_block_group_cache *
185 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
186                               int contains)
187 {
188         struct btrfs_block_group_cache *cache, *ret = NULL;
189         struct rb_node *n;
190         u64 end, start;
191
192         spin_lock(&info->block_group_cache_lock);
193         n = info->block_group_cache_tree.rb_node;
194
195         while (n) {
196                 cache = rb_entry(n, struct btrfs_block_group_cache,
197                                  cache_node);
198                 end = cache->key.objectid + cache->key.offset - 1;
199                 start = cache->key.objectid;
200
201                 if (bytenr < start) {
202                         if (!contains && (!ret || start < ret->key.objectid))
203                                 ret = cache;
204                         n = n->rb_left;
205                 } else if (bytenr > start) {
206                         if (contains && bytenr <= end) {
207                                 ret = cache;
208                                 break;
209                         }
210                         n = n->rb_right;
211                 } else {
212                         ret = cache;
213                         break;
214                 }
215         }
216         if (ret) {
217                 btrfs_get_block_group(ret);
218                 if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
219                         info->first_logical_byte = ret->key.objectid;
220         }
221         spin_unlock(&info->block_group_cache_lock);
222
223         return ret;
224 }
225
226 static int add_excluded_extent(struct btrfs_fs_info *fs_info,
227                                u64 start, u64 num_bytes)
228 {
229         u64 end = start + num_bytes - 1;
230         set_extent_bits(&fs_info->freed_extents[0],
231                         start, end, EXTENT_UPTODATE);
232         set_extent_bits(&fs_info->freed_extents[1],
233                         start, end, EXTENT_UPTODATE);
234         return 0;
235 }
236
237 static void free_excluded_extents(struct btrfs_fs_info *fs_info,
238                                   struct btrfs_block_group_cache *cache)
239 {
240         u64 start, end;
241
242         start = cache->key.objectid;
243         end = start + cache->key.offset - 1;
244
245         clear_extent_bits(&fs_info->freed_extents[0],
246                           start, end, EXTENT_UPTODATE);
247         clear_extent_bits(&fs_info->freed_extents[1],
248                           start, end, EXTENT_UPTODATE);
249 }
250
251 static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
252                                  struct btrfs_block_group_cache *cache)
253 {
254         u64 bytenr;
255         u64 *logical;
256         int stripe_len;
257         int i, nr, ret;
258
259         if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
260                 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
261                 cache->bytes_super += stripe_len;
262                 ret = add_excluded_extent(fs_info, cache->key.objectid,
263                                           stripe_len);
264                 if (ret)
265                         return ret;
266         }
267
268         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
269                 bytenr = btrfs_sb_offset(i);
270                 ret = btrfs_rmap_block(fs_info, cache->key.objectid,
271                                        bytenr, 0, &logical, &nr, &stripe_len);
272                 if (ret)
273                         return ret;
274
275                 while (nr--) {
276                         u64 start, len;
277
278                         if (logical[nr] > cache->key.objectid +
279                             cache->key.offset)
280                                 continue;
281
282                         if (logical[nr] + stripe_len <= cache->key.objectid)
283                                 continue;
284
285                         start = logical[nr];
286                         if (start < cache->key.objectid) {
287                                 start = cache->key.objectid;
288                                 len = (logical[nr] + stripe_len) - start;
289                         } else {
290                                 len = min_t(u64, stripe_len,
291                                             cache->key.objectid +
292                                             cache->key.offset - start);
293                         }
294
295                         cache->bytes_super += len;
296                         ret = add_excluded_extent(fs_info, start, len);
297                         if (ret) {
298                                 kfree(logical);
299                                 return ret;
300                         }
301                 }
302
303                 kfree(logical);
304         }
305         return 0;
306 }
307
308 static struct btrfs_caching_control *
309 get_caching_control(struct btrfs_block_group_cache *cache)
310 {
311         struct btrfs_caching_control *ctl;
312
313         spin_lock(&cache->lock);
314         if (!cache->caching_ctl) {
315                 spin_unlock(&cache->lock);
316                 return NULL;
317         }
318
319         ctl = cache->caching_ctl;
320         refcount_inc(&ctl->count);
321         spin_unlock(&cache->lock);
322         return ctl;
323 }
324
325 static void put_caching_control(struct btrfs_caching_control *ctl)
326 {
327         if (refcount_dec_and_test(&ctl->count))
328                 kfree(ctl);
329 }
330
#ifdef CONFIG_BTRFS_DEBUG
/*
 * Debug helper: remove every other chunk-sized piece from the free space of
 * @block_group.  The chunk size is nodesize for metadata block groups and
 * sectorsize otherwise; one chunk is removed at every stride of two chunks.
 */
static void fragment_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 pos = block_group->key.objectid;
	u64 remaining = block_group->key.offset;
	u64 chunk;
	u64 stride;

	if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA)
		chunk = fs_info->nodesize;
	else
		chunk = fs_info->sectorsize;
	stride = chunk << 1;

	while (remaining > chunk) {
		btrfs_remove_free_space(block_group, pos, chunk);
		pos += stride;
		/* Clamp at zero so the unsigned counter cannot wrap. */
		remaining = (remaining < stride) ? 0 : remaining - stride;
	}
}
#endif
351
352 /*
353  * this is only called by cache_block_group, since we could have freed extents
354  * we need to check the pinned_extents for any extents that can't be used yet
355  * since their free space will be released as soon as the transaction commits.
356  */
357 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
358                        struct btrfs_fs_info *info, u64 start, u64 end)
359 {
360         u64 extent_start, extent_end, size, total_added = 0;
361         int ret;
362
363         while (start < end) {
364                 ret = find_first_extent_bit(info->pinned_extents, start,
365                                             &extent_start, &extent_end,
366                                             EXTENT_DIRTY | EXTENT_UPTODATE,
367                                             NULL);
368                 if (ret)
369                         break;
370
371                 if (extent_start <= start) {
372                         start = extent_end + 1;
373                 } else if (extent_start > start && extent_start < end) {
374                         size = extent_start - start;
375                         total_added += size;
376                         ret = btrfs_add_free_space(block_group, start,
377                                                    size);
378                         BUG_ON(ret); /* -ENOMEM or logic error */
379                         start = extent_end + 1;
380                 } else {
381                         break;
382                 }
383         }
384
385         if (start < end) {
386                 size = end - start;
387                 total_added += size;
388                 ret = btrfs_add_free_space(block_group, start, size);
389                 BUG_ON(ret); /* -ENOMEM or logic error */
390         }
391
392         return total_added;
393 }
394
395 static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
396 {
397         struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
398         struct btrfs_fs_info *fs_info = block_group->fs_info;
399         struct btrfs_root *extent_root = fs_info->extent_root;
400         struct btrfs_path *path;
401         struct extent_buffer *leaf;
402         struct btrfs_key key;
403         u64 total_found = 0;
404         u64 last = 0;
405         u32 nritems;
406         int ret;
407         bool wakeup = true;
408
409         path = btrfs_alloc_path();
410         if (!path)
411                 return -ENOMEM;
412
413         last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
414
415 #ifdef CONFIG_BTRFS_DEBUG
416         /*
417          * If we're fragmenting we don't want to make anybody think we can
418          * allocate from this block group until we've had a chance to fragment
419          * the free space.
420          */
421         if (btrfs_should_fragment_free_space(block_group))
422                 wakeup = false;
423 #endif
424         /*
425          * We don't want to deadlock with somebody trying to allocate a new
426          * extent for the extent root while also trying to search the extent
427          * root to add free space.  So we skip locking and search the commit
428          * root, since its read-only
429          */
430         path->skip_locking = 1;
431         path->search_commit_root = 1;
432         path->reada = READA_FORWARD;
433
434         key.objectid = last;
435         key.offset = 0;
436         key.type = BTRFS_EXTENT_ITEM_KEY;
437
438 next:
439         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
440         if (ret < 0)
441                 goto out;
442
443         leaf = path->nodes[0];
444         nritems = btrfs_header_nritems(leaf);
445
446         while (1) {
447                 if (btrfs_fs_closing(fs_info) > 1) {
448                         last = (u64)-1;
449                         break;
450                 }
451
452                 if (path->slots[0] < nritems) {
453                         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
454                 } else {
455                         ret = find_next_key(path, 0, &key);
456                         if (ret)
457                                 break;
458
459                         if (need_resched() ||
460                             rwsem_is_contended(&fs_info->commit_root_sem)) {
461                                 if (wakeup)
462                                         caching_ctl->progress = last;
463                                 btrfs_release_path(path);
464                                 up_read(&fs_info->commit_root_sem);
465                                 mutex_unlock(&caching_ctl->mutex);
466                                 cond_resched();
467                                 mutex_lock(&caching_ctl->mutex);
468                                 down_read(&fs_info->commit_root_sem);
469                                 goto next;
470                         }
471
472                         ret = btrfs_next_leaf(extent_root, path);
473                         if (ret < 0)
474                                 goto out;
475                         if (ret)
476                                 break;
477                         leaf = path->nodes[0];
478                         nritems = btrfs_header_nritems(leaf);
479                         continue;
480                 }
481
482                 if (key.objectid < last) {
483                         key.objectid = last;
484                         key.offset = 0;
485                         key.type = BTRFS_EXTENT_ITEM_KEY;
486
487                         if (wakeup)
488                                 caching_ctl->progress = last;
489                         btrfs_release_path(path);
490                         goto next;
491                 }
492
493                 if (key.objectid < block_group->key.objectid) {
494                         path->slots[0]++;
495                         continue;
496                 }
497
498                 if (key.objectid >= block_group->key.objectid +
499                     block_group->key.offset)
500                         break;
501
502                 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
503                     key.type == BTRFS_METADATA_ITEM_KEY) {
504                         total_found += add_new_free_space(block_group,
505                                                           fs_info, last,
506                                                           key.objectid);
507                         if (key.type == BTRFS_METADATA_ITEM_KEY)
508                                 last = key.objectid +
509                                         fs_info->nodesize;
510                         else
511                                 last = key.objectid + key.offset;
512
513                         if (total_found > CACHING_CTL_WAKE_UP) {
514                                 total_found = 0;
515                                 if (wakeup)
516                                         wake_up(&caching_ctl->wait);
517                         }
518                 }
519                 path->slots[0]++;
520         }
521         ret = 0;
522
523         total_found += add_new_free_space(block_group, fs_info, last,
524                                           block_group->key.objectid +
525                                           block_group->key.offset);
526         caching_ctl->progress = (u64)-1;
527
528 out:
529         btrfs_free_path(path);
530         return ret;
531 }
532
533 static noinline void caching_thread(struct btrfs_work *work)
534 {
535         struct btrfs_block_group_cache *block_group;
536         struct btrfs_fs_info *fs_info;
537         struct btrfs_caching_control *caching_ctl;
538         int ret;
539
540         caching_ctl = container_of(work, struct btrfs_caching_control, work);
541         block_group = caching_ctl->block_group;
542         fs_info = block_group->fs_info;
543
544         mutex_lock(&caching_ctl->mutex);
545         down_read(&fs_info->commit_root_sem);
546
547         if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
548                 ret = load_free_space_tree(caching_ctl);
549         else
550                 ret = load_extent_tree_free(caching_ctl);
551
552         spin_lock(&block_group->lock);
553         block_group->caching_ctl = NULL;
554         block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
555         spin_unlock(&block_group->lock);
556
557 #ifdef CONFIG_BTRFS_DEBUG
558         if (btrfs_should_fragment_free_space(block_group)) {
559                 u64 bytes_used;
560
561                 spin_lock(&block_group->space_info->lock);
562                 spin_lock(&block_group->lock);
563                 bytes_used = block_group->key.offset -
564                         btrfs_block_group_used(&block_group->item);
565                 block_group->space_info->bytes_used += bytes_used >> 1;
566                 spin_unlock(&block_group->lock);
567                 spin_unlock(&block_group->space_info->lock);
568                 fragment_free_space(block_group);
569         }
570 #endif
571
572         caching_ctl->progress = (u64)-1;
573
574         up_read(&fs_info->commit_root_sem);
575         free_excluded_extents(fs_info, block_group);
576         mutex_unlock(&caching_ctl->mutex);
577
578         wake_up(&caching_ctl->wait);
579
580         put_caching_control(caching_ctl);
581         btrfs_put_block_group(block_group);
582 }
583
584 static int cache_block_group(struct btrfs_block_group_cache *cache,
585                              int load_cache_only)
586 {
587         DEFINE_WAIT(wait);
588         struct btrfs_fs_info *fs_info = cache->fs_info;
589         struct btrfs_caching_control *caching_ctl;
590         int ret = 0;
591
592         caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
593         if (!caching_ctl)
594                 return -ENOMEM;
595
596         INIT_LIST_HEAD(&caching_ctl->list);
597         mutex_init(&caching_ctl->mutex);
598         init_waitqueue_head(&caching_ctl->wait);
599         caching_ctl->block_group = cache;
600         caching_ctl->progress = cache->key.objectid;
601         refcount_set(&caching_ctl->count, 1);
602         btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
603                         caching_thread, NULL, NULL);
604
605         spin_lock(&cache->lock);
606         /*
607          * This should be a rare occasion, but this could happen I think in the
608          * case where one thread starts to load the space cache info, and then
609          * some other thread starts a transaction commit which tries to do an
610          * allocation while the other thread is still loading the space cache
611          * info.  The previous loop should have kept us from choosing this block
612          * group, but if we've moved to the state where we will wait on caching
613          * block groups we need to first check if we're doing a fast load here,
614          * so we can wait for it to finish, otherwise we could end up allocating
615          * from a block group who's cache gets evicted for one reason or
616          * another.
617          */
618         while (cache->cached == BTRFS_CACHE_FAST) {
619                 struct btrfs_caching_control *ctl;
620
621                 ctl = cache->caching_ctl;
622                 refcount_inc(&ctl->count);
623                 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
624                 spin_unlock(&cache->lock);
625
626                 schedule();
627
628                 finish_wait(&ctl->wait, &wait);
629                 put_caching_control(ctl);
630                 spin_lock(&cache->lock);
631         }
632
633         if (cache->cached != BTRFS_CACHE_NO) {
634                 spin_unlock(&cache->lock);
635                 kfree(caching_ctl);
636                 return 0;
637         }
638         WARN_ON(cache->caching_ctl);
639         cache->caching_ctl = caching_ctl;
640         cache->cached = BTRFS_CACHE_FAST;
641         spin_unlock(&cache->lock);
642
643         if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
644                 mutex_lock(&caching_ctl->mutex);
645                 ret = load_free_space_cache(fs_info, cache);
646
647                 spin_lock(&cache->lock);
648                 if (ret == 1) {
649                         cache->caching_ctl = NULL;
650                         cache->cached = BTRFS_CACHE_FINISHED;
651                         cache->last_byte_to_unpin = (u64)-1;
652                         caching_ctl->progress = (u64)-1;
653                 } else {
654                         if (load_cache_only) {
655                                 cache->caching_ctl = NULL;
656                                 cache->cached = BTRFS_CACHE_NO;
657                         } else {
658                                 cache->cached = BTRFS_CACHE_STARTED;
659                                 cache->has_caching_ctl = 1;
660                         }
661                 }
662                 spin_unlock(&cache->lock);
663 #ifdef CONFIG_BTRFS_DEBUG
664                 if (ret == 1 &&
665                     btrfs_should_fragment_free_space(cache)) {
666                         u64 bytes_used;
667
668                         spin_lock(&cache->space_info->lock);
669                         spin_lock(&cache->lock);
670                         bytes_used = cache->key.offset -
671                                 btrfs_block_group_used(&cache->item);
672                         cache->space_info->bytes_used += bytes_used >> 1;
673                         spin_unlock(&cache->lock);
674                         spin_unlock(&cache->space_info->lock);
675                         fragment_free_space(cache);
676                 }
677 #endif
678                 mutex_unlock(&caching_ctl->mutex);
679
680                 wake_up(&caching_ctl->wait);
681                 if (ret == 1) {
682                         put_caching_control(caching_ctl);
683                         free_excluded_extents(fs_info, cache);
684                         return 0;
685                 }
686         } else {
687                 /*
688                  * We're either using the free space tree or no caching at all.
689                  * Set cached to the appropriate value and wakeup any waiters.
690                  */
691                 spin_lock(&cache->lock);
692                 if (load_cache_only) {
693                         cache->caching_ctl = NULL;
694                         cache->cached = BTRFS_CACHE_NO;
695                 } else {
696                         cache->cached = BTRFS_CACHE_STARTED;
697                         cache->has_caching_ctl = 1;
698                 }
699                 spin_unlock(&cache->lock);
700                 wake_up(&caching_ctl->wait);
701         }
702
703         if (load_cache_only) {
704                 put_caching_control(caching_ctl);
705                 return 0;
706         }
707
708         down_write(&fs_info->commit_root_sem);
709         refcount_inc(&caching_ctl->count);
710         list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
711         up_write(&fs_info->commit_root_sem);
712
713         btrfs_get_block_group(cache);
714
715         btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
716
717         return ret;
718 }
719
720 /*
721  * return the block group that starts at or after bytenr
722  */
723 static struct btrfs_block_group_cache *
724 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
725 {
726         return block_group_cache_tree_search(info, bytenr, 0);
727 }
728
729 /*
730  * return the block group that contains the given bytenr
731  */
732 struct btrfs_block_group_cache *btrfs_lookup_block_group(
733                                                  struct btrfs_fs_info *info,
734                                                  u64 bytenr)
735 {
736         return block_group_cache_tree_search(info, bytenr, 1);
737 }
738
739 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
740                                                   u64 flags)
741 {
742         struct list_head *head = &info->space_info;
743         struct btrfs_space_info *found;
744
745         flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
746
747         rcu_read_lock();
748         list_for_each_entry_rcu(found, head, list) {
749                 if (found->flags & flags) {
750                         rcu_read_unlock();
751                         return found;
752                 }
753         }
754         rcu_read_unlock();
755         return NULL;
756 }
757
758 static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
759                              u64 owner, u64 root_objectid)
760 {
761         struct btrfs_space_info *space_info;
762         u64 flags;
763
764         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
765                 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
766                         flags = BTRFS_BLOCK_GROUP_SYSTEM;
767                 else
768                         flags = BTRFS_BLOCK_GROUP_METADATA;
769         } else {
770                 flags = BTRFS_BLOCK_GROUP_DATA;
771         }
772
773         space_info = __find_space_info(fs_info, flags);
774         ASSERT(space_info);
775         percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
776 }
777
778 /*
779  * after adding space to the filesystem, we need to clear the full flags
780  * on all the space infos.
781  */
782 void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
783 {
784         struct list_head *head = &info->space_info;
785         struct btrfs_space_info *found;
786
787         rcu_read_lock();
788         list_for_each_entry_rcu(found, head, list)
789                 found->full = 0;
790         rcu_read_unlock();
791 }
792
793 /* simple helper to search for an existing data extent at a given offset */
794 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
795 {
796         int ret;
797         struct btrfs_key key;
798         struct btrfs_path *path;
799
800         path = btrfs_alloc_path();
801         if (!path)
802                 return -ENOMEM;
803
804         key.objectid = start;
805         key.offset = len;
806         key.type = BTRFS_EXTENT_ITEM_KEY;
807         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
808         btrfs_free_path(path);
809         return ret;
810 }
811
812 /*
813  * helper function to lookup reference count and flags of a tree block.
814  *
815  * the head node for delayed ref is used to store the sum of all the
816  * reference count modifications queued up in the rbtree. the head
817  * node may also store the extent flags to set. This way you can check
818  * to see what the reference count and extent flags would be if all of
819  * the delayed refs are not processed.
820  */
821 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
822                              struct btrfs_fs_info *fs_info, u64 bytenr,
823                              u64 offset, int metadata, u64 *refs, u64 *flags)
824 {
825         struct btrfs_delayed_ref_head *head;
826         struct btrfs_delayed_ref_root *delayed_refs;
827         struct btrfs_path *path;
828         struct btrfs_extent_item *ei;
829         struct extent_buffer *leaf;
830         struct btrfs_key key;
831         u32 item_size;
832         u64 num_refs;
833         u64 extent_flags;
834         int ret;
835
836         /*
837          * If we don't have skinny metadata, don't bother doing anything
838          * different
839          */
840         if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
841                 offset = fs_info->nodesize;
842                 metadata = 0;
843         }
844
845         path = btrfs_alloc_path();
846         if (!path)
847                 return -ENOMEM;
848
849         if (!trans) {
850                 path->skip_locking = 1;
851                 path->search_commit_root = 1;
852         }
853
854 search_again:
855         key.objectid = bytenr;
856         key.offset = offset;
857         if (metadata)
858                 key.type = BTRFS_METADATA_ITEM_KEY;
859         else
860                 key.type = BTRFS_EXTENT_ITEM_KEY;
861
862         ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
863         if (ret < 0)
864                 goto out_free;
865
866         if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
867                 if (path->slots[0]) {
868                         path->slots[0]--;
869                         btrfs_item_key_to_cpu(path->nodes[0], &key,
870                                               path->slots[0]);
871                         if (key.objectid == bytenr &&
872                             key.type == BTRFS_EXTENT_ITEM_KEY &&
873                             key.offset == fs_info->nodesize)
874                                 ret = 0;
875                 }
876         }
877
878         if (ret == 0) {
879                 leaf = path->nodes[0];
880                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
881                 if (item_size >= sizeof(*ei)) {
882                         ei = btrfs_item_ptr(leaf, path->slots[0],
883                                             struct btrfs_extent_item);
884                         num_refs = btrfs_extent_refs(leaf, ei);
885                         extent_flags = btrfs_extent_flags(leaf, ei);
886                 } else {
887 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
888                         struct btrfs_extent_item_v0 *ei0;
889                         BUG_ON(item_size != sizeof(*ei0));
890                         ei0 = btrfs_item_ptr(leaf, path->slots[0],
891                                              struct btrfs_extent_item_v0);
892                         num_refs = btrfs_extent_refs_v0(leaf, ei0);
893                         /* FIXME: this isn't correct for data */
894                         extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
895 #else
896                         BUG();
897 #endif
898                 }
899                 BUG_ON(num_refs == 0);
900         } else {
901                 num_refs = 0;
902                 extent_flags = 0;
903                 ret = 0;
904         }
905
906         if (!trans)
907                 goto out;
908
909         delayed_refs = &trans->transaction->delayed_refs;
910         spin_lock(&delayed_refs->lock);
911         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
912         if (head) {
913                 if (!mutex_trylock(&head->mutex)) {
914                         refcount_inc(&head->refs);
915                         spin_unlock(&delayed_refs->lock);
916
917                         btrfs_release_path(path);
918
919                         /*
920                          * Mutex was contended, block until it's released and try
921                          * again
922                          */
923                         mutex_lock(&head->mutex);
924                         mutex_unlock(&head->mutex);
925                         btrfs_put_delayed_ref_head(head);
926                         goto search_again;
927                 }
928                 spin_lock(&head->lock);
929                 if (head->extent_op && head->extent_op->update_flags)
930                         extent_flags |= head->extent_op->flags_to_set;
931                 else
932                         BUG_ON(num_refs == 0);
933
934                 num_refs += head->ref_mod;
935                 spin_unlock(&head->lock);
936                 mutex_unlock(&head->mutex);
937         }
938         spin_unlock(&delayed_refs->lock);
939 out:
940         WARN_ON(num_refs == 0);
941         if (refs)
942                 *refs = num_refs;
943         if (flags)
944                 *flags = extent_flags;
945 out_free:
946         btrfs_free_path(path);
947         return ret;
948 }
949
950 /*
951  * Back reference rules.  Back refs have three main goals:
952  *
953  * 1) differentiate between all holders of references to an extent so that
954  *    when a reference is dropped we can make sure it was a valid reference
955  *    before freeing the extent.
956  *
957  * 2) Provide enough information to quickly find the holders of an extent
958  *    if we notice a given block is corrupted or bad.
959  *
960  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
961  *    maintenance.  This is actually the same as #2, but with a slightly
962  *    different use case.
963  *
964  * There are two kinds of back refs. The implicit back refs is optimized
965  * for pointers in non-shared tree blocks. For a given pointer in a block,
966  * back refs of this kind provide information about the block's owner tree
967  * and the pointer's key. These information allow us to find the block by
968  * b-tree searching. The full back refs is for pointers in tree blocks not
969  * referenced by their owner trees. The location of tree block is recorded
970  * in the back refs. Actually the full back refs is generic, and can be
971  * used in all cases the implicit back refs is used. The major shortcoming
972  * of the full back refs is its overhead. Every time a tree block gets
973  * COWed, we have to update back refs entry for all pointers in it.
974  *
975  * For a newly allocated tree block, we use implicit back refs for
976  * pointers in it. This means most tree related operations only involve
977  * implicit back refs. For a tree block created in old transaction, the
978  * only way to drop a reference to it is COW it. So we can detect the
979  * event that tree block loses its owner tree's reference and do the
980  * back refs conversion.
981  *
982  * When a tree block is COWed through a tree, there are four cases:
983  *
984  * The reference count of the block is one and the tree is the block's
985  * owner tree. Nothing to do in this case.
986  *
987  * The reference count of the block is one and the tree is not the
988  * block's owner tree. In this case, full back refs is used for pointers
989  * in the block. Remove these full back refs, add implicit back refs for
990  * every pointer in the new block.
991  *
992  * The reference count of the block is greater than one and the tree is
993  * the block's owner tree. In this case, implicit back refs is used for
994  * pointers in the block. Add full back refs for every pointers in the
995  * block, increase lower level extents' reference counts. The original
996  * implicit back refs are entailed to the new block.
997  *
998  * The reference count of the block is greater than one and the tree is
999  * not the block's owner tree. Add implicit back refs for every pointer in
1000  * the new block, increase lower level extents' reference count.
1001  *
1002  * Back Reference Key composing:
1003  *
1004  * The key objectid corresponds to the first byte in the extent,
1005  * The key type is used to differentiate between types of back refs.
1006  * There are different meanings of the key offset for different types
1007  * of back refs.
1008  *
1009  * File extents can be referenced by:
1010  *
1011  * - multiple snapshots, subvolumes, or different generations in one subvol
1012  * - different files inside a single subvolume
1013  * - different offsets inside a file (bookend extents in file.c)
1014  *
1015  * The extent ref structure for the implicit back refs has fields for:
1016  *
1017  * - Objectid of the subvolume root
1018  * - objectid of the file holding the reference
1019  * - original offset in the file
1020  * - how many bookend extents
1021  *
1022  * The key offset for the implicit back refs is hash of the first
1023  * three fields.
1024  *
1025  * The extent ref structure for the full back refs has field for:
1026  *
1027  * - number of pointers in the tree leaf
1028  *
1029  * The key offset for the full back refs is the first byte of
1030  * the tree leaf
1031  *
1032  * When a file extent is allocated, the implicit back ref is used and
1033  * its fields are filled in as:
1034  *
1035  *     (root_key.objectid, inode objectid, offset in file, 1)
1036  *
1037  * When a file extent is removed by file truncation, we find the
1038  * corresponding implicit back refs and check the following fields:
1039  *
1040  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
1041  *
1042  * Btree extents can be referenced by:
1043  *
1044  * - Different subvolumes
1045  *
1046  * Both the implicit back refs and the full back refs for tree blocks
1047  * only consist of key. The key offset for the implicit back refs is
1048  * objectid of block's owner tree. The key offset for the full back refs
1049  * is the first byte of parent block.
1050  *
1051  * When implicit back refs is used, information about the lowest key and
1052  * level of the tree block are required. These information are stored in
1053  * tree block info structure.
1054  */
1055
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert the v0 extent item @path points at into a current-format
 * btrfs_extent_item by extending it in place.
 *
 * @owner:      owner of the extent; (u64)-1 means it is read from the first
 *              BTRFS_EXTENT_REF_V0_KEY item found at or after the current
 *              slot
 * @extra_size: additional bytes added to the ins_len passed to
 *              btrfs_search_slot() when the item is looked up again
 *
 * An owner below BTRFS_FIRST_FREE_OBJECTID makes the item a tree block
 * (with a trailing btrfs_tree_block_info), anything else a data extent.
 *
 * Returns 0 with @path pointing at the converted item, or a negative error
 * from btrfs_next_leaf() / btrfs_search_slot().
 */
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct btrfs_extent_item *item;
	struct btrfs_key found_key;
	struct btrfs_key key;
	u32 new_size;
	u64 refs;
	int ret;

	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		/* Scan forward for the first v0 ref item of this extent. */
		for (;;) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0); /* Corruption */
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type == BTRFS_EXTENT_REF_V0_KEY) {
				ref0 = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_extent_ref_v0);
				owner = btrfs_ref_objectid_v0(leaf, ref0);
				break;
			}
			path->slots[0]++;
		}
	}
	btrfs_release_path(path);

	/* Number of bytes the item has to grow by. */
	new_size = sizeof(*item);
	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);
	new_size -= sizeof(*ei0);

	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret); /* Corruption */

	btrfs_extend_item(fs_info, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	} else {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	}
	btrfs_mark_buffer_dirty(leaf);

	return 0;
}
#endif
1138
1139 /*
1140  * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
1141  * is_data == BTRFS_REF_TYPE_DATA, data type is requried,
1142  * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
1143  */
1144 int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
1145                                      struct btrfs_extent_inline_ref *iref,
1146                                      enum btrfs_inline_ref_type is_data)
1147 {
1148         int type = btrfs_extent_inline_ref_type(eb, iref);
1149         u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
1150
1151         if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1152             type == BTRFS_SHARED_BLOCK_REF_KEY ||
1153             type == BTRFS_SHARED_DATA_REF_KEY ||
1154             type == BTRFS_EXTENT_DATA_REF_KEY) {
1155                 if (is_data == BTRFS_REF_TYPE_BLOCK) {
1156                         if (type == BTRFS_TREE_BLOCK_REF_KEY)
1157                                 return type;
1158                         if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1159                                 ASSERT(eb->fs_info);
1160                                 /*
1161                                  * Every shared one has parent tree
1162                                  * block, which must be aligned to
1163                                  * nodesize.
1164                                  */
1165                                 if (offset &&
1166                                     IS_ALIGNED(offset, eb->fs_info->nodesize))
1167                                         return type;
1168                         }
1169                 } else if (is_data == BTRFS_REF_TYPE_DATA) {
1170                         if (type == BTRFS_EXTENT_DATA_REF_KEY)
1171                                 return type;
1172                         if (type == BTRFS_SHARED_DATA_REF_KEY) {
1173                                 ASSERT(eb->fs_info);
1174                                 /*
1175                                  * Every shared one has parent tree
1176                                  * block, which must be aligned to
1177                                  * nodesize.
1178                                  */
1179                                 if (offset &&
1180                                     IS_ALIGNED(offset, eb->fs_info->nodesize))
1181                                         return type;
1182                         }
1183                 } else {
1184                         ASSERT(is_data == BTRFS_REF_TYPE_ANY);
1185                         return type;
1186                 }
1187         }
1188
1189         btrfs_print_leaf((struct extent_buffer *)eb);
1190         btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
1191                   eb->start, type);
1192         WARN_ON(1);
1193
1194         return BTRFS_REF_TYPE_INVALID;
1195 }
1196
1197 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1198 {
1199         u32 high_crc = ~(u32)0;
1200         u32 low_crc = ~(u32)0;
1201         __le64 lenum;
1202
1203         lenum = cpu_to_le64(root_objectid);
1204         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
1205         lenum = cpu_to_le64(owner);
1206         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
1207         lenum = cpu_to_le64(offset);
1208         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
1209
1210         return ((u64)high_crc << 31) ^ (u64)low_crc;
1211 }
1212
1213 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1214                                      struct btrfs_extent_data_ref *ref)
1215 {
1216         return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1217                                     btrfs_extent_data_ref_objectid(leaf, ref),
1218                                     btrfs_extent_data_ref_offset(leaf, ref));
1219 }
1220
1221 static int match_extent_data_ref(struct extent_buffer *leaf,
1222                                  struct btrfs_extent_data_ref *ref,
1223                                  u64 root_objectid, u64 owner, u64 offset)
1224 {
1225         if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1226             btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1227             btrfs_extent_data_ref_offset(leaf, ref) != offset)
1228                 return 0;
1229         return 1;
1230 }
1231
1232 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
1233                                            struct btrfs_fs_info *fs_info,
1234                                            struct btrfs_path *path,
1235                                            u64 bytenr, u64 parent,
1236                                            u64 root_objectid,
1237                                            u64 owner, u64 offset)
1238 {
1239         struct btrfs_root *root = fs_info->extent_root;
1240         struct btrfs_key key;
1241         struct btrfs_extent_data_ref *ref;
1242         struct extent_buffer *leaf;
1243         u32 nritems;
1244         int ret;
1245         int recow;
1246         int err = -ENOENT;
1247
1248         key.objectid = bytenr;
1249         if (parent) {
1250                 key.type = BTRFS_SHARED_DATA_REF_KEY;
1251                 key.offset = parent;
1252         } else {
1253                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1254                 key.offset = hash_extent_data_ref(root_objectid,
1255                                                   owner, offset);
1256         }
1257 again:
1258         recow = 0;
1259         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1260         if (ret < 0) {
1261                 err = ret;
1262                 goto fail;
1263         }
1264
1265         if (parent) {
1266                 if (!ret)
1267                         return 0;
1268 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1269                 key.type = BTRFS_EXTENT_REF_V0_KEY;
1270                 btrfs_release_path(path);
1271                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1272                 if (ret < 0) {
1273                         err = ret;
1274                         goto fail;
1275                 }
1276                 if (!ret)
1277                         return 0;
1278 #endif
1279                 goto fail;
1280         }
1281
1282         leaf = path->nodes[0];
1283         nritems = btrfs_header_nritems(leaf);
1284         while (1) {
1285                 if (path->slots[0] >= nritems) {
1286                         ret = btrfs_next_leaf(root, path);
1287                         if (ret < 0)
1288                                 err = ret;
1289                         if (ret)
1290                                 goto fail;
1291
1292                         leaf = path->nodes[0];
1293                         nritems = btrfs_header_nritems(leaf);
1294                         recow = 1;
1295                 }
1296
1297                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1298                 if (key.objectid != bytenr ||
1299                     key.type != BTRFS_EXTENT_DATA_REF_KEY)
1300                         goto fail;
1301
1302                 ref = btrfs_item_ptr(leaf, path->slots[0],
1303                                      struct btrfs_extent_data_ref);
1304
1305                 if (match_extent_data_ref(leaf, ref, root_objectid,
1306                                           owner, offset)) {
1307                         if (recow) {
1308                                 btrfs_release_path(path);
1309                                 goto again;
1310                         }
1311                         err = 0;
1312                         break;
1313                 }
1314                 path->slots[0]++;
1315         }
1316 fail:
1317         return err;
1318 }
1319
1320 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1321                                            struct btrfs_fs_info *fs_info,
1322                                            struct btrfs_path *path,
1323                                            u64 bytenr, u64 parent,
1324                                            u64 root_objectid, u64 owner,
1325                                            u64 offset, int refs_to_add)
1326 {
1327         struct btrfs_root *root = fs_info->extent_root;
1328         struct btrfs_key key;
1329         struct extent_buffer *leaf;
1330         u32 size;
1331         u32 num_refs;
1332         int ret;
1333
1334         key.objectid = bytenr;
1335         if (parent) {
1336                 key.type = BTRFS_SHARED_DATA_REF_KEY;
1337                 key.offset = parent;
1338                 size = sizeof(struct btrfs_shared_data_ref);
1339         } else {
1340                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1341                 key.offset = hash_extent_data_ref(root_objectid,
1342                                                   owner, offset);
1343                 size = sizeof(struct btrfs_extent_data_ref);
1344         }
1345
1346         ret = btrfs_insert_empty_item(trans, root, path, &key, size);
1347         if (ret && ret != -EEXIST)
1348                 goto fail;
1349
1350         leaf = path->nodes[0];
1351         if (parent) {
1352                 struct btrfs_shared_data_ref *ref;
1353                 ref = btrfs_item_ptr(leaf, path->slots[0],
1354                                      struct btrfs_shared_data_ref);
1355                 if (ret == 0) {
1356                         btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
1357                 } else {
1358                         num_refs = btrfs_shared_data_ref_count(leaf, ref);
1359                         num_refs += refs_to_add;
1360                         btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
1361                 }
1362         } else {
1363                 struct btrfs_extent_data_ref *ref;
1364                 while (ret == -EEXIST) {
1365                         ref = btrfs_item_ptr(leaf, path->slots[0],
1366                                              struct btrfs_extent_data_ref);
1367                         if (match_extent_data_ref(leaf, ref, root_objectid,
1368                                                   owner, offset))
1369                                 break;
1370                         btrfs_release_path(path);
1371                         key.offset++;
1372                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1373                                                       size);
1374                         if (ret && ret != -EEXIST)
1375                                 goto fail;
1376
1377                         leaf = path->nodes[0];
1378                 }
1379                 ref = btrfs_item_ptr(leaf, path->slots[0],
1380                                      struct btrfs_extent_data_ref);
1381                 if (ret == 0) {
1382                         btrfs_set_extent_data_ref_root(leaf, ref,
1383                                                        root_objectid);
1384                         btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
1385                         btrfs_set_extent_data_ref_offset(leaf, ref, offset);
1386                         btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
1387                 } else {
1388                         num_refs = btrfs_extent_data_ref_count(leaf, ref);
1389                         num_refs += refs_to_add;
1390                         btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
1391                 }
1392         }
1393         btrfs_mark_buffer_dirty(leaf);
1394         ret = 0;
1395 fail:
1396         btrfs_release_path(path);
1397         return ret;
1398 }
1399
1400 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1401                                            struct btrfs_fs_info *fs_info,
1402                                            struct btrfs_path *path,
1403                                            int refs_to_drop, int *last_ref)
1404 {
1405         struct btrfs_key key;
1406         struct btrfs_extent_data_ref *ref1 = NULL;
1407         struct btrfs_shared_data_ref *ref2 = NULL;
1408         struct extent_buffer *leaf;
1409         u32 num_refs = 0;
1410         int ret = 0;
1411
1412         leaf = path->nodes[0];
1413         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1414
1415         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1416                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1417                                       struct btrfs_extent_data_ref);
1418                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1419         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1420                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1421                                       struct btrfs_shared_data_ref);
1422                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1423 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1424         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1425                 struct btrfs_extent_ref_v0 *ref0;
1426                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1427                                       struct btrfs_extent_ref_v0);
1428                 num_refs = btrfs_ref_count_v0(leaf, ref0);
1429 #endif
1430         } else {
1431                 BUG();
1432         }
1433
1434         BUG_ON(num_refs < refs_to_drop);
1435         num_refs -= refs_to_drop;
1436
1437         if (num_refs == 0) {
1438                 ret = btrfs_del_item(trans, fs_info->extent_root, path);
1439                 *last_ref = 1;
1440         } else {
1441                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1442                         btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
1443                 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
1444                         btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
1445 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1446                 else {
1447                         struct btrfs_extent_ref_v0 *ref0;
1448                         ref0 = btrfs_item_ptr(leaf, path->slots[0],
1449                                         struct btrfs_extent_ref_v0);
1450                         btrfs_set_ref_count_v0(leaf, ref0, num_refs);
1451                 }
1452 #endif
1453                 btrfs_mark_buffer_dirty(leaf);
1454         }
1455         return ret;
1456 }
1457
1458 static noinline u32 extent_data_ref_count(struct btrfs_path *path,
1459                                           struct btrfs_extent_inline_ref *iref)
1460 {
1461         struct btrfs_key key;
1462         struct extent_buffer *leaf;
1463         struct btrfs_extent_data_ref *ref1;
1464         struct btrfs_shared_data_ref *ref2;
1465         u32 num_refs = 0;
1466         int type;
1467
1468         leaf = path->nodes[0];
1469         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1470         if (iref) {
1471                 /*
1472                  * If type is invalid, we should have bailed out earlier than
1473                  * this call.
1474                  */
1475                 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
1476                 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1477                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1478                         ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
1479                         num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1480                 } else {
1481                         ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
1482                         num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1483                 }
1484         } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1485                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1486                                       struct btrfs_extent_data_ref);
1487                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1488         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1489                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1490                                       struct btrfs_shared_data_ref);
1491                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1492 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1493         } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1494                 struct btrfs_extent_ref_v0 *ref0;
1495                 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1496                                       struct btrfs_extent_ref_v0);
1497                 num_refs = btrfs_ref_count_v0(leaf, ref0);
1498 #endif
1499         } else {
1500                 WARN_ON(1);
1501         }
1502         return num_refs;
1503 }
1504
1505 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1506                                           struct btrfs_fs_info *fs_info,
1507                                           struct btrfs_path *path,
1508                                           u64 bytenr, u64 parent,
1509                                           u64 root_objectid)
1510 {
1511         struct btrfs_root *root = fs_info->extent_root;
1512         struct btrfs_key key;
1513         int ret;
1514
1515         key.objectid = bytenr;
1516         if (parent) {
1517                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1518                 key.offset = parent;
1519         } else {
1520                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1521                 key.offset = root_objectid;
1522         }
1523
1524         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1525         if (ret > 0)
1526                 ret = -ENOENT;
1527 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1528         if (ret == -ENOENT && parent) {
1529                 btrfs_release_path(path);
1530                 key.type = BTRFS_EXTENT_REF_V0_KEY;
1531                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1532                 if (ret > 0)
1533                         ret = -ENOENT;
1534         }
1535 #endif
1536         return ret;
1537 }
1538
1539 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1540                                           struct btrfs_fs_info *fs_info,
1541                                           struct btrfs_path *path,
1542                                           u64 bytenr, u64 parent,
1543                                           u64 root_objectid)
1544 {
1545         struct btrfs_key key;
1546         int ret;
1547
1548         key.objectid = bytenr;
1549         if (parent) {
1550                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1551                 key.offset = parent;
1552         } else {
1553                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1554                 key.offset = root_objectid;
1555         }
1556
1557         ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
1558                                       path, &key, 0);
1559         btrfs_release_path(path);
1560         return ret;
1561 }
1562
1563 static inline int extent_ref_type(u64 parent, u64 owner)
1564 {
1565         int type;
1566         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1567                 if (parent > 0)
1568                         type = BTRFS_SHARED_BLOCK_REF_KEY;
1569                 else
1570                         type = BTRFS_TREE_BLOCK_REF_KEY;
1571         } else {
1572                 if (parent > 0)
1573                         type = BTRFS_SHARED_DATA_REF_KEY;
1574                 else
1575                         type = BTRFS_EXTENT_DATA_REF_KEY;
1576         }
1577         return type;
1578 }
1579
1580 static int find_next_key(struct btrfs_path *path, int level,
1581                          struct btrfs_key *key)
1582
1583 {
1584         for (; level < BTRFS_MAX_LEVEL; level++) {
1585                 if (!path->nodes[level])
1586                         break;
1587                 if (path->slots[level] + 1 >=
1588                     btrfs_header_nritems(path->nodes[level]))
1589                         continue;
1590                 if (level == 0)
1591                         btrfs_item_key_to_cpu(path->nodes[level], key,
1592                                               path->slots[level] + 1);
1593                 else
1594                         btrfs_node_key_to_cpu(path->nodes[level], key,
1595                                               path->slots[level] + 1);
1596                 return 0;
1597         }
1598         return 1;
1599 }
1600
1601 /*
1602  * look for inline back ref. if back ref is found, *ref_ret is set
1603  * to the address of inline back ref, and 0 is returned.
1604  *
1605  * if back ref isn't found, *ref_ret is set to the address where it
1606  * should be inserted, and -ENOENT is returned.
1607  *
1608  * if insert is true and there are too many inline back refs, the path
1609  * points to the extent item, and -EAGAIN is returned.
1610  *
1611  * NOTE: inline back refs are ordered in the same way that back ref
1612  *       items in the tree are ordered.
1613  */
1614 static noinline_for_stack
1615 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1616                                  struct btrfs_fs_info *fs_info,
1617                                  struct btrfs_path *path,
1618                                  struct btrfs_extent_inline_ref **ref_ret,
1619                                  u64 bytenr, u64 num_bytes,
1620                                  u64 parent, u64 root_objectid,
1621                                  u64 owner, u64 offset, int insert)
1622 {
1623         struct btrfs_root *root = fs_info->extent_root;
1624         struct btrfs_key key;
1625         struct extent_buffer *leaf;
1626         struct btrfs_extent_item *ei;
1627         struct btrfs_extent_inline_ref *iref;
1628         u64 flags;
1629         u64 item_size;
1630         unsigned long ptr;
1631         unsigned long end;
1632         int extra_size;
1633         int type;
1634         int want;
1635         int ret;
1636         int err = 0;
1637         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
1638         int needed;
1639
1640         key.objectid = bytenr;
1641         key.type = BTRFS_EXTENT_ITEM_KEY;
1642         key.offset = num_bytes;
1643
1644         want = extent_ref_type(parent, owner);
1645         if (insert) {
1646                 extra_size = btrfs_extent_inline_ref_size(want);
1647                 path->keep_locks = 1;
1648         } else
1649                 extra_size = -1;
1650
1651         /*
1652          * Owner is our parent level, so we can just add one to get the level
1653          * for the block we are interested in.
1654          */
1655         if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1656                 key.type = BTRFS_METADATA_ITEM_KEY;
1657                 key.offset = owner;
1658         }
1659
1660 again:
1661         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1662         if (ret < 0) {
1663                 err = ret;
1664                 goto out;
1665         }
1666
1667         /*
1668          * We may be a newly converted file system which still has the old fat
1669          * extent entries for metadata, so try and see if we have one of those.
1670          */
1671         if (ret > 0 && skinny_metadata) {
1672                 skinny_metadata = false;
1673                 if (path->slots[0]) {
1674                         path->slots[0]--;
1675                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1676                                               path->slots[0]);
1677                         if (key.objectid == bytenr &&
1678                             key.type == BTRFS_EXTENT_ITEM_KEY &&
1679                             key.offset == num_bytes)
1680                                 ret = 0;
1681                 }
1682                 if (ret) {
1683                         key.objectid = bytenr;
1684                         key.type = BTRFS_EXTENT_ITEM_KEY;
1685                         key.offset = num_bytes;
1686                         btrfs_release_path(path);
1687                         goto again;
1688                 }
1689         }
1690
1691         if (ret && !insert) {
1692                 err = -ENOENT;
1693                 goto out;
1694         } else if (WARN_ON(ret)) {
1695                 err = -EIO;
1696                 goto out;
1697         }
1698
1699         leaf = path->nodes[0];
1700         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1701 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1702         if (item_size < sizeof(*ei)) {
1703                 if (!insert) {
1704                         err = -ENOENT;
1705                         goto out;
1706                 }
1707                 ret = convert_extent_item_v0(trans, fs_info, path, owner,
1708                                              extra_size);
1709                 if (ret < 0) {
1710                         err = ret;
1711                         goto out;
1712                 }
1713                 leaf = path->nodes[0];
1714                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1715         }
1716 #endif
1717         BUG_ON(item_size < sizeof(*ei));
1718
1719         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1720         flags = btrfs_extent_flags(leaf, ei);
1721
1722         ptr = (unsigned long)(ei + 1);
1723         end = (unsigned long)ei + item_size;
1724
1725         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1726                 ptr += sizeof(struct btrfs_tree_block_info);
1727                 BUG_ON(ptr > end);
1728         }
1729
1730         if (owner >= BTRFS_FIRST_FREE_OBJECTID)
1731                 needed = BTRFS_REF_TYPE_DATA;
1732         else
1733                 needed = BTRFS_REF_TYPE_BLOCK;
1734
1735         err = -ENOENT;
1736         while (1) {
1737                 if (ptr >= end) {
1738                         WARN_ON(ptr > end);
1739                         break;
1740                 }
1741                 iref = (struct btrfs_extent_inline_ref *)ptr;
1742                 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
1743                 if (type == BTRFS_REF_TYPE_INVALID) {
1744                         err = -EINVAL;
1745                         goto out;
1746                 }
1747
1748                 if (want < type)
1749                         break;
1750                 if (want > type) {
1751                         ptr += btrfs_extent_inline_ref_size(type);
1752                         continue;
1753                 }
1754
1755                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1756                         struct btrfs_extent_data_ref *dref;
1757                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1758                         if (match_extent_data_ref(leaf, dref, root_objectid,
1759                                                   owner, offset)) {
1760                                 err = 0;
1761                                 break;
1762                         }
1763                         if (hash_extent_data_ref_item(leaf, dref) <
1764                             hash_extent_data_ref(root_objectid, owner, offset))
1765                                 break;
1766                 } else {
1767                         u64 ref_offset;
1768                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1769                         if (parent > 0) {
1770                                 if (parent == ref_offset) {
1771                                         err = 0;
1772                                         break;
1773                                 }
1774                                 if (ref_offset < parent)
1775                                         break;
1776                         } else {
1777                                 if (root_objectid == ref_offset) {
1778                                         err = 0;
1779                                         break;
1780                                 }
1781                                 if (ref_offset < root_objectid)
1782                                         break;
1783                         }
1784                 }
1785                 ptr += btrfs_extent_inline_ref_size(type);
1786         }
1787         if (err == -ENOENT && insert) {
1788                 if (item_size + extra_size >=
1789                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1790                         err = -EAGAIN;
1791                         goto out;
1792                 }
1793                 /*
1794                  * To add new inline back ref, we have to make sure
1795                  * there is no corresponding back ref item.
1796                  * For simplicity, we just do not add new inline back
1797                  * ref if there is any kind of item for this block
1798                  */
1799                 if (find_next_key(path, 0, &key) == 0 &&
1800                     key.objectid == bytenr &&
1801                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1802                         err = -EAGAIN;
1803                         goto out;
1804                 }
1805         }
1806         *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1807 out:
1808         if (insert) {
1809                 path->keep_locks = 0;
1810                 btrfs_unlock_up_safe(path, 1);
1811         }
1812         return err;
1813 }
1814
1815 /*
1816  * helper to add new inline back ref
1817  */
1818 static noinline_for_stack
1819 void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1820                                  struct btrfs_path *path,
1821                                  struct btrfs_extent_inline_ref *iref,
1822                                  u64 parent, u64 root_objectid,
1823                                  u64 owner, u64 offset, int refs_to_add,
1824                                  struct btrfs_delayed_extent_op *extent_op)
1825 {
1826         struct extent_buffer *leaf;
1827         struct btrfs_extent_item *ei;
1828         unsigned long ptr;
1829         unsigned long end;
1830         unsigned long item_offset;
1831         u64 refs;
1832         int size;
1833         int type;
1834
1835         leaf = path->nodes[0];
1836         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1837         item_offset = (unsigned long)iref - (unsigned long)ei;
1838
1839         type = extent_ref_type(parent, owner);
1840         size = btrfs_extent_inline_ref_size(type);
1841
1842         btrfs_extend_item(fs_info, path, size);
1843
1844         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1845         refs = btrfs_extent_refs(leaf, ei);
1846         refs += refs_to_add;
1847         btrfs_set_extent_refs(leaf, ei, refs);
1848         if (extent_op)
1849                 __run_delayed_extent_op(extent_op, leaf, ei);
1850
1851         ptr = (unsigned long)ei + item_offset;
1852         end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1853         if (ptr < end - size)
1854                 memmove_extent_buffer(leaf, ptr + size, ptr,
1855                                       end - size - ptr);
1856
1857         iref = (struct btrfs_extent_inline_ref *)ptr;
1858         btrfs_set_extent_inline_ref_type(leaf, iref, type);
1859         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1860                 struct btrfs_extent_data_ref *dref;
1861                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1862                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1863                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1864                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1865                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1866         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1867                 struct btrfs_shared_data_ref *sref;
1868                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1869                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1870                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1871         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1872                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1873         } else {
1874                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1875         }
1876         btrfs_mark_buffer_dirty(leaf);
1877 }
1878
1879 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1880                                  struct btrfs_fs_info *fs_info,
1881                                  struct btrfs_path *path,
1882                                  struct btrfs_extent_inline_ref **ref_ret,
1883                                  u64 bytenr, u64 num_bytes, u64 parent,
1884                                  u64 root_objectid, u64 owner, u64 offset)
1885 {
1886         int ret;
1887
1888         ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
1889                                            bytenr, num_bytes, parent,
1890                                            root_objectid, owner, offset, 0);
1891         if (ret != -ENOENT)
1892                 return ret;
1893
1894         btrfs_release_path(path);
1895         *ref_ret = NULL;
1896
1897         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1898                 ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
1899                                             parent, root_objectid);
1900         } else {
1901                 ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
1902                                              parent, root_objectid, owner,
1903                                              offset);
1904         }
1905         return ret;
1906 }
1907
1908 /*
1909  * helper to update/remove inline back ref
1910  */
1911 static noinline_for_stack
1912 void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
1913                                   struct btrfs_path *path,
1914                                   struct btrfs_extent_inline_ref *iref,
1915                                   int refs_to_mod,
1916                                   struct btrfs_delayed_extent_op *extent_op,
1917                                   int *last_ref)
1918 {
1919         struct extent_buffer *leaf;
1920         struct btrfs_extent_item *ei;
1921         struct btrfs_extent_data_ref *dref = NULL;
1922         struct btrfs_shared_data_ref *sref = NULL;
1923         unsigned long ptr;
1924         unsigned long end;
1925         u32 item_size;
1926         int size;
1927         int type;
1928         u64 refs;
1929
1930         leaf = path->nodes[0];
1931         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1932         refs = btrfs_extent_refs(leaf, ei);
1933         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1934         refs += refs_to_mod;
1935         btrfs_set_extent_refs(leaf, ei, refs);
1936         if (extent_op)
1937                 __run_delayed_extent_op(extent_op, leaf, ei);
1938
1939         /*
1940          * If type is invalid, we should have bailed out after
1941          * lookup_inline_extent_backref().
1942          */
1943         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1944         ASSERT(type != BTRFS_REF_TYPE_INVALID);
1945
1946         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1947                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1948                 refs = btrfs_extent_data_ref_count(leaf, dref);
1949         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1950                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1951                 refs = btrfs_shared_data_ref_count(leaf, sref);
1952         } else {
1953                 refs = 1;
1954                 BUG_ON(refs_to_mod != -1);
1955         }
1956
1957         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1958         refs += refs_to_mod;
1959
1960         if (refs > 0) {
1961                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1962                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
1963                 else
1964                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
1965         } else {
1966                 *last_ref = 1;
1967                 size =  btrfs_extent_inline_ref_size(type);
1968                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1969                 ptr = (unsigned long)iref;
1970                 end = (unsigned long)ei + item_size;
1971                 if (ptr + size < end)
1972                         memmove_extent_buffer(leaf, ptr, ptr + size,
1973                                               end - ptr - size);
1974                 item_size -= size;
1975                 btrfs_truncate_item(fs_info, path, item_size, 1);
1976         }
1977         btrfs_mark_buffer_dirty(leaf);
1978 }
1979
1980 static noinline_for_stack
1981 int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1982                                  struct btrfs_fs_info *fs_info,
1983                                  struct btrfs_path *path,
1984                                  u64 bytenr, u64 num_bytes, u64 parent,
1985                                  u64 root_objectid, u64 owner,
1986                                  u64 offset, int refs_to_add,
1987                                  struct btrfs_delayed_extent_op *extent_op)
1988 {
1989         struct btrfs_extent_inline_ref *iref;
1990         int ret;
1991
1992         ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
1993                                            bytenr, num_bytes, parent,
1994                                            root_objectid, owner, offset, 1);
1995         if (ret == 0) {
1996                 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1997                 update_inline_extent_backref(fs_info, path, iref,
1998                                              refs_to_add, extent_op, NULL);
1999         } else if (ret == -ENOENT) {
2000                 setup_inline_extent_backref(fs_info, path, iref, parent,
2001                                             root_objectid, owner, offset,
2002                                             refs_to_add, extent_op);
2003                 ret = 0;
2004         }
2005         return ret;
2006 }
2007
2008 static int insert_extent_backref(struct btrfs_trans_handle *trans,
2009                                  struct btrfs_fs_info *fs_info,
2010                                  struct btrfs_path *path,
2011                                  u64 bytenr, u64 parent, u64 root_objectid,
2012                                  u64 owner, u64 offset, int refs_to_add)
2013 {
2014         int ret;
2015         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2016                 BUG_ON(refs_to_add != 1);
2017                 ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
2018                                             parent, root_objectid);
2019         } else {
2020                 ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
2021                                              parent, root_objectid,
2022                                              owner, offset, refs_to_add);
2023         }
2024         return ret;
2025 }
2026
2027 static int remove_extent_backref(struct btrfs_trans_handle *trans,
2028                                  struct btrfs_fs_info *fs_info,
2029                                  struct btrfs_path *path,
2030                                  struct btrfs_extent_inline_ref *iref,
2031                                  int refs_to_drop, int is_data, int *last_ref)
2032 {
2033         int ret = 0;
2034
2035         BUG_ON(!is_data && refs_to_drop != 1);
2036         if (iref) {
2037                 update_inline_extent_backref(fs_info, path, iref,
2038                                              -refs_to_drop, NULL, last_ref);
2039         } else if (is_data) {
2040                 ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
2041                                              last_ref);
2042         } else {
2043                 *last_ref = 1;
2044                 ret = btrfs_del_item(trans, fs_info->extent_root, path);
2045         }
2046         return ret;
2047 }
2048
2049 #define in_range(b, first, len)        ((b) >= (first) && (b) < (first) + (len))
2050 static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
2051                                u64 *discarded_bytes)
2052 {
2053         int j, ret = 0;
2054         u64 bytes_left, end;
2055         u64 aligned_start = ALIGN(start, 1 << 9);
2056
2057         if (WARN_ON(start != aligned_start)) {
2058                 len -= aligned_start - start;
2059                 len = round_down(len, 1 << 9);
2060                 start = aligned_start;
2061         }
2062
2063         *discarded_bytes = 0;
2064
2065         if (!len)
2066                 return 0;
2067
2068         end = start + len;
2069         bytes_left = len;
2070
2071         /* Skip any superblocks on this device. */
2072         for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
2073                 u64 sb_start = btrfs_sb_offset(j);
2074                 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
2075                 u64 size = sb_start - start;
2076
2077                 if (!in_range(sb_start, start, bytes_left) &&
2078                     !in_range(sb_end, start, bytes_left) &&
2079                     !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
2080                         continue;
2081
2082                 /*
2083                  * Superblock spans beginning of range.  Adjust start and
2084                  * try again.
2085                  */
2086                 if (sb_start <= start) {
2087                         start += sb_end - start;
2088                         if (start > end) {
2089                                 bytes_left = 0;
2090                                 break;
2091                         }
2092                         bytes_left = end - start;
2093                         continue;
2094                 }
2095
2096                 if (size) {
2097                         ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
2098                                                    GFP_NOFS, 0);
2099                         if (!ret)
2100                                 *discarded_bytes += size;
2101                         else if (ret != -EOPNOTSUPP)
2102                                 return ret;
2103                 }
2104
2105                 start = sb_end;
2106                 if (start > end) {
2107                         bytes_left = 0;
2108                         break;
2109                 }
2110                 bytes_left = end - start;
2111         }
2112
2113         if (bytes_left) {
2114                 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
2115                                            GFP_NOFS, 0);
2116                 if (!ret)
2117                         *discarded_bytes += bytes_left;
2118         }
2119         return ret;
2120 }
2121
2122 int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
2123                          u64 num_bytes, u64 *actual_bytes)
2124 {
2125         int ret;
2126         u64 discarded_bytes = 0;
2127         struct btrfs_bio *bbio = NULL;
2128
2129
2130         /*
2131          * Avoid races with device replace and make sure our bbio has devices
2132          * associated to its stripes that don't go away while we are discarding.
2133          */
2134         btrfs_bio_counter_inc_blocked(fs_info);
2135         /* Tell the block device(s) that the sectors can be discarded */
2136         ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
2137                               &bbio, 0);
2138         /* Error condition is -ENOMEM */
2139         if (!ret) {
2140                 struct btrfs_bio_stripe *stripe = bbio->stripes;
2141                 int i;
2142
2143
2144                 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
2145                         u64 bytes;
2146                         struct request_queue *req_q;
2147
2148                         if (!stripe->dev->bdev) {
2149                                 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
2150                                 continue;
2151                         }
2152                         req_q = bdev_get_queue(stripe->dev->bdev);
2153                         if (!blk_queue_discard(req_q))
2154                                 continue;
2155
2156                         ret = btrfs_issue_discard(stripe->dev->bdev,
2157                                                   stripe->physical,
2158                                                   stripe->length,
2159                                                   &bytes);
2160                         if (!ret)
2161                                 discarded_bytes += bytes;
2162                         else if (ret != -EOPNOTSUPP)
2163                                 break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
2164
2165                         /*
2166                          * Just in case we get back EOPNOTSUPP for some reason,
2167                          * just ignore the return value so we don't screw up
2168                          * people calling discard_extent.
2169                          */
2170                         ret = 0;
2171                 }
2172                 btrfs_put_bbio(bbio);
2173         }
2174         btrfs_bio_counter_dec(fs_info);
2175
2176         if (actual_bytes)
2177                 *actual_bytes = discarded_bytes;
2178
2179
2180         if (ret == -EOPNOTSUPP)
2181                 ret = 0;
2182         return ret;
2183 }
2184
2185 /* Can return -ENOMEM */
2186 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2187                          struct btrfs_root *root,
2188                          u64 bytenr, u64 num_bytes, u64 parent,
2189                          u64 root_objectid, u64 owner, u64 offset)
2190 {
2191         struct btrfs_fs_info *fs_info = root->fs_info;
2192         int old_ref_mod, new_ref_mod;
2193         int ret;
2194
2195         BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
2196                root_objectid == BTRFS_TREE_LOG_OBJECTID);
2197
2198         btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
2199                            owner, offset, BTRFS_ADD_DELAYED_REF);
2200
2201         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2202                 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
2203                                                  num_bytes, parent,
2204                                                  root_objectid, (int)owner,
2205                                                  BTRFS_ADD_DELAYED_REF, NULL,
2206                                                  &old_ref_mod, &new_ref_mod);
2207         } else {
2208                 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
2209                                                  num_bytes, parent,
2210                                                  root_objectid, owner, offset,
2211                                                  0, BTRFS_ADD_DELAYED_REF,
2212                                                  &old_ref_mod, &new_ref_mod);
2213         }
2214
2215         if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
2216                 add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
2217
2218         return ret;
2219 }
2220
2221 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2222                                   struct btrfs_fs_info *fs_info,
2223                                   struct btrfs_delayed_ref_node *node,
2224                                   u64 parent, u64 root_objectid,
2225                                   u64 owner, u64 offset, int refs_to_add,
2226                                   struct btrfs_delayed_extent_op *extent_op)
2227 {
2228         struct btrfs_path *path;
2229         struct extent_buffer *leaf;
2230         struct btrfs_extent_item *item;
2231         struct btrfs_key key;
2232         u64 bytenr = node->bytenr;
2233         u64 num_bytes = node->num_bytes;
2234         u64 refs;
2235         int ret;
2236
2237         path = btrfs_alloc_path();
2238         if (!path)
2239                 return -ENOMEM;
2240
2241         path->reada = READA_FORWARD;
2242         path->leave_spinning = 1;
2243         /* this will setup the path even if it fails to insert the back ref */
2244         ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
2245                                            num_bytes, parent, root_objectid,
2246                                            owner, offset,
2247                                            refs_to_add, extent_op);
2248         if ((ret < 0 && ret != -EAGAIN) || !ret)
2249                 goto out;
2250
2251         /*
2252          * Ok we had -EAGAIN which means we didn't have space to insert and
2253          * inline extent ref, so just update the reference count and add a
2254          * normal backref.
2255          */
2256         leaf = path->nodes[0];
2257         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2258         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2259         refs = btrfs_extent_refs(leaf, item);
2260         btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2261         if (extent_op)
2262                 __run_delayed_extent_op(extent_op, leaf, item);
2263
2264         btrfs_mark_buffer_dirty(leaf);
2265         btrfs_release_path(path);
2266
2267         path->reada = READA_FORWARD;
2268         path->leave_spinning = 1;
2269         /* now insert the actual backref */
2270         ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
2271                                     root_objectid, owner, offset, refs_to_add);
2272         if (ret)
2273                 btrfs_abort_transaction(trans, ret);
2274 out:
2275         btrfs_free_path(path);
2276         return ret;
2277 }
2278
2279 static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2280                                 struct btrfs_fs_info *fs_info,
2281                                 struct btrfs_delayed_ref_node *node,
2282                                 struct btrfs_delayed_extent_op *extent_op,
2283                                 int insert_reserved)
2284 {
2285         int ret = 0;
2286         struct btrfs_delayed_data_ref *ref;
2287         struct btrfs_key ins;
2288         u64 parent = 0;
2289         u64 ref_root = 0;
2290         u64 flags = 0;
2291
2292         ins.objectid = node->bytenr;
2293         ins.offset = node->num_bytes;
2294         ins.type = BTRFS_EXTENT_ITEM_KEY;
2295
2296         ref = btrfs_delayed_node_to_data_ref(node);
2297         trace_run_delayed_data_ref(fs_info, node, ref, node->action);
2298
2299         if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2300                 parent = ref->parent;
2301         ref_root = ref->root;
2302
2303         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2304                 if (extent_op)
2305                         flags |= extent_op->flags_to_set;
2306                 ret = alloc_reserved_file_extent(trans, fs_info,
2307                                                  parent, ref_root, flags,
2308                                                  ref->objectid, ref->offset,
2309                                                  &ins, node->ref_mod);
2310         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2311                 ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent,
2312                                              ref_root, ref->objectid,
2313                                              ref->offset, node->ref_mod,
2314                                              extent_op);
2315         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2316                 ret = __btrfs_free_extent(trans, fs_info, node, parent,
2317                                           ref_root, ref->objectid,
2318                                           ref->offset, node->ref_mod,
2319                                           extent_op);
2320         } else {
2321                 BUG();
2322         }
2323         return ret;
2324 }
2325
2326 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2327                                     struct extent_buffer *leaf,
2328                                     struct btrfs_extent_item *ei)
2329 {
2330         u64 flags = btrfs_extent_flags(leaf, ei);
2331         if (extent_op->update_flags) {
2332                 flags |= extent_op->flags_to_set;
2333                 btrfs_set_extent_flags(leaf, ei, flags);
2334         }
2335
2336         if (extent_op->update_key) {
2337                 struct btrfs_tree_block_info *bi;
2338                 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2339                 bi = (struct btrfs_tree_block_info *)(ei + 1);
2340                 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2341         }
2342 }
2343
/*
 * Look up the extent item for @head in the extent tree and apply
 * @extent_op (flag and/or key update) to it.
 *
 * Returns 0 on success or when the transaction is already aborted, -ENOMEM
 * if no path could be allocated, -EIO if no matching extent item exists, or
 * the negative error from btrfs_search_slot() / convert_extent_item_v0().
 */
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
                                 struct btrfs_fs_info *fs_info,
                                 struct btrfs_delayed_ref_head *head,
                                 struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_key key;
        struct btrfs_path *path;
        struct btrfs_extent_item *ei;
        struct extent_buffer *leaf;
        u32 item_size;
        int ret;
        int err = 0;
        int metadata = !extent_op->is_data;

        if (trans->aborted)
                return 0;

        /* Without SKINNY_METADATA tree blocks use plain extent item keys. */
        if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
                metadata = 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = head->bytenr;

        if (metadata) {
                key.type = BTRFS_METADATA_ITEM_KEY;
                key.offset = extent_op->level;
        } else {
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = head->num_bytes;
        }

again:
        path->reada = READA_FORWARD;
        path->leave_spinning = 1;
        ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
        if (ret < 0) {
                err = ret;
                goto out;
        }
        if (ret > 0) {
                if (metadata) {
                        /*
                         * No exact match for the metadata key.  Check whether
                         * the previous slot holds an extent item key for the
                         * same block.
                         */
                        if (path->slots[0] > 0) {
                                path->slots[0]--;
                                btrfs_item_key_to_cpu(path->nodes[0], &key,
                                                      path->slots[0]);
                                if (key.objectid == head->bytenr &&
                                    key.type == BTRFS_EXTENT_ITEM_KEY &&
                                    key.offset == head->num_bytes)
                                        ret = 0;
                        }
                        if (ret > 0) {
                                /*
                                 * Still nothing: search again, this time with
                                 * the extent item key.  metadata is cleared so
                                 * a second miss ends in -EIO below.
                                 */
                                btrfs_release_path(path);
                                metadata = 0;

                                key.objectid = head->bytenr;
                                key.offset = head->num_bytes;
                                key.type = BTRFS_EXTENT_ITEM_KEY;
                                goto again;
                        }
                } else {
                        err = -EIO;
                        goto out;
                }
        }

        leaf = path->nodes[0];
        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        /* An item smaller than the current struct is converted first. */
        if (item_size < sizeof(*ei)) {
                ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                /* Reload leaf and size after the conversion. */
                leaf = path->nodes[0];
                item_size = btrfs_item_size_nr(leaf, path->slots[0]);
        }
#endif
        BUG_ON(item_size < sizeof(*ei));
        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        __run_delayed_extent_op(extent_op, leaf, ei);

        btrfs_mark_buffer_dirty(leaf);
out:
        btrfs_free_path(path);
        return err;
}
2434
2435 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2436                                 struct btrfs_fs_info *fs_info,
2437                                 struct btrfs_delayed_ref_node *node,
2438                                 struct btrfs_delayed_extent_op *extent_op,
2439                                 int insert_reserved)
2440 {
2441         int ret = 0;
2442         struct btrfs_delayed_tree_ref *ref;
2443         struct btrfs_key ins;
2444         u64 parent = 0;
2445         u64 ref_root = 0;
2446         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2447
2448         ref = btrfs_delayed_node_to_tree_ref(node);
2449         trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
2450
2451         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2452                 parent = ref->parent;
2453         ref_root = ref->root;
2454
2455         ins.objectid = node->bytenr;
2456         if (skinny_metadata) {
2457                 ins.offset = ref->level;
2458                 ins.type = BTRFS_METADATA_ITEM_KEY;
2459         } else {
2460                 ins.offset = node->num_bytes;
2461                 ins.type = BTRFS_EXTENT_ITEM_KEY;
2462         }
2463
2464         if (node->ref_mod != 1) {
2465                 btrfs_err(fs_info,
2466         "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2467                           node->bytenr, node->ref_mod, node->action, ref_root,
2468                           parent);
2469                 return -EIO;
2470         }
2471         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2472                 BUG_ON(!extent_op || !extent_op->update_flags);
2473                 ret = alloc_reserved_tree_block(trans, fs_info,
2474                                                 parent, ref_root,
2475                                                 extent_op->flags_to_set,
2476                                                 &extent_op->key,
2477                                                 ref->level, &ins);
2478         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2479                 ret = __btrfs_inc_extent_ref(trans, fs_info, node,
2480                                              parent, ref_root,
2481                                              ref->level, 0, 1,
2482                                              extent_op);
2483         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2484                 ret = __btrfs_free_extent(trans, fs_info, node,
2485                                           parent, ref_root,
2486                                           ref->level, 0, 1, extent_op);
2487         } else {
2488                 BUG();
2489         }
2490         return ret;
2491 }
2492
2493 /* helper function to actually process a single delayed ref entry */
2494 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2495                                struct btrfs_fs_info *fs_info,
2496                                struct btrfs_delayed_ref_node *node,
2497                                struct btrfs_delayed_extent_op *extent_op,
2498                                int insert_reserved)
2499 {
2500         int ret = 0;
2501
2502         if (trans->aborted) {
2503                 if (insert_reserved)
2504                         btrfs_pin_extent(fs_info, node->bytenr,
2505                                          node->num_bytes, 1);
2506                 return 0;
2507         }
2508
2509         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2510             node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2511                 ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
2512                                            insert_reserved);
2513         else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2514                  node->type == BTRFS_SHARED_DATA_REF_KEY)
2515                 ret = run_delayed_data_ref(trans, fs_info, node, extent_op,
2516                                            insert_reserved);
2517         else
2518                 BUG();
2519         return ret;
2520 }
2521
2522 static inline struct btrfs_delayed_ref_node *
2523 select_delayed_ref(struct btrfs_delayed_ref_head *head)
2524 {
2525         struct btrfs_delayed_ref_node *ref;
2526
2527         if (RB_EMPTY_ROOT(&head->ref_tree))
2528                 return NULL;
2529
2530         /*
2531          * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
2532          * This is to prevent a ref count from going down to zero, which deletes
2533          * the extent item from the extent tree, when there still are references
2534          * to add, which would fail because they would not find the extent item.
2535          */
2536         if (!list_empty(&head->ref_add_list))
2537                 return list_first_entry(&head->ref_add_list,
2538                                 struct btrfs_delayed_ref_node, add_list);
2539
2540         ref = rb_entry(rb_first(&head->ref_tree),
2541                        struct btrfs_delayed_ref_node, ref_node);
2542         ASSERT(list_empty(&ref->add_list));
2543         return ref;
2544 }
2545
2546 static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
2547                                       struct btrfs_delayed_ref_head *head)
2548 {
2549         spin_lock(&delayed_refs->lock);
2550         head->processing = 0;
2551         delayed_refs->num_heads_ready++;
2552         spin_unlock(&delayed_refs->lock);
2553         btrfs_delayed_ref_unlock(head);
2554 }
2555
2556 static int cleanup_extent_op(struct btrfs_trans_handle *trans,
2557                              struct btrfs_fs_info *fs_info,
2558                              struct btrfs_delayed_ref_head *head)
2559 {
2560         struct btrfs_delayed_extent_op *extent_op = head->extent_op;
2561         int ret;
2562
2563         if (!extent_op)
2564                 return 0;
2565         head->extent_op = NULL;
2566         if (head->must_insert_reserved) {
2567                 btrfs_free_delayed_extent_op(extent_op);
2568                 return 0;
2569         }
2570         spin_unlock(&head->lock);
2571         ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
2572         btrfs_free_delayed_extent_op(extent_op);
2573         return ret ? ret : 1;
2574 }
2575
2576 static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2577                             struct btrfs_fs_info *fs_info,
2578                             struct btrfs_delayed_ref_head *head)
2579 {
2580         struct btrfs_delayed_ref_root *delayed_refs;
2581         int ret;
2582
2583         delayed_refs = &trans->transaction->delayed_refs;
2584
2585         ret = cleanup_extent_op(trans, fs_info, head);
2586         if (ret < 0) {
2587                 unselect_delayed_ref_head(delayed_refs, head);
2588                 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2589                 return ret;
2590         } else if (ret) {
2591                 return ret;
2592         }
2593
2594         /*
2595          * Need to drop our head ref lock and re-acquire the delayed ref lock
2596          * and then re-check to make sure nobody got added.
2597          */
2598         spin_unlock(&head->lock);
2599         spin_lock(&delayed_refs->lock);
2600         spin_lock(&head->lock);
2601         if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
2602                 spin_unlock(&head->lock);
2603                 spin_unlock(&delayed_refs->lock);
2604                 return 1;
2605         }
2606         delayed_refs->num_heads--;
2607         rb_erase(&head->href_node, &delayed_refs->href_root);
2608         RB_CLEAR_NODE(&head->href_node);
2609         spin_unlock(&delayed_refs->lock);
2610         spin_unlock(&head->lock);
2611         atomic_dec(&delayed_refs->num_entries);
2612
2613         trace_run_delayed_ref_head(fs_info, head, 0);
2614
2615         if (head->total_ref_mod < 0) {
2616                 struct btrfs_block_group_cache *cache;
2617
2618                 cache = btrfs_lookup_block_group(fs_info, head->bytenr);
2619                 ASSERT(cache);
2620                 percpu_counter_add(&cache->space_info->total_bytes_pinned,
2621                                    -head->num_bytes);
2622                 btrfs_put_block_group(cache);
2623
2624                 if (head->is_data) {
2625                         spin_lock(&delayed_refs->lock);
2626                         delayed_refs->pending_csums -= head->num_bytes;
2627                         spin_unlock(&delayed_refs->lock);
2628                 }
2629         }
2630
2631         if (head->must_insert_reserved) {
2632                 btrfs_pin_extent(fs_info, head->bytenr,
2633                                  head->num_bytes, 1);
2634                 if (head->is_data) {
2635                         ret = btrfs_del_csums(trans, fs_info, head->bytenr,
2636                                               head->num_bytes);
2637                 }
2638         }
2639
2640         /* Also free its reserved qgroup space */
2641         btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
2642                                       head->qgroup_reserved);
2643         btrfs_delayed_ref_unlock(head);
2644         btrfs_put_delayed_ref_head(head);
2645         return 0;
2646 }
2647
/*
 * Run delayed refs of the running transaction until @nr units of work are
 * done or no ref head can be selected any more.  One unit is counted for
 * every ref that was run, every head that was cleaned up, every head that
 * had to be skipped because of its sequence number, and every head whose
 * lock attempt returned -EAGAIN.  The limit is only checked when a new
 * head is about to be picked, so a head that has been started is not
 * abandoned because of @nr.
 *
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns the negative error from cleanup_ref_head() or
 * run_one_delayed_ref() on failure.
 * NOTE(review): this function does not abort the transaction itself;
 * presumably the caller does - confirm.
 */
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                                             unsigned long nr)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *locked_ref = NULL;
        struct btrfs_delayed_extent_op *extent_op;
        ktime_t start = ktime_get();
        int ret;
        unsigned long count = 0;
        unsigned long actual_count = 0;
        int must_insert_reserved = 0;

        delayed_refs = &trans->transaction->delayed_refs;
        while (1) {
                if (!locked_ref) {
                        if (count >= nr)
                                break;

                        spin_lock(&delayed_refs->lock);
                        locked_ref = btrfs_select_ref_head(trans);
                        if (!locked_ref) {
                                spin_unlock(&delayed_refs->lock);
                                break;
                        }

                        /* grab the lock that says we are going to process
                         * all the refs for this head */
                        ret = btrfs_delayed_ref_lock(trans, locked_ref);
                        spin_unlock(&delayed_refs->lock);
                        /*
                         * we may have dropped the spin lock to get the head
                         * mutex lock, and that might have given someone else
                         * time to free the head.  If that's true, it has been
                         * removed from our list and we can move on.
                         */
                        if (ret == -EAGAIN) {
                                locked_ref = NULL;
                                count++;
                                continue;
                        }
                }

                /*
                 * We need to try and merge add/drops of the same ref since we
                 * can run into issues with relocate dropping the implicit ref
                 * and then it being added back again before the drop can
                 * finish.  If we merged anything we need to re-loop so we can
                 * get a good ref.
                 * Or we can get node references of the same type that weren't
                 * merged when created due to bumps in the tree mod seq, and
                 * we need to merge them to prevent adding an inline extent
                 * backref before dropping it (triggering a BUG_ON at
                 * insert_inline_extent_backref()).
                 */
                spin_lock(&locked_ref->lock);
                btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
                                         locked_ref);

                /*
                 * locked_ref is the head node, so we have to go one
                 * node back for any delayed ref updates
                 */
                ref = select_delayed_ref(locked_ref);

                /*
                 * The ref carries a sequence number that may not be run yet:
                 * give the head back and try another one.
                 */
                if (ref && ref->seq &&
                    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
                        spin_unlock(&locked_ref->lock);
                        unselect_delayed_ref_head(delayed_refs, locked_ref);
                        locked_ref = NULL;
                        cond_resched();
                        count++;
                        continue;
                }

                /*
                 * We're done processing refs in this ref_head, clean everything
                 * up and move on to the next ref_head.
                 */
                if (!ref) {
                        ret = cleanup_ref_head(trans, fs_info, locked_ref);
                        if (ret > 0 ) {
                                /* We dropped our lock, we need to loop. */
                                ret = 0;
                                continue;
                        } else if (ret) {
                                return ret;
                        }
                        locked_ref = NULL;
                        count++;
                        continue;
                }

                /* Detach the ref from the head before running it. */
                actual_count++;
                ref->in_tree = 0;
                rb_erase(&ref->ref_node, &locked_ref->ref_tree);
                RB_CLEAR_NODE(&ref->ref_node);
                if (!list_empty(&ref->add_list))
                        list_del(&ref->add_list);
                /*
                 * When we play the delayed ref, also correct the ref_mod on
                 * head
                 */
                switch (ref->action) {
                case BTRFS_ADD_DELAYED_REF:
                case BTRFS_ADD_DELAYED_EXTENT:
                        locked_ref->ref_mod -= ref->ref_mod;
                        break;
                case BTRFS_DROP_DELAYED_REF:
                        locked_ref->ref_mod += ref->ref_mod;
                        break;
                default:
                        WARN_ON(1);
                }
                atomic_dec(&delayed_refs->num_entries);

                /*
                 * Record the must-insert_reserved flag before we drop the spin
                 * lock.
                 */
                must_insert_reserved = locked_ref->must_insert_reserved;
                locked_ref->must_insert_reserved = 0;

                /* Take the extent op away from the head as well. */
                extent_op = locked_ref->extent_op;
                locked_ref->extent_op = NULL;
                spin_unlock(&locked_ref->lock);

                ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
                                          must_insert_reserved);

                btrfs_free_delayed_extent_op(extent_op);
                if (ret) {
                        unselect_delayed_ref_head(delayed_refs, locked_ref);
                        btrfs_put_delayed_ref(ref);
                        btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
                                    ret);
                        return ret;
                }

                /* locked_ref stays set: the next pass continues this head. */
                btrfs_put_delayed_ref(ref);
                count++;
                cond_resched();
        }

        /*
         * We don't want to include ref heads since we can have empty ref heads
         * and those will drastically skew our runtime down since we just do
         * accounting, no actual extent tree updates.
         */
        if (actual_count > 0) {
                u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
                u64 avg;

                /*
                 * We weigh the current average higher than our current runtime
                 * to avoid large swings in the average.
                 */
                spin_lock(&delayed_refs->lock);
                avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
                fs_info->avg_delayed_ref_runtime = avg >> 2;    /* div by 4 */
                spin_unlock(&delayed_refs->lock);
        }
        return 0;
}
2817
#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning
 *
 * Walks down from the root of @root, alternating between the left and the
 * right child, and returns the bytenr of the last entry visited.  Returns 0
 * for an empty tree.
 *
 * NOTE(review): this code is only built when SCRAMBLE_DELAYED_REFS is
 * defined; the rb_node member name used with rb_entry() below differs from
 * the ref_node member used elsewhere in this file - verify before enabling.
 */
static u64 find_middle(struct rb_root *root)
{
        struct rb_node *n = root->rb_node;
        struct btrfs_delayed_ref_node *entry;
        int alt = 1;
        /* Start from a defined value so an empty tree does not return junk. */
        u64 middle = 0;

        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
                WARN_ON(!entry->in_tree);

                middle = entry->bytenr;

                /* Zig-zag: left child on one step, right child on the next. */
                if (alt)
                        n = n->rb_left;
                else
                        n = n->rb_right;

                alt = 1 - alt;
        }
        return middle;
}
#endif
2860
2861 static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2862 {
2863         u64 num_bytes;
2864
2865         num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2866                              sizeof(struct btrfs_extent_inline_ref));
2867         if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2868                 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2869
2870         /*
2871          * We don't ever fill up leaves all the way so multiply by 2 just to be
2872          * closer to what we're really going to want to use.
2873          */
2874         return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2875 }
2876
2877 /*
2878  * Takes the number of bytes to be csumm'ed and figures out how many leaves it
2879  * would require to store the csums for that many bytes.
2880  */
2881 u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2882 {
2883         u64 csum_size;
2884         u64 num_csums_per_leaf;
2885         u64 num_csums;
2886
2887         csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2888         num_csums_per_leaf = div64_u64(csum_size,
2889                         (u64)btrfs_super_csum_size(fs_info->super_copy));
2890         num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2891         num_csums += num_csums_per_leaf - 1;
2892         num_csums = div64_u64(num_csums, num_csums_per_leaf);
2893         return num_csums;
2894 }
2895
2896 int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2897                                        struct btrfs_fs_info *fs_info)
2898 {
2899         struct btrfs_block_rsv *global_rsv;
2900         u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2901         u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2902         unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs;
2903         u64 num_bytes, num_dirty_bgs_bytes;
2904         int ret = 0;
2905
2906         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
2907         num_heads = heads_to_leaves(fs_info, num_heads);
2908         if (num_heads > 1)
2909                 num_bytes += (num_heads - 1) * fs_info->nodesize;
2910         num_bytes <<= 1;
2911         num_bytes += btrfs_csum_bytes_to_leaves(fs_info, csum_bytes) *
2912                                                         fs_info->nodesize;
2913         num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(fs_info,
2914                                                              num_dirty_bgs);
2915         global_rsv = &fs_info->global_block_rsv;
2916
2917         /*
2918          * If we can't allocate any more chunks lets make sure we have _lots_ of
2919          * wiggle room since running delayed refs can create more delayed refs.
2920          */
2921         if (global_rsv->space_info->full) {
2922                 num_dirty_bgs_bytes <<= 1;
2923                 num_bytes <<= 1;
2924         }
2925
2926         spin_lock(&global_rsv->lock);
2927         if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2928                 ret = 1;
2929         spin_unlock(&global_rsv->lock);
2930         return ret;
2931 }
2932
2933 int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2934                                        struct btrfs_fs_info *fs_info)
2935 {
2936         u64 num_entries =
2937                 atomic_read(&trans->transaction->delayed_refs.num_entries);
2938         u64 avg_runtime;
2939         u64 val;
2940
2941         smp_mb();
2942         avg_runtime = fs_info->avg_delayed_ref_runtime;
2943         val = num_entries * avg_runtime;
2944         if (val >= NSEC_PER_SEC)
2945                 return 1;
2946         if (val >= NSEC_PER_SEC / 2)
2947                 return 2;
2948
2949         return btrfs_check_space_for_delayed_refs(trans, fs_info);
2950 }
2951
2952 struct async_delayed_refs {
2953         struct btrfs_root *root;
2954         u64 transid;
2955         int count;
2956         int error;
2957         int sync;
2958         struct completion wait;
2959         struct btrfs_work work;
2960 };
2961
2962 static inline struct async_delayed_refs *
2963 to_async_delayed_refs(struct btrfs_work *work)
2964 {
2965         return container_of(work, struct async_delayed_refs, work);
2966 }
2967
2968 static void delayed_ref_async_start(struct btrfs_work *work)
2969 {
2970         struct async_delayed_refs *async = to_async_delayed_refs(work);
2971         struct btrfs_trans_handle *trans;
2972         struct btrfs_fs_info *fs_info = async->root->fs_info;
2973         int ret;
2974
2975         /* if the commit is already started, we don't need to wait here */
2976         if (btrfs_transaction_blocked(fs_info))
2977                 goto done;
2978
2979         trans = btrfs_join_transaction(async->root);
2980         if (IS_ERR(trans)) {
2981                 async->error = PTR_ERR(trans);
2982                 goto done;
2983         }
2984
2985         /*
2986          * trans->sync means that when we call end_transaction, we won't
2987          * wait on delayed refs
2988          */
2989         trans->sync = true;
2990
2991         /* Don't bother flushing if we got into a different transaction */
2992         if (trans->transid > async->transid)
2993                 goto end;
2994
2995         ret = btrfs_run_delayed_refs(trans, async->count);
2996         if (ret)
2997                 async->error = ret;
2998 end:
2999         ret = btrfs_end_transaction(trans);
3000         if (ret && !async->error)
3001                 async->error = ret;
3002 done:
3003         if (async->sync)
3004                 complete(&async->wait);
3005         else
3006                 kfree(async);
3007 }
3008
3009 int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
3010                                  unsigned long count, u64 transid, int wait)
3011 {
3012         struct async_delayed_refs *async;
3013         int ret;
3014
3015         async = kmalloc(sizeof(*async), GFP_NOFS);
3016         if (!async)
3017                 return -ENOMEM;
3018
3019         async->root = fs_info->tree_root;
3020         async->count = count;
3021         async->error = 0;
3022         async->transid = transid;
3023         if (wait)
3024                 async->sync = 1;
3025         else
3026                 async->sync = 0;
3027         init_completion(&async->wait);
3028
3029         btrfs_init_work(&async->work, btrfs_extent_refs_helper,
3030                         delayed_ref_async_start, NULL, NULL);
3031
3032         btrfs_queue_work(fs_info->extent_workers, &async->work);
3033
3034         if (wait) {
3035                 wait_for_completion(&async->wait);
3036                 ret = async->error;
3037                 kfree(async);
3038                 return ret;
3039         }
3040         return 0;
3041 }
3042
3043 /*
3044  * this starts processing the delayed reference count updates and
3045  * extent insertions we have queued up so far.  count can be
3046  * 0, which means to process everything in the tree at the start
3047  * of the run (but not newly added entries), or it can be some target
3048  * number you'd like to process.
3049  *
3050  * Returns 0 on success or if called with an aborted transaction
3051  * Returns <0 on error and aborts the transaction
3052  */
3053 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3054                            unsigned long count)
3055 {
3056         struct btrfs_fs_info *fs_info = trans->fs_info;
3057         struct rb_node *node;
3058         struct btrfs_delayed_ref_root *delayed_refs;
3059         struct btrfs_delayed_ref_head *head;
3060         int ret;
3061         int run_all = count == (unsigned long)-1;
3062         bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
3063
3064         /* We'll clean this up in btrfs_cleanup_transaction */
3065         if (trans->aborted)
3066                 return 0;
3067
3068         if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
3069                 return 0;
3070
3071         delayed_refs = &trans->transaction->delayed_refs;
3072         if (count == 0)
3073                 count = atomic_read(&delayed_refs->num_entries) * 2;
3074
3075 again:
3076 #ifdef SCRAMBLE_DELAYED_REFS
3077         delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
3078 #endif
3079         trans->can_flush_pending_bgs = false;
3080         ret = __btrfs_run_delayed_refs(trans, count);
3081         if (ret < 0) {
3082                 btrfs_abort_transaction(trans, ret);
3083                 return ret;
3084         }
3085
3086         if (run_all) {
3087                 if (!list_empty(&trans->new_bgs))
3088                         btrfs_create_pending_block_groups(trans);
3089
3090                 spin_lock(&delayed_refs->lock);
3091                 node = rb_first(&delayed_refs->href_root);
3092                 if (!node) {
3093                         spin_unlock(&delayed_refs->lock);
3094                         goto out;
3095                 }
3096                 head = rb_entry(node, struct btrfs_delayed_ref_head,
3097                                 href_node);
3098                 refcount_inc(&head->refs);
3099                 spin_unlock(&delayed_refs->lock);
3100
3101                 /* Mutex was contended, block until it's released and retry. */
3102                 mutex_lock(&head->mutex);
3103                 mutex_unlock(&head->mutex);
3104
3105                 btrfs_put_delayed_ref_head(head);
3106                 cond_resched();
3107                 goto again;
3108         }
3109 out:
3110         trans->can_flush_pending_bgs = can_flush_pending_bgs;
3111         return 0;
3112 }
3113
3114 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3115                                 struct btrfs_fs_info *fs_info,
3116                                 u64 bytenr, u64 num_bytes, u64 flags,
3117                                 int level, int is_data)
3118 {
3119         struct btrfs_delayed_extent_op *extent_op;
3120         int ret;
3121
3122         extent_op = btrfs_alloc_delayed_extent_op();
3123         if (!extent_op)
3124                 return -ENOMEM;
3125
3126         extent_op->flags_to_set = flags;
3127         extent_op->update_flags = true;
3128         extent_op->update_key = false;
3129         extent_op->is_data = is_data ? true : false;
3130         extent_op->level = level;
3131
3132         ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
3133                                           num_bytes, extent_op);
3134         if (ret)
3135                 btrfs_free_delayed_extent_op(extent_op);
3136         return ret;
3137 }
3138
3139 static noinline int check_delayed_ref(struct btrfs_root *root,
3140                                       struct btrfs_path *path,
3141                                       u64 objectid, u64 offset, u64 bytenr)
3142 {
3143         struct btrfs_delayed_ref_head *head;
3144         struct btrfs_delayed_ref_node *ref;
3145         struct btrfs_delayed_data_ref *data_ref;
3146         struct btrfs_delayed_ref_root *delayed_refs;
3147         struct btrfs_transaction *cur_trans;
3148         struct rb_node *node;
3149         int ret = 0;
3150
3151         cur_trans = root->fs_info->running_transaction;
3152         if (!cur_trans)
3153                 return 0;
3154
3155         delayed_refs = &cur_trans->delayed_refs;
3156         spin_lock(&delayed_refs->lock);
3157         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3158         if (!head) {
3159                 spin_unlock(&delayed_refs->lock);
3160                 return 0;
3161         }
3162
3163         if (!mutex_trylock(&head->mutex)) {
3164                 refcount_inc(&head->refs);
3165                 spin_unlock(&delayed_refs->lock);
3166
3167                 btrfs_release_path(path);
3168
3169                 /*
3170                  * Mutex was contended, block until it's released and let
3171                  * caller try again
3172                  */
3173                 mutex_lock(&head->mutex);
3174                 mutex_unlock(&head->mutex);
3175                 btrfs_put_delayed_ref_head(head);
3176                 return -EAGAIN;
3177         }
3178         spin_unlock(&delayed_refs->lock);
3179
3180         spin_lock(&head->lock);
3181         /*
3182          * XXX: We should replace this with a proper search function in the
3183          * future.
3184          */
3185         for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
3186                 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
3187                 /* If it's a shared ref we know a cross reference exists */
3188                 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3189                         ret = 1;
3190                         break;
3191                 }
3192
3193                 data_ref = btrfs_delayed_node_to_data_ref(ref);
3194
3195                 /*
3196                  * If our ref doesn't match the one we're currently looking at
3197                  * then we have a cross reference.
3198                  */
3199                 if (data_ref->root != root->root_key.objectid ||
3200                     data_ref->objectid != objectid ||
3201                     data_ref->offset != offset) {
3202                         ret = 1;
3203                         break;
3204                 }
3205         }
3206         spin_unlock(&head->lock);
3207         mutex_unlock(&head->mutex);
3208         return ret;
3209 }
3210
3211 static noinline int check_committed_ref(struct btrfs_root *root,
3212                                         struct btrfs_path *path,
3213                                         u64 objectid, u64 offset, u64 bytenr)
3214 {
3215         struct btrfs_fs_info *fs_info = root->fs_info;
3216         struct btrfs_root *extent_root = fs_info->extent_root;