/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2
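/*
 * a pending_extent_op records a deferred extent tree change (insert,
 * delete or backref update, per the PENDING_* types above); bytenr and
 * num_bytes identify the extent, and the parent/generation pairs carry
 * the old and new backref values.
 */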
struct pending_extent_op {
int type;
u64 bytenr;
u64 num_bytes;
u64 parent;
u64 orig_parent;
u64 generation;
u64 orig_generation;
int level;
struct list_head list;
int del;
};
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner, struct btrfs_key *ins,
int ref_mod);
static int update_reserved_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner_objectid, int pin,
int ref_to_drop);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
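/* true iff the block group's flags contain all of @bits */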
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
}
/*
* this adds the block group to the fs_info rb tree for the block group
* cache
*/
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct btrfs_block_group_cache *block_group)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct btrfs_block_group_cache *cache;
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
while (*p) {
parent = *p;
cache = rb_entry(parent, struct btrfs_block_group_cache,
cache_node);
if (block_group->key.objectid < cache->key.objectid) {
p = &(*p)->rb_left;
} else if (block_group->key.objectid > cache->key.objectid) {
p = &(*p)->rb_right;
} else {
spin_unlock(&info->block_group_cache_lock);
return -EEXIST;
}
}
rb_link_node(&block_group->cache_node, parent, p);
rb_insert_color(&block_group->cache_node,
&info->block_group_cache_tree);
spin_unlock(&info->block_group_cache_lock);
return 0;
}
/*
* This will return the block group at or after bytenr if contains is 0, else
* it will return the block group that contains the bytenr
*/
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
int contains)
{
struct btrfs_block_group_cache *cache, *ret = NULL;
struct rb_node *n;
u64 end, start;
spin_lock(&info->block_group_cache_lock);
n = info->block_group_cache_tree.rb_node;
while (n) {
cache = rb_entry(n, struct btrfs_block_group_cache,
cache_node);
end = cache->key.objectid + cache->key.offset - 1;
start = cache->key.objectid;
if (bytenr < start) {
if (!contains && (!ret || start < ret->key.objectid))
ret = cache;
n = n->rb_left;
} else if (bytenr > start) {
if (contains && bytenr <= end) {
ret = cache;
break;
}
n = n->rb_right;
} else {
ret = cache;
break;
}
}
if (ret)
atomic_inc(&ret->count);
spin_unlock(&info->block_group_cache_lock);
return ret;
}
/*
* this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet
* since their free space will be released as soon as the transaction commits.
*/
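/*
 * For example, if the range being cached is [start, end) and a pinned
 * extent covers [extent_start, extent_end] inside it, only the gap
 * [start, extent_start) is added to the free space cache here; the pinned
 * range itself becomes usable once the transaction commits.
 */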
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end)
{
u64 extent_start, extent_end, size;
int ret;
while (start < end) {
ret = find_first_extent_bit(&info->pinned_extents, start,
&extent_start, &extent_end,
EXTENT_DIRTY);
if (ret)
break;
if (extent_start == start) {
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
ret = btrfs_add_free_space(block_group, start,
size);
BUG_ON(ret);
start = extent_end + 1;
} else {
break;
}
}
if (start < end) {
size = end - start;
ret = btrfs_add_free_space(block_group, start, size);
BUG_ON(ret);
}
return 0;
}
static int remove_sb_from_cache(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
u64 bytenr;
u64 *logical;
int stripe_len;
int i, nr, ret;
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
cache->key.objectid, bytenr, 0,
&logical, &nr, &stripe_len);
BUG_ON(ret);
while (nr--) {
btrfs_remove_free_space(cache, logical[nr],
stripe_len);
}
kfree(logical);
}
return 0;
}
static int cache_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last;
if (!block_group)
return 0;
root = root->fs_info->extent_root;
if (block_group->cached)
return 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;

/*
* we get into deadlocks with paths held by callers of this function.
* since the alloc_mutex is protecting things right now, just
* skip the locking here
*/
path->skip_locking = 1;
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
key.objectid = last;
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	remove_sb_from_cache(root, block_group);
	block_group->cached = 1;
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}
/*
* return the block group that starts at or after bytenr
*/
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}
/*
* return the block group that contains the given bytenr
*/
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}
void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
if (atomic_dec_and_test(&cache->count))
kfree(cache);
}
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags == flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}
/*
* after adding space to the filesystem, we need to clear the full flags
* on all the space infos.
*/
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list)
found->full = 0;
rcu_read_unlock();
}
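/*
 * scale @num by @factor tenths: div_factor(n, 9) returns 90% of n.  Used
 * below as a "mostly full" threshold when picking a block group.
 */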
static u64 div_factor(u64 num, int factor)
{
if (factor == 10)
return num;
num *= factor;
do_div(num, 10);
return num;
}
u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}
/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
 *
 * A btree leaf may hold multiple references to a file extent. In most cases,
 * these references are from the same file and the corresponding offsets
 * inside the file are close together.
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, 1)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf. It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid,
 *      number of references in the leaf)
 *
 * When a file extent is removed either during snapshot deletion or
 * file truncation, we find the corresponding back reference and check
 * the following fields:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 *     (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to. If the tree block isn't in a reference counted root,
 * the old back references are removed. These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 *     (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a backref is deleted, the following fields are checked:
 *
 * if the backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset
 * is set to the key objectid.
 */
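/*
 * For example, a file extent starting at byte 12582912 that is referenced
 * from a leaf starting at byte 29360128 is found with the key
 * (12582912, BTRFS_EXTENT_REF_KEY, 29360128); if the extent were itself a
 * tree root, the key would be (12582912, BTRFS_EXTENT_REF_KEY, 12582912).
 */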
static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 ref_root, u64 ref_generation,
u64 owner_objectid, int del)
{
struct btrfs_key key;
struct btrfs_extent_ref *ref;
	struct extent_buffer *leaf;
	u64 ref_objectid;
	int ret;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_REF_KEY;
key.offset = parent;
ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
if (ret < 0)
goto out;
if (ret > 0) {
ret = -ENOENT;
goto out;
}
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
ref_objectid = btrfs_ref_objectid(leaf, ref);
if (btrfs_ref_root(leaf, ref) != ref_root ||
btrfs_ref_generation(leaf, ref) != ref_generation ||
(ref_objectid != owner_objectid &&
ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
ret = -EIO;
WARN_ON(1);
goto out;
}
ret = 0;
out:
return ret;
}
static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 ref_root, u64 ref_generation,
u64 owner_objectid,
int refs_to_add)
{
struct btrfs_key key;
struct extent_buffer *leaf;
struct btrfs_extent_ref *ref;
u32 num_refs;
int ret;
key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;
ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_ref);
btrfs_set_ref_root(leaf, ref, ref_root);
btrfs_set_ref_generation(leaf, ref, ref_generation);
btrfs_set_ref_objectid(leaf, ref, owner_objectid);
btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
} else if (ret == -EEXIST) {
u64 existing_owner;
BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_ref);
if (btrfs_ref_root(leaf, ref) != ref_root ||
btrfs_ref_generation(leaf, ref) != ref_generation) {
ret = -EIO;
WARN_ON(1);
goto out;
}
num_refs = btrfs_ref_num_refs(leaf, ref);
BUG_ON(num_refs == 0);
btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
existing_owner = btrfs_ref_objectid(leaf, ref);
if (existing_owner != owner_objectid &&
existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
btrfs_set_ref_objectid(leaf, ref,
BTRFS_MULTIPLE_OBJECTIDS);
}
ret = 0;
	} else {
		goto out;
	}

	btrfs_unlock_up_safe(path, 1);
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}
static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
int refs_to_drop)
{
struct extent_buffer *leaf;
struct btrfs_extent_ref *ref;
u32 num_refs;
int ret = 0;
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
num_refs = btrfs_ref_num_refs(leaf, ref);
BUG_ON(num_refs < refs_to_drop);
num_refs -= refs_to_drop;
if (num_refs == 0) {
ret = btrfs_del_item(trans, root, path);
} else {
btrfs_set_ref_num_refs(leaf, ref, num_refs);
btrfs_mark_buffer_dirty(leaf);
}
btrfs_release_path(root, path);
return ret;
}
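/* the block layer expects 512-byte sectors, hence the >> 9 conversions below */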
static void btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
}
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
int ret;
u64 map_length = num_bytes;
struct btrfs_multi_bio *multi = NULL;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
bytenr, &map_length, &multi, 0);
if (!ret) {
struct btrfs_bio_stripe *stripe = multi->stripes;
int i;
if (map_length > num_bytes)
map_length = num_bytes;
for (i = 0; i < multi->num_stripes; i++, stripe++) {
btrfs_issue_discard(stripe->dev->bdev,
stripe->physical,
map_length);
}
kfree(multi);
}
return ret;
#else
return 0;
#endif
}
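/*
 * the ref update is not applied to the extent tree here; it is queued as
 * a delayed ref and processed later (see run_clustered_refs below).
 */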
static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes,
u64 orig_parent, u64 parent,
u64 orig_root, u64 ref_root,
				     u64 orig_generation, u64 ref_generation,
				     u64 owner_objectid)
{
	int ret;
	int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;

ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
orig_parent, parent, orig_root,
ref_root, orig_generation,
ref_generation, owner_objectid, pin);
BUG_ON(ret);
return ret;
}
int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
			    u64 num_bytes, u64 orig_parent, u64 parent,
			    u64 ref_root, u64 ref_generation,
			    u64 owner_objectid)
{
int ret;
if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
return 0;
ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
orig_parent, parent, ref_root,
ref_root, ref_generation,
					ref_generation, owner_objectid);
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, u64 bytenr,
				  u64 num_bytes,
u64 orig_parent, u64 parent,
u64 orig_root, u64 ref_root,
				  u64 orig_generation, u64 ref_generation,
				  u64 owner_objectid)
{
int ret;
ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
ref_generation, owner_objectid,
BTRFS_ADD_DELAYED_REF, 0);
BUG_ON(ret);
return ret;
}
static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root,
u64 ref_generation, u64 owner_objectid,
int refs_to_add)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 refs;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

/* first find the extent item and update its reference count */
ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
path, 0, 1);
	if (ret < 0) {
		btrfs_set_path_blocking(path);
		return ret;
	}

if (ret > 0) {
WARN_ON(1);
btrfs_free_path(path);
return -EIO;
}
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
if (key.objectid != bytenr) {
btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
printk(KERN_ERR "btrfs wanted %llu found %llu\n",
(unsigned long long)bytenr,
		       (unsigned long long)key.objectid);
		BUG();
	}
BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(l, item);
btrfs_set_extent_refs(l, item, refs + refs_to_add);
btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(root->fs_info->extent_root, path);
path->reada = 1;
path->leave_spinning = 1;
/* now insert the actual backref */
ret = insert_extent_backref(trans, root->fs_info->extent_root,
path, bytenr, parent,
ref_root, ref_generation,
owner_objectid, refs_to_add);
BUG_ON(ret);
btrfs_free_path(path);
return 0;
}
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, u64 ref_generation,
u64 owner_objectid)
{
int ret;
if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
return 0;
ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
0, ref_root, 0, ref_generation,
owner_objectid);
return ret;
}
static int drop_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_delayed_ref_node *node)
{
int ret = 0;
struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
BUG_ON(node->ref_mod == 0);
ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
node->parent, ref->root, ref->generation,
ref->owner_objectid, ref->pin, node->ref_mod);
return ret;
}
/* helper function to actually process a single delayed ref entry */
static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_delayed_ref_node *node,
int insert_reserved)
{
int ret;
struct btrfs_delayed_ref *ref;
if (node->parent == (u64)-1) {
struct btrfs_delayed_ref_head *head;
/*
* we've hit the end of the chain and we were supposed
* to insert this extent into the tree. But, it got
* deleted before we ever needed to insert it, so all
* we have to do is clean up the accounting
*/
if (insert_reserved) {
update_reserved_extents(root, node->bytenr,
node->num_bytes, 0);
}
head = btrfs_delayed_node_to_head(node);
mutex_unlock(&head->mutex);
return 0;
}
ref = btrfs_delayed_node_to_ref(node);
if (ref->action == BTRFS_ADD_DELAYED_REF) {
if (insert_reserved) {
struct btrfs_key ins;
ins.objectid = node->bytenr;
ins.offset = node->num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
/* record the full extent allocation */
ret = __btrfs_alloc_reserved_extent(trans, root,
node->parent, ref->root,
ref->generation, ref->owner_objectid,
&ins, node->ref_mod);
update_reserved_extents(root, node->bytenr,
node->num_bytes, 0);
} else {
/* just add one backref */
ret = add_extent_ref(trans, root, node->bytenr,
node->num_bytes,
node->parent, ref->root, ref->generation,
ref->owner_objectid, node->ref_mod);
}
BUG_ON(ret);
} else if (ref->action == BTRFS_DROP_DELAYED_REF) {
WARN_ON(insert_reserved);
ret = drop_delayed_ref(trans, root, node);
}
return 0;
}
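/*
 * pick the next delayed ref to run for a head: ADD refs are returned
 * before DROP refs so the extent's reference count never transiently
 * drops to zero while updates are still pending.
 */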
static noinline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
struct rb_node *node;
struct btrfs_delayed_ref_node *ref;
int action = BTRFS_ADD_DELAYED_REF;
again:
/*
* select delayed ref of type BTRFS_ADD_DELAYED_REF first.
* this prevents ref count from going down to zero when
* there still are pending delayed ref.
*/
node = rb_prev(&head->node.rb_node);
while (1) {
if (!node)
break;
ref = rb_entry(node, struct btrfs_delayed_ref_node,
rb_node);
if (ref->bytenr != head->node.bytenr)
break;
if (btrfs_delayed_node_to_ref(ref)->action == action)
return ref;
node = rb_prev(node);
}
if (action == BTRFS_ADD_DELAYED_REF) {
action = BTRFS_DROP_DELAYED_REF;
goto again;
}
return NULL;
}
static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct list_head *cluster)
{
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
	int ret;
	int count = 0;
	int must_insert_reserved = 0;
delayed_refs = &trans->transaction->delayed_refs;
while (1) {
if (!locked_ref) {
/* pick a new head ref from the cluster list */
if (list_empty(cluster))
break;
locked_ref = list_entry(cluster->next,
struct btrfs_delayed_ref_head, cluster);
/* grab the lock that says we are going to process
* all the refs for this head */
ret = btrfs_delayed_ref_lock(trans, locked_ref);
/*
* we may have dropped the spin lock to get the head
* mutex lock, and that might have given someone else
* time to free the head. If that's true, it has been
* removed from our list and we can move on.
*/
if (ret == -EAGAIN) {
locked_ref = NULL;
count++;
continue;
}
}
/*
* record the must insert reserved flag before we
* drop the spin lock.
*/
must_insert_reserved = locked_ref->must_insert_reserved;
locked_ref->must_insert_reserved = 0;
/*
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
ref = select_delayed_ref(locked_ref);
if (!ref) {
/* All delayed refs have been processed, Go ahead
* and send the head node to run_one_delayed_ref,
* so that any accounting fixes can happen
*/
ref = &locked_ref->node;
list_del_init(&locked_ref->cluster);
locked_ref = NULL;
}
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root, ref,
must_insert_reserved);
BUG_ON(ret);