/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#undef SCRAMBLE_DELAYED_REFS
/*
* control flags for do_chunk_alloc's force field
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
* if we really need one.
*
* CHUNK_ALLOC_LIMITED means to only try and allocate one
* if we have very few chunks already allocated. This is
* used as part of the clustering code to help make sure
* we have a good pool of storage to cluster in, without
* filling the FS with empty chunks
*
* CHUNK_ALLOC_FORCE means it must try to allocate one
*
*/
enum {
CHUNK_ALLOC_NO_FORCE = 0,
CHUNK_ALLOC_LIMITED = 1,
CHUNK_ALLOC_FORCE = 2,
};
/*
* Control how reservations are dealt with.
*
* RESERVE_FREE - freeing a reservation.
* RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
* ENOSPC accounting
* RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
* bytes_may_use as the ENOSPC accounting is done elsewhere
*/
enum {
RESERVE_FREE = 0,
RESERVE_ALLOC = 1,
RESERVE_ALLOC_NO_ACCOUNT = 2,
};
static int update_block_group(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner_objectid,
u64 owner_offset, int refs_to_drop,
struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 flags,
int force);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
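/*
 * Minimal illustrative sketch (hypothetical helper, not used elsewhere in
 * this file): the RESERVE_* values above are meant to be passed as the
 * 'reserve' argument of btrfs_update_reserved_bytes(), and the CHUNK_ALLOC_*
 * values as the 'force' argument of do_chunk_alloc().  The name and control
 * flow below are assumptions for illustration only.
 */
static inline int example_reserve_or_grow(struct btrfs_trans_handle *trans,
					  struct btrfs_root *extent_root,
					  struct btrfs_block_group_cache *cache,
					  u64 num_bytes, u64 flags)
{
	int ret;

	/* charge num_bytes to bytes_may_use so ENOSPC accounting sees it */
	ret = btrfs_update_reserved_bytes(cache, num_bytes, RESERVE_ALLOC);
	if (ret)
		/* out of easy space: allocate a new chunk only if really needed */
		ret = do_chunk_alloc(trans, extent_root, flags,
				     CHUNK_ALLOC_NO_FORCE);
	return ret;
}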
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
smp_mb();
return cache->cached == BTRFS_CACHE_FINISHED;
}
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
return (cache->flags & bits) == bits;
}
static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
atomic_inc(&cache->count);
}
void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
if (atomic_dec_and_test(&cache->count)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
kfree(cache->free_space_ctl);
kfree(cache);
}
}
/*
* this adds the block group to the fs_info rb tree for the block group
* cache
*/
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct btrfs_block_group_cache *block_group)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct btrfs_block_group_cache *cache;
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
while (*p) {
parent = *p;
cache = rb_entry(parent, struct btrfs_block_group_cache,
cache_node);
if (block_group->key.objectid < cache->key.objectid) {
p = &(*p)->rb_left;
} else if (block_group->key.objectid > cache->key.objectid) {
p = &(*p)->rb_right;
} else {
spin_unlock(&info->block_group_cache_lock);
return -EEXIST;
}
}
rb_link_node(&block_group->cache_node, parent, p);
rb_insert_color(&block_group->cache_node,
&info->block_group_cache_tree);
if (info->first_logical_byte > block_group->key.objectid)
info->first_logical_byte = block_group->key.objectid;
spin_unlock(&info->block_group_cache_lock);
return 0;
}
/*
* This will return the block group at or after bytenr if contains is 0, else
* it will return the block group that contains the bytenr
*/
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
int contains)
{
struct btrfs_block_group_cache *cache, *ret = NULL;
struct rb_node *n;
u64 end, start;
spin_lock(&info->block_group_cache_lock);
n = info->block_group_cache_tree.rb_node;
while (n) {
cache = rb_entry(n, struct btrfs_block_group_cache,
cache_node);
end = cache->key.objectid + cache->key.offset - 1;
start = cache->key.objectid;
if (bytenr < start) {
if (!contains && (!ret || start < ret->key.objectid))
ret = cache;
n = n->rb_left;
} else if (bytenr > start) {
if (contains && bytenr <= end) {
ret = cache;
break;
}
n = n->rb_right;
} else {
ret = cache;
break;
}
}
if (ret) {
btrfs_get_block_group(ret);
if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
info->first_logical_byte = ret->key.objectid;
}
spin_unlock(&info->block_group_cache_lock);
return ret;
}
static int add_excluded_extent(struct btrfs_root *root,
u64 start, u64 num_bytes)
{
u64 end = start + num_bytes - 1;
set_extent_bits(&root->fs_info->freed_extents[0],
start, end, EXTENT_UPTODATE, GFP_NOFS);
set_extent_bits(&root->fs_info->freed_extents[1],
start, end, EXTENT_UPTODATE, GFP_NOFS);
return 0;
}
static void free_excluded_extents(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
u64 start, end;
start = cache->key.objectid;
end = start + cache->key.offset - 1;
clear_extent_bits(&root->fs_info->freed_extents[0],
start, end, EXTENT_UPTODATE, GFP_NOFS);
clear_extent_bits(&root->fs_info->freed_extents[1],
start, end, EXTENT_UPTODATE, GFP_NOFS);
}
static int exclude_super_stripes(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
u64 bytenr;
u64 *logical;
int stripe_len;
int i, nr, ret;
if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
cache->bytes_super += stripe_len;
ret = add_excluded_extent(root, cache->key.objectid,
stripe_len);
BUG_ON(ret); /* -ENOMEM */
}
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
cache->key.objectid, bytenr,
0, &logical, &nr, &stripe_len);
BUG_ON(ret); /* -ENOMEM */
while (nr--) {
cache->bytes_super += stripe_len;
ret = add_excluded_extent(root, logical[nr],
stripe_len);
BUG_ON(ret); /* -ENOMEM */
}
kfree(logical);
}
return 0;
}
static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
struct btrfs_caching_control *ctl;
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_STARTED) {
spin_unlock(&cache->lock);
return NULL;
}
/* We're loading it the fast way, so we don't have a caching_ctl. */
if (!cache->caching_ctl) {
spin_unlock(&cache->lock);
return NULL;
}
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
spin_unlock(&cache->lock);
return ctl;
}
static void put_caching_control(struct btrfs_caching_control *ctl)
{
if (atomic_dec_and_test(&ctl->count))
kfree(ctl);
}
/*
* this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet
* since their free space will be released as soon as the transaction commits.
*/
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end)
{
u64 extent_start, extent_end, size, total_added = 0;
int ret;
while (start < end) {
ret = find_first_extent_bit(info->pinned_extents, start,
&extent_start, &extent_end,
EXTENT_DIRTY | EXTENT_UPTODATE,
NULL);
if (ret)
break;
if (extent_start <= start) {
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
total_added += size;
ret = btrfs_add_free_space(block_group, start,
size);
BUG_ON(ret); /* -ENOMEM or logic error */
start = extent_end + 1;
} else {
break;
}
}
if (start < end) {
size = end - start;
total_added += size;
ret = btrfs_add_free_space(block_group, start, size);
BUG_ON(ret); /* -ENOMEM or logic error */
}
return total_added;
}
static noinline void caching_thread(struct btrfs_work *work)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_fs_info *fs_info;
struct btrfs_caching_control *caching_ctl;
struct btrfs_root *extent_root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key key;
u64 total_found = 0;
u64 last = 0;
u32 nritems;
int ret = 0;
caching_ctl = container_of(work, struct btrfs_caching_control, work);
block_group = caching_ctl->block_group;
fs_info = block_group->fs_info;
extent_root = fs_info->extent_root;
path = btrfs_alloc_path();
if (!path)
goto out;
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
/*
 * We don't want to deadlock with somebody trying to allocate a new
 * extent for the extent root while also trying to search the extent
 * root to add free space. So we skip locking and search the commit
 * root, since it's read-only.
 */
path->skip_locking = 1;
path->search_commit_root = 1;
path->reada = 1;
key.objectid = last;
key.offset = 0;
key.type = BTRFS_EXTENT_ITEM_KEY;
again:
mutex_lock(&caching_ctl->mutex);
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->extent_commit_sem);
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
while (1) {
if (btrfs_fs_closing(fs_info) > 1) {
last = (u64)-1;
break;
}
if (path->slots[0] < nritems) {
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
} else {
ret = find_next_key(path, 0, &key);
if (ret)
break;
if (need_resched() ||
btrfs_next_leaf(extent_root, path)) {
caching_ctl->progress = last;
btrfs_release_path(path);
up_read(&fs_info->extent_commit_sem);
mutex_unlock(&caching_ctl->mutex);
cond_resched();
goto again;
}
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
continue;
}
if (key.objectid < block_group->key.objectid) {
path->slots[0]++;
continue;
}
if (key.objectid >= block_group->key.objectid +
block_group->key.offset)
break;
if (key.type == BTRFS_EXTENT_ITEM_KEY) {
total_found += add_new_free_space(block_group,
fs_info, last,
key.objectid);
last = key.objectid + key.offset;
if (total_found > (1024 * 1024 * 2)) {
total_found = 0;
wake_up(&caching_ctl->wait);
}
}
path->slots[0]++;
}
ret = 0;
total_found += add_new_free_space(block_group, fs_info, last,
block_group->key.objectid +
block_group->key.offset);
caching_ctl->progress = (u64)-1;
spin_lock(&block_group->lock);
block_group->caching_ctl = NULL;
block_group->cached = BTRFS_CACHE_FINISHED;
spin_unlock(&block_group->lock);
err:
btrfs_free_path(path);
up_read(&fs_info->extent_commit_sem);
free_excluded_extents(extent_root, block_group);
mutex_unlock(&caching_ctl->mutex);
out:
wake_up(&caching_ctl->wait);
put_caching_control(caching_ctl);
btrfs_put_block_group(block_group);
}
static int cache_block_group(struct btrfs_block_group_cache *cache,
int load_cache_only)
{
DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
int ret = 0;
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
if (!caching_ctl)
return -ENOMEM;
INIT_LIST_HEAD(&caching_ctl->list);
mutex_init(&caching_ctl->mutex);
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->key.objectid;
atomic_set(&caching_ctl->count, 1);
caching_ctl->work.func = caching_thread;
spin_lock(&cache->lock);
/*
* This should be a rare occasion, but this could happen I think in the
* case where one thread starts to load the space cache info, and then
* some other thread starts a transaction commit which tries to do an
* allocation while the other thread is still loading the space cache
* info. The previous loop should have kept us from choosing this block
* group, but if we've moved to the state where we will wait on caching
* block groups we need to first check if we're doing a fast load here,
* so we can wait for it to finish, otherwise we could end up allocating
* from a block group whose cache gets evicted for one reason or
* another.
*/
while (cache->cached == BTRFS_CACHE_FAST) {
struct btrfs_caching_control *ctl;
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&cache->lock);
schedule();
finish_wait(&ctl->wait, &wait);
put_caching_control(ctl);
spin_lock(&cache->lock);
}
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
kfree(caching_ctl);
return 0;
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_FAST;
spin_unlock(&cache->lock);
if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
ret = load_free_space_cache(fs_info, cache);
spin_lock(&cache->lock);
if (ret == 1) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_FINISHED;
cache->last_byte_to_unpin = (u64)-1;
} else {
if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
}
}
spin_unlock(&cache->lock);
wake_up(&caching_ctl->wait);
if (ret == 1) {
put_caching_control(caching_ctl);
free_excluded_extents(fs_info->extent_root, cache);
return 0;
}
} else {
/*
* We are not going to do the fast caching, set cached to the
* appropriate value and wakeup any waiters.
*/
spin_lock(&cache->lock);
if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
}
spin_unlock(&cache->lock);
wake_up(&caching_ctl->wait);
if (load_cache_only) {
put_caching_control(caching_ctl);
return 0;
}
down_write(&fs_info->extent_commit_sem);
atomic_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem);
btrfs_get_block_group(cache);
btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
return ret;
}
/*
* return the block group that starts at or after bytenr
*/
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
return block_group_cache_tree_search(info, bytenr, 0);
}
/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
u64 bytenr)
{
return block_group_cache_tree_search(info, bytenr, 1);
}
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
u64 flags)
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & flags) {
rcu_read_unlock();
return found;
}
}
rcu_read_unlock();
return NULL;
}
/*
* after adding space to the filesystem, we need to clear the full flags
* on all the space infos.
*/
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list)
found->full = 0;
rcu_read_unlock();
}
u64 btrfs_find_block_group(struct btrfs_root *root,
u64 search_start, u64 search_hint, int owner)
{
struct btrfs_block_group_cache *cache;
u64 used;
u64 last = max(search_hint, search_start);
u64 group_start = 0;
int full_search = 0;
int factor = 9;
int wrapped = 0;
again:
while (1) {
cache = btrfs_lookup_first_block_group(root->fs_info, last);
if (!cache)
break;
spin_lock(&cache->lock);
last = cache->key.objectid + cache->key.offset;
used = btrfs_block_group_used(&cache->item);
if ((full_search || !cache->ro) &&
block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
if (used + cache->pinned + cache->reserved <
div_factor(cache->key.offset, factor)) {
group_start = cache->key.objectid;
spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
goto found;
}
}
spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
cond_resched();
}
if (!wrapped) {
last = search_start;
wrapped = 1;
goto again;
}
if (!full_search && factor < 10) {
last = search_start;
full_search = 1;
factor = 10;
goto again;
}
found:
return group_start;
}
/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
int ret;
struct btrfs_key key;
struct btrfs_path *path;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = start;
key.offset = len;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
0, 0);
btrfs_free_path(path);
return ret;
}
/*
* helper function to lookup reference count and flags of extent.
*
* the head node for delayed ref is used to store the sum of all the
* reference count modifications queued up in the rbtree. the head
* node may also store the extent flags to set. This way you can check
* to see what the reference count and extent flags would be if all of
* the delayed refs are processed.
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags)
{
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
struct extent_buffer *leaf;
struct btrfs_key key;
u32 item_size;
u64 num_refs;
u64 extent_flags;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
if (!trans) {
path->skip_locking = 1;
path->search_commit_root = 1;
}
again:
ret = btrfs_search_slot(trans, root->fs_info->extent_root,
&key, path, 0, 0);
if (ret < 0)
goto out_free;
if (ret == 0) {
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if (item_size >= sizeof(*ei)) {
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
struct btrfs_extent_item_v0 *ei0;
BUG_ON(item_size != sizeof(*ei0));
ei0 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item_v0);
num_refs = btrfs_extent_refs_v0(leaf, ei0);
/* FIXME: this isn't correct for data */
extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
BUG();
#endif
}
BUG_ON(num_refs == 0);
} else {
num_refs = 0;
extent_flags = 0;
ret = 0;
}
if (!trans)
goto out;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(trans, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
/*
* Mutex was contended, block until it's released and try
* again
*/
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
goto again;
}
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
else
BUG_ON(num_refs == 0);
num_refs += head->node.ref_mod;
mutex_unlock(&head->mutex);
}
spin_unlock(&delayed_refs->lock);
out:
WARN_ON(num_refs == 0);
if (refs)
*refs = num_refs;
if (flags)
*flags = extent_flags;
out_free:
btrfs_free_path(path);
return ret;
}
/*
* Back reference rules. Back refs have three main goals:
*
* 1) differentiate between all holders of references to an extent so that
* when a reference is dropped we can make sure it was a valid reference
* before freeing the extent.
*
* 2) Provide enough information to quickly find the holders of an extent
* if we notice a given block is corrupted or bad.
*
* 3) Make it easy to migrate blocks for FS shrinking or storage pool
* maintenance. This is actually the same as #2, but with a slightly
* different use case.
*
* There are two kinds of back refs. The implicit back refs is optimized
* for pointers in non-shared tree blocks. For a given pointer in a block,
* back refs of this kind provide information about the block's owner tree
* and the pointer's key. This information allows us to find the block by
* b-tree searching. The full back refs is for pointers in tree blocks not
* referenced by their owner trees. The location of tree block is recorded
* in the back refs. Actually the full back refs is generic, and can be
* used in all cases the implicit back refs is used. The major shortcoming
* of the full back refs is its overhead. Every time a tree block gets
* COWed, we have to update back refs entry for all pointers in it.
*
* For a newly allocated tree block, we use implicit back refs for
* pointers in it. This means most tree related operations only involve
* implicit back refs. For a tree block created in old transaction, the
* only way to drop a reference to it is COW it. So we can detect the
* event that tree block loses its owner tree's reference and do the
* back refs conversion.
*
* When a tree block is COW'd through a tree, there are four cases:
*
* The reference count of the block is one and the tree is the block's
* owner tree. Nothing to do in this case.
*
* The reference count of the block is one and the tree is not the
* block's owner tree. In this case, full back refs is used for pointers
* in the block. Remove these full back refs, add implicit back refs for
* every pointer in the new block.
*
* The reference count of the block is greater than one and the tree is
* the block's owner tree. In this case, implicit back refs is used for
* pointers in the block. Add full back refs for every pointer in the
* block, increase lower level extents' reference counts. The original
* implicit back refs are entailed to the new block.
*
* The reference count of the block is greater than one and the tree is
* not the block's owner tree. Add implicit back refs for every pointer in
* the new block, increase lower level extents' reference count.
*
* Back Reference Key composing:
*
* The key objectid corresponds to the first byte in the extent,
* The key type is used to differentiate between types of back refs.
* There are different meanings of the key offset for different types
* of back refs.
*
* File extents can be referenced by:
*
* - multiple snapshots, subvolumes, or different generations in one subvol
* - different files inside a single subvolume
* - different offsets inside a file (bookend extents in file.c)
*
* The extent ref structure for the implicit back refs has fields for:
*
* - Objectid of the subvolume root
* - objectid of the file holding the reference
* - original offset in the file
* - how many bookend extents
*
* The key offset for the implicit back refs is hash of the first
* three fields.
*
* The extent ref structure for the full back refs has a field for:
*
* - number of pointers in the tree leaf
*
* The key offset for the full back refs is the first byte of
* the tree leaf.
*
* When a file extent is allocated, the implicit back refs is used.
* The fields are filled in:
*
* (root_key.objectid, inode objectid, offset in file, 1)
*
* When a file extent is removed by file truncation, we find the
* corresponding implicit back refs and check the following fields:
*
* (btrfs_header_owner(leaf), inode objectid, offset in file)
*
* Btree extents can be referenced by:
*
* - Different subvolumes
*
* Both the implicit back refs and the full back refs for tree blocks
* only consist of key. The key offset for the implicit back refs is
* objectid of block's owner tree. The key offset for the full back refs
* is the first byte of parent block.
*
* When implicit back refs is used, information about the lowest key and
* level of the tree block are required. This information is stored in
* the tree block info structure.
*/
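/*
 * Minimal sketch of how the implicit back ref key offset described above can
 * be derived as a hash of (subvolume root objectid, inode objectid, file
 * offset).  The helper name and the exact mixing of the two CRCs are
 * illustrative assumptions only; crc32c() comes from <linux/crc32c.h>.
 */
static inline u64 example_data_ref_hash(u64 root_objectid, u64 owner,
					u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	/* hash the subvolume root objectid into the high bits */
	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	/* fold the owning inode and the file offset into the low bits */
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}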
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 owner, u32 extra_size)
{
struct btrfs_extent_item *item;
struct btrfs_extent_item_v0 *ei0;
struct btrfs_extent_ref_v0 *ref0;
struct btrfs_tree_block_info *bi;
struct extent_buffer *leaf;
struct btrfs_key key;
struct btrfs_key found_key;
u32 new_size = sizeof(*item);
u64 refs;
int ret;
leaf = path->nodes[0];
BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
ei0 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item_v0);
refs = btrfs_extent_refs_v0(leaf, ei0);
if (owner == (u64)-1) {
while (1) {
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
return ret;
BUG_ON(ret > 0); /* Corruption */
leaf = path->nodes[0];
}
btrfs_item_key_to_cpu(leaf, &found_key,
path->slots[0]);
BUG_ON(key.objectid != found_key.objectid);
if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
path->slots[0]++;
continue;
}
ref0 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_ref_v0);
owner = btrfs_ref_objectid_v0(leaf, ref0);
break;
}
}
btrfs_release_path(path);
if (owner < BTRFS_FIRST_FREE_OBJECTID)
new_size += sizeof(*bi);
new_size -= sizeof(*ei0);
ret = btrfs_search_slot(trans, root, &key, path,
new_size + extra_size, 1);
if (ret < 0)
return ret;
BUG_ON(ret); /* Corruption */
btrfs_extend_item(trans, root, path, new_size);