    /*
     * Copyright (C) 2007 Oracle.  All rights reserved.
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public
     * License v2 as published by the Free Software Foundation.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * General Public License for more details.
     *
     * You should have received a copy of the GNU General Public
     * License along with this program; if not, write to the
     * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
     */
    
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/version.h>
#include "compat.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "locking.h"
#include "ref-cache.h"
    
/*
 * pending extent operations queued against the extent tree while a
 * transaction runs: insert a new extent item, delete one, or update
 * the back reference after a block has changed parents.
 */
#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

struct pending_extent_op {
	int type;
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 orig_parent;
	u64 generation;
	u64 orig_generation;
	int level;
	struct list_head list;
	int del;
};

static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root, int all);
static int del_pending_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root, int all);
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner);
    
    static int pin_down_bytes(struct btrfs_trans_handle *trans,
    			  struct btrfs_root *root,
    			  u64 bytenr, u64 num_bytes, int is_data);
    static int update_block_group(struct btrfs_trans_handle *trans,
    			      struct btrfs_root *root,
    			      u64 bytenr, u64 num_bytes, int alloc,
    			      int mark_free);
    
/* test whether a block group has all of the given allocation bits set */
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
    {
    	return (cache->flags & bits) == bits;
    }
    
/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
    {
    	struct rb_node **p;
    	struct rb_node *parent = NULL;
    	struct btrfs_block_group_cache *cache;
    
    	spin_lock(&info->block_group_cache_lock);
    	p = &info->block_group_cache_tree.rb_node;
    
    	while (*p) {
    		parent = *p;
    		cache = rb_entry(parent, struct btrfs_block_group_cache,
    				 cache_node);
    		if (block_group->key.objectid < cache->key.objectid) {
    			p = &(*p)->rb_left;
    		} else if (block_group->key.objectid > cache->key.objectid) {
    			p = &(*p)->rb_right;
    		} else {
    			spin_unlock(&info->block_group_cache_lock);
    			return -EEXIST;
    		}
    	}
    
    	rb_link_node(&block_group->cache_node, parent, p);
    	rb_insert_color(&block_group->cache_node,
    			&info->block_group_cache_tree);
    	spin_unlock(&info->block_group_cache_lock);
    
    	return 0;
    }
    
    /*
     * This will return the block group at or after bytenr if contains is 0, else
     * it will return the block group that contains the bytenr
     */
    static struct btrfs_block_group_cache *
    block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
    			      int contains)
    {
    	struct btrfs_block_group_cache *cache, *ret = NULL;
    	struct rb_node *n;
    	u64 end, start;
    
    	spin_lock(&info->block_group_cache_lock);
    	n = info->block_group_cache_tree.rb_node;
    
    	while (n) {
    		cache = rb_entry(n, struct btrfs_block_group_cache,
    				 cache_node);
    		end = cache->key.objectid + cache->key.offset - 1;
    		start = cache->key.objectid;
    
    		if (bytenr < start) {
    			if (!contains && (!ret || start < ret->key.objectid))
    				ret = cache;
    			n = n->rb_left;
    		} else if (bytenr > start) {
    			if (contains && bytenr <= end) {
    				ret = cache;
    				break;
    			}
    			n = n->rb_right;
    		} else {
    			ret = cache;
    			break;
    		}
    	}
    	spin_unlock(&info->block_group_cache_lock);
    
    	return ret;
    }
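
/*
 * Example (illustrative, not in the original source): with block groups
 * covering [0, 1G) and [1G, 2G), a search for bytenr 512M returns the
 * [0, 1G) group when contains=1 (512M falls inside it), but the
 * [1G, 2G) group when contains=0, since that is the first group
 * starting at or after 512M.
 */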
    
/*
 * this is only called by cache_block_group.  Since we could have freed
 * extents, we need to check the pinned_extents tree for any extents that
 * can't be used yet, because their free space will be released only when
 * the transaction commits.
 */
    static int add_new_free_space(struct btrfs_block_group_cache *block_group,
    			      struct btrfs_fs_info *info, u64 start, u64 end)
    {
    	u64 extent_start, extent_end, size;
    	int ret;
    
    
    	mutex_lock(&info->pinned_mutex);
    
    	while (start < end) {
    		ret = find_first_extent_bit(&info->pinned_extents, start,
    					    &extent_start, &extent_end,
    					    EXTENT_DIRTY);
    		if (ret)
    			break;
    
    		if (extent_start == start) {
    			start = extent_end + 1;
    		} else if (extent_start > start && extent_start < end) {
    			size = extent_start - start;
    
    			ret = btrfs_add_free_space(block_group, start,
    						   size);
    
    			BUG_ON(ret);
    			start = extent_end + 1;
    		} else {
    			break;
    		}
    	}
    
    	if (start < end) {
    		size = end - start;
    
    		ret = btrfs_add_free_space(block_group, start, size);
    
    		BUG_ON(ret);
    	}
    
	mutex_unlock(&info->pinned_mutex);
	return 0;
}
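
/*
 * super block mirrors live inside block groups; carve their stripes
 * back out of the free space cache so the allocator never hands those
 * bytes out.
 */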
    static int remove_sb_from_cache(struct btrfs_root *root,
    				struct btrfs_block_group_cache *cache)
    {
    	u64 bytenr;
    	u64 *logical;
    	int stripe_len;
    	int i, nr, ret;
    
    	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
    		bytenr = btrfs_sb_offset(i);
    		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
    				       cache->key.objectid, bytenr, 0,
    				       &logical, &nr, &stripe_len);
    		BUG_ON(ret);
    		while (nr--) {
    			btrfs_remove_free_space(cache, logical[nr],
    						stripe_len);
    		}
    		kfree(logical);
    	}
    	return 0;
    }
    
    
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last = block_group->key.objectid;

	root = root->fs_info->extent_root;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;

	/* start the scan at the first possible extent, past the super block */
	key.objectid = max_t(u64, last, BTRFS_SUPER_INFO_OFFSET);
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			/* the gap between the last extent we saw and this
			 * one is free space */
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	/* free space between the final extent and the end of the group */
	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	remove_sb_from_cache(root, block_group);
	block_group->cached = 1;
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}
    
/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
							 btrfs_fs_info *info,
							 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}
    
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct list_head *cur;
	struct btrfs_space_info *found;

	list_for_each(cur, head) {
		found = list_entry(cur, struct btrfs_space_info, list);
		if (found->flags == flags)
			return found;
	}
	return NULL;
}
    
/* scale num by factor tenths, e.g. factor 9 yields 90% of num */
static u64 div_factor(u64 num, int factor)
    {
    	if (factor == 10)
    		return num;
    	num *= factor;
    	do_div(num, 10);
    	return num;
    }
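
/*
 * Example (illustrative): div_factor(1024, 9) == 921, i.e. roughly 90%
 * of the block group size; this is the fullness threshold used when
 * searching metadata block groups below.
 */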
    
    
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 free_check;
	int full_search = 0;
	int factor = 10;
	int wrapped = 0;

	/* leave metadata block groups 10% slack before treating them as full */
	if (data & BTRFS_BLOCK_GROUP_METADATA)
		factor = 9;

	if (search_start) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_first_block_group(info, search_start);
		if (shint && block_group_bits(shint, data)) {
			spin_lock(&shint->lock);
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned + shint->reserved <
			    div_factor(shint->key.offset, factor)) {
				spin_unlock(&shint->lock);
				return shint;
			}
			spin_unlock(&shint->lock);
		}
	}
	if (hint && block_group_bits(hint, data)) {
		spin_lock(&hint->lock);
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned + hint->reserved <
		    div_factor(hint->key.offset, factor)) {
			spin_unlock(&hint->lock);
			return hint;
		}
		spin_unlock(&hint->lock);
		last = hint->key.objectid + hint->key.offset;
	} else {
		if (hint)
			last = max(hint->key.objectid, search_start);
		else
			last = search_start;
	}
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (block_group_bits(cache, data)) {
			free_check = div_factor(cache->key.offset, factor);
			if (used + cache->pinned + cache->reserved <
			    free_check) {
				found_group = cache;
				spin_unlock(&cache->lock);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		cond_resched();
	}
	/* retry once from the start of the search range */
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	/* then retry without the free space slack factor */
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return found_group;
}
    
    struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
    						 struct btrfs_block_group_cache
    						 *hint, u64 search_start,
    						 int data, int owner)
    {
    
    	struct btrfs_block_group_cache *ret;
    	ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
    	return ret;
    }
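
/*
 * Illustrative use (a sketch, not code from the original file): a
 * caller looking for a data block group with free room might do
 *
 *	bg = btrfs_find_block_group(root, NULL, 0,
 *				    BTRFS_BLOCK_GROUP_DATA, 0);
 *
 * and fall back to allocating a new chunk if NULL comes back.
 */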
    
/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}
    
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
 *
 * A btree leaf may hold multiple references to a file extent. In most cases,
 * these references are from the same file and the corresponding offsets
 * inside the file are close together.
 *
 * When a file extent is allocated, the fields are filled in:
 *
 *     (root_key.objectid, trans->transid, inode objectid, 1)
 *
 * When a leaf is cow'd, new references are added for every file extent found
 * in the leaf.  It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid,
 *      number of references in the leaf)
 *
 * When a file extent is removed, either during snapshot deletion or file
 * truncation, we find the corresponding back reference and check the
 * following fields:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 *     (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to. If the tree block isn't in a reference counted root,
 * the old back references are removed. These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 *     (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a backref is being deleted, the following fields are checked:
 *
 * if backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset
 * is set to the key objectid.
 */
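
/*
 * A minimal sketch (illustrative only, not a helper from the original
 * file): composing the back reference key for an extent at 'bytenr'
 * referenced from a parent block at 'parent_bytenr', per the rules above.
 */
static inline void compose_backref_key_sketch(struct btrfs_key *key,
					      u64 bytenr, u64 parent_bytenr)
{
	key->objectid = bytenr;		/* first byte of the extent */
	key->type = BTRFS_EXTENT_REF_KEY;
	key->offset = parent_bytenr;	/* first byte of the parent extent */
}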
    
    
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid, int del)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct extent_buffer *leaf;
	u64 ref_objectid;
	int ret;

    	key.objectid = bytenr;
    	key.type = BTRFS_EXTENT_REF_KEY;
    	key.offset = parent;
    
    	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
    	if (ret < 0)
    		goto out;
    	if (ret > 0) {
    		ret = -ENOENT;
    		goto out;
    	}
    
    	leaf = path->nodes[0];
    	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
    
    	ref_objectid = btrfs_ref_objectid(leaf, ref);
    
    	if (btrfs_ref_root(leaf, ref) != ref_root ||
    
    	    btrfs_ref_generation(leaf, ref) != ref_generation ||
    	    (ref_objectid != owner_objectid &&
    	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
    
    		ret = -EIO;
    		WARN_ON(1);
    		goto out;
    	}
    	ret = 0;
    out:
    	return ret;
    }
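
/*
 * Illustrative call (an assumption-laden sketch, not code from this
 * file): verifying that a backref for the extent at 'bytenr' exists
 * with a matching root, generation and owner before touching it.  A
 * return of 0 means the backref was found and matched, -ENOENT that no
 * such item exists; passing del=1 prepares the path for deletion.
 */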
    
    
    /*
     * updates all the backrefs that are pending on update_list for the
     * extent_root
     */
    static int noinline update_backrefs(struct btrfs_trans_handle *trans,
    				    struct btrfs_root *extent_root,
    				    struct btrfs_path *path,
    				    struct list_head *update_list)
    {
    	struct btrfs_key key;
    	struct btrfs_extent_ref *ref;
    	struct btrfs_fs_info *info = extent_root->fs_info;
    	struct pending_extent_op *op;
    	struct extent_buffer *leaf;
    	int ret = 0;
    	struct list_head *cur = update_list->next;
    	u64 ref_objectid;
    	u64 ref_root = extent_root->root_key.objectid;
    
    	op = list_entry(cur, struct pending_extent_op, list);
    
    search:
    	key.objectid = op->bytenr;
    	key.type = BTRFS_EXTENT_REF_KEY;
    	key.offset = op->orig_parent;
    
    	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
    	BUG_ON(ret);
    
    	leaf = path->nodes[0];
    
    loop:
    	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
    
    	ref_objectid = btrfs_ref_objectid(leaf, ref);
    
    	if (btrfs_ref_root(leaf, ref) != ref_root ||
    	    btrfs_ref_generation(leaf, ref) != op->orig_generation ||
    	    (ref_objectid != op->level &&
    	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
    		printk(KERN_ERR "couldn't find %Lu, parent %Lu, root %Lu, "
    		       "owner %u\n", op->bytenr, op->orig_parent,
    		       ref_root, op->level);
    		btrfs_print_leaf(extent_root, leaf);
    		BUG();
    	}
    
    	key.objectid = op->bytenr;
    	key.offset = op->parent;
    	key.type = BTRFS_EXTENT_REF_KEY;
    	ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
    	BUG_ON(ret);
    	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
    	btrfs_set_ref_generation(leaf, ref, op->generation);
    
    	cur = cur->next;
    
    	list_del_init(&op->list);
    	unlock_extent(&info->extent_ins, op->bytenr,
    		      op->bytenr + op->num_bytes - 1, GFP_NOFS);
    	kfree(op);
    
    	if (cur == update_list) {
    		btrfs_mark_buffer_dirty(path->nodes[0]);
    		btrfs_release_path(extent_root, path);
    		goto out;
    	}
    
    	op = list_entry(cur, struct pending_extent_op, list);
    
    	path->slots[0]++;
    	while (path->slots[0] < btrfs_header_nritems(leaf)) {
    		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    		if (key.objectid == op->bytenr &&
    		    key.type == BTRFS_EXTENT_REF_KEY)
    			goto loop;
    		path->slots[0]++;
    	}
    
    	btrfs_mark_buffer_dirty(path->nodes[0]);
    	btrfs_release_path(extent_root, path);
    	goto search;
    
    out:
    	return 0;
    }
    
    static int noinline insert_extents(struct btrfs_trans_handle *trans,
    				   struct btrfs_root *extent_root,
    				   struct btrfs_path *path,
    				   struct list_head *insert_list, int nr)
    {
    	struct btrfs_key *keys;
    	u32 *data_size;
    	struct pending_extent_op *op;
    	struct extent_buffer *leaf;
    	struct list_head *cur = insert_list->next;
    	struct btrfs_fs_info *info = extent_root->fs_info;
    	u64 ref_root = extent_root->root_key.objectid;
    	int i = 0, last = 0, ret;
    	int total = nr * 2;
    
    	if (!nr)
    		return 0;
    
    	keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
    	if (!keys)
    		return -ENOMEM;
    
    	data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
    	if (!data_size) {
    		kfree(keys);
    		return -ENOMEM;
    	}
    
    	list_for_each_entry(op, insert_list, list) {
    		keys[i].objectid = op->bytenr;
    		keys[i].offset = op->num_bytes;
    		keys[i].type = BTRFS_EXTENT_ITEM_KEY;
    		data_size[i] = sizeof(struct btrfs_extent_item);
    		i++;
    
    		keys[i].objectid = op->bytenr;
    		keys[i].offset = op->parent;
    		keys[i].type = BTRFS_EXTENT_REF_KEY;
    		data_size[i] = sizeof(struct btrfs_extent_ref);
    		i++;
    	}
    
    	op = list_entry(cur, struct pending_extent_op, list);
    	i = 0;
    	while (i < total) {
    		int c;
    		ret = btrfs_insert_some_items(trans, extent_root, path,
    					      keys+i, data_size+i, total-i);
    		BUG_ON(ret < 0);
    
    		if (last && ret > 1)
    			BUG();
    
    		leaf = path->nodes[0];
    		for (c = 0; c < ret; c++) {
    			int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
    
    			/*
    			 * if the first item we inserted was a backref, then
    			 * the EXTENT_ITEM will be the odd c's, else it will
    			 * be the even c's
    			 */
    			if ((ref_first && (c % 2)) ||
    			    (!ref_first && !(c % 2))) {
    				struct btrfs_extent_item *itm;
    
    				itm = btrfs_item_ptr(leaf, path->slots[0] + c,
    						     struct btrfs_extent_item);
    				btrfs_set_extent_refs(path->nodes[0], itm, 1);
    				op->del++;
    			} else {
    				struct btrfs_extent_ref *ref;
    
    				ref = btrfs_item_ptr(leaf, path->slots[0] + c,
    						     struct btrfs_extent_ref);
    				btrfs_set_ref_root(leaf, ref, ref_root);
    				btrfs_set_ref_generation(leaf, ref,
    							 op->generation);
    				btrfs_set_ref_objectid(leaf, ref, op->level);
    				btrfs_set_ref_num_refs(leaf, ref, 1);
    				op->del++;
    			}
    
    			/*
			 * using del to see when it's ok to free up the
    			 * pending_extent_op.  In the case where we insert the
    			 * last item on the list in order to help do batching
    			 * we need to not free the extent op until we actually
    			 * insert the extent_item
    			 */
    			if (op->del == 2) {
    				unlock_extent(&info->extent_ins, op->bytenr,
    					      op->bytenr + op->num_bytes - 1,
    					      GFP_NOFS);
    				cur = cur->next;
    				list_del_init(&op->list);
    				kfree(op);
    				if (cur != insert_list)
    					op = list_entry(cur,
    						struct pending_extent_op,
    						list);
    			}
    		}
    		btrfs_mark_buffer_dirty(leaf);
    		btrfs_release_path(extent_root, path);
    
    		/*
		 * Ok, backrefs and items usually go right next to each other,
    		 * but if we could only insert 1 item that means that we
    		 * inserted on the end of a leaf, and we have no idea what may
    		 * be on the next leaf so we just play it safe.  In order to
    		 * try and help this case we insert the last thing on our
    		 * insert list so hopefully it will end up being the last
    		 * thing on the leaf and everything else will be before it,
    		 * which will let us insert a whole bunch of items at the same
    		 * time.
    		 */
    		if (ret == 1 && !last && (i + ret < total)) {
    			/*
    			 * last: where we will pick up the next time around
    			 * i: our current key to insert, will be total - 1
    			 * cur: the current op we are screwing with
    			 * op: duh
    			 */
    			last = i + ret;
    			i = total - 1;
    			cur = insert_list->prev;
    			op = list_entry(cur, struct pending_extent_op, list);
    		} else if (last) {
    			/*
    			 * ok we successfully inserted the last item on the
    			 * list, lets reset everything
    			 *
    			 * i: our current key to insert, so where we left off
    			 *    last time
    			 * last: done with this
    			 * cur: the op we are messing with
    			 * op: duh
    			 * total: since we inserted the last key, we need to
			 *        decrement total so we don't overflow
    			 */
    			i = last;
    			last = 0;
    			total--;
    
    			if (i < total) {
    				cur = insert_list->next;
    				op = list_entry(cur, struct pending_extent_op,
    						list);
    			}
    
    		} else {
    			i += ret;
    		}
    
    		cond_resched();
    	}
    	ret = 0;
    	kfree(keys);
    	kfree(data_size);
    	return ret;
    }
    
    
static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
	if (ret == 0) {
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		btrfs_set_ref_root(leaf, ref, ref_root);
		btrfs_set_ref_generation(leaf, ref, ref_generation);
		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
		btrfs_set_ref_num_refs(leaf, ref, 1);
	} else if (ret == -EEXIST) {
		u64 existing_owner;
		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		if (btrfs_ref_root(leaf, ref) != ref_root ||
		    btrfs_ref_generation(leaf, ref) != ref_generation) {
			ret = -EIO;
			WARN_ON(1);
			goto out;
		}

		num_refs = btrfs_ref_num_refs(leaf, ref);
		BUG_ON(num_refs == 0);
		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

		existing_owner = btrfs_ref_objectid(leaf, ref);
		if (existing_owner != owner_objectid &&
		    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
			btrfs_set_ref_objectid(leaf, ref,
					BTRFS_MULTIPLE_OBJECTIDS);
		}
		ret = 0;
	} else {
		goto out;
	}
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}
    
    static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
    					  struct btrfs_root *root,
    					  struct btrfs_path *path)
    {
    	struct extent_buffer *leaf;
    	struct btrfs_extent_ref *ref;
    	u32 num_refs;
    	int ret = 0;
    
    	leaf = path->nodes[0];
    	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
    	num_refs = btrfs_ref_num_refs(leaf, ref);
    	BUG_ON(num_refs == 0);
    	num_refs -= 1;
    	if (num_refs == 0) {
    		ret = btrfs_del_item(trans, root, path);
    	} else {
    		btrfs_set_ref_num_refs(leaf, ref, num_refs);
    		btrfs_mark_buffer_dirty(leaf);
    	}
    	btrfs_release_path(root, path);
    	return ret;
    }
    
    
    #ifdef BIO_RW_DISCARD
    
/* start and len are in bytes; the block layer wants 512-byte sectors */
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
    {
    #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
    	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
    #else
    	blkdev_issue_discard(bdev, start >> 9, len >> 9);
    #endif
    }
    
    #endif
    
    static int noinline free_extents(struct btrfs_trans_handle *trans,
    				 struct btrfs_root *extent_root,
    				 struct list_head *del_list)
    {
    	struct btrfs_fs_info *info = extent_root->fs_info;
    	struct btrfs_path *path;
    	struct btrfs_key key, found_key;
    	struct extent_buffer *leaf;
    	struct list_head *cur;
    	struct pending_extent_op *op;
    	struct btrfs_extent_item *ei;
    	int ret, num_to_del, extent_slot = 0, found_extent = 0;
    	u32 refs;
    	u64 bytes_freed = 0;
    
    	path = btrfs_alloc_path();
    	if (!path)
    		return -ENOMEM;
    	path->reada = 1;
    
    search:
    	/* search for the backref for the current ref we want to delete */
    	cur = del_list->next;
    	op = list_entry(cur, struct pending_extent_op, list);
    	ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
    				    op->orig_parent,
    				    extent_root->root_key.objectid,
    				    op->orig_generation, op->level, 1);
    	if (ret) {
    		printk("Unable to find backref byte nr %Lu root %Lu gen %Lu "
    		       "owner %u\n", op->bytenr,
    		       extent_root->root_key.objectid, op->orig_generation,
    		       op->level);
    		btrfs_print_leaf(extent_root, path->nodes[0]);
    		WARN_ON(1);
    		goto out;
    	}
    
    	extent_slot = path->slots[0];
    	num_to_del = 1;
    	found_extent = 0;
    
    	/*
    	 * if we aren't the first item on the leaf we can move back one and see
    	 * if our ref is right next to our extent item
    	 */
    	if (likely(extent_slot)) {
    		extent_slot--;
    		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    				      extent_slot);
    		if (found_key.objectid == op->bytenr &&
    		    found_key.type == BTRFS_EXTENT_ITEM_KEY &&
    		    found_key.offset == op->num_bytes) {
    			num_to_del++;
    			found_extent = 1;
    		}
    	}
    
    	/*
    	 * if we didn't find the extent we need to delete the backref and then
    	 * search for the extent item key so we can update its ref count
    	 */
    	if (!found_extent) {
    		key.objectid = op->bytenr;
    		key.type = BTRFS_EXTENT_ITEM_KEY;
    		key.offset = op->num_bytes;
    
    		ret = remove_extent_backref(trans, extent_root, path);
    		BUG_ON(ret);
    		btrfs_release_path(extent_root, path);
    		ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
    		BUG_ON(ret);
    		extent_slot = path->slots[0];
    	}
    
    	/* this is where we update the ref count for the extent */
    	leaf = path->nodes[0];
    	ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
    	refs = btrfs_extent_refs(leaf, ei);
    	BUG_ON(refs == 0);