extent-tree.c

/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include "compat.h"
#include "hash.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "ref-cache.h"

#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

struct pending_extent_op {
	int type;
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 orig_parent;
	u64 generation;
	u64 orig_generation;
	int level;
	struct list_head list;
	int del;
};

static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root, int all);
static int del_pending_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root, int all);
static int pin_down_bytes(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  u64 bytenr, u64 num_bytes, int is_data);
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free);

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret)
		atomic_inc(&ret->count);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size;
	int ret;

	mutex_lock(&info->pinned_mutex);
	while (start < end) {
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		if (extent_start == start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}
	mutex_unlock(&info->pinned_mutex);

	return 0;
}

static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr, 0,
				       &logical, &nr, &stripe_len);
		BUG_ON(ret);
		while (nr--) {
			btrfs_remove_free_space(cache, logical[nr],
						stripe_len);
		}
		kfree(logical);
	}
	return 0;
}

static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last;

	if (!block_group)
		return 0;

	root = root->fs_info->extent_root;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	remove_sb_from_cache(root, block_group);
	block_group->cached = 1;
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains teh given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

static inline void put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count))
		kfree(cache);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;
	list_for_each_entry(found, head, list) {
		if (found->flags == flags)
			return found;
	}
	return NULL;
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - number of references holding by parent node (alway 1 for tree blocks)
 *
 * Btree leaf may hold multiple references to a file extent. In most cases,
 * these references are from same file and the corresponding offsets inside
 * the file are close together.
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, 1)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid,
 *      number of references in the leaf)
 *
 * When a file extent is removed either during snapshot deletion or
 * file truncation, we find the corresponding back reference and check
 * the following fields:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to. If the tree block isn't in reference counted root,
 * the old back references are removed. These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a backref is in deleting, the following fields are checked:
 *
 * if backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of parent extent. If a extent is tree root, the key offset is set
 * to the key objectid.
 */

static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid, int del)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct extent_buffer *leaf;
	u64 ref_objectid;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	ref_objectid = btrfs_ref_objectid(leaf, ref);
	if (btrfs_ref_root(leaf, ref) != ref_root ||
	    btrfs_ref_generation(leaf, ref) != ref_generation ||
	    (ref_objectid != owner_objectid &&
	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
		ret = -EIO;
		WARN_ON(1);
		goto out;
	}
	ret = 0;
out:
	return ret;
}

/*
 * updates all the backrefs that are pending on update_list for the
 * extent_root
 */
static noinline int update_backrefs(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_path *path,
				    struct list_head *update_list)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct pending_extent_op *op;
	struct extent_buffer *leaf;
	int ret = 0;
	struct list_head *cur = update_list->next;
	u64 ref_objectid;
	u64 ref_root = extent_root->root_key.objectid;

	op = list_entry(cur, struct pending_extent_op, list);

search:
	key.objectid = op->bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = op->orig_parent;

	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
	BUG_ON(ret);

	leaf = path->nodes[0];

loop:
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);

	ref_objectid = btrfs_ref_objectid(leaf, ref);

	if (btrfs_ref_root(leaf, ref) != ref_root ||
	    btrfs_ref_generation(leaf, ref) != op->orig_generation ||
	    (ref_objectid != op->level &&
	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
		printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
		       "root %llu, owner %u\n",
		       (unsigned long long)op->bytenr,
		       (unsigned long long)op->orig_parent,
		       (unsigned long long)ref_root, op->level);
		btrfs_print_leaf(extent_root, leaf);
		BUG();
	}

	key.objectid = op->bytenr;
	key.offset = op->parent;
	key.type = BTRFS_EXTENT_REF_KEY;
	ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
	BUG_ON(ret);
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	btrfs_set_ref_generation(leaf, ref, op->generation);

	cur = cur->next;

	list_del_init(&op->list);
	unlock_extent(&info->extent_ins, op->bytenr,
		      op->bytenr + op->num_bytes - 1, GFP_NOFS);
	kfree(op);

	if (cur == update_list) {
		btrfs_mark_buffer_dirty(path->nodes[0]);
		btrfs_release_path(extent_root, path);
		goto out;
	}

	op = list_entry(cur, struct pending_extent_op, list);

	path->slots[0]++;
	while (path->slots[0] < btrfs_header_nritems(leaf)) {
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid == op->bytenr &&
		    key.type == BTRFS_EXTENT_REF_KEY)
			goto loop;
		path->slots[0]++;
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(extent_root, path);
	goto search;

out:
	return 0;
}

static noinline int insert_extents(struct btrfs_trans_handle *trans,
				   struct btrfs_root *extent_root,
				   struct btrfs_path *path,
				   struct list_head *insert_list, int nr)
{
	struct btrfs_key *keys;
	u32 *data_size;
	struct pending_extent_op *op;
	struct extent_buffer *leaf;
	struct list_head *cur = insert_list->next;
	struct btrfs_fs_info *info = extent_root->fs_info;
	u64 ref_root = extent_root->root_key.objectid;
	int i = 0, last = 0, ret;
	int total = nr * 2;

	if (!nr)
		return 0;

	keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
	if (!keys)
		return -ENOMEM;

	data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
	if (!data_size) {
		kfree(keys);
		return -ENOMEM;
	}

	list_for_each_entry(op, insert_list, list) {
		keys[i].objectid = op->bytenr;
		keys[i].offset = op->num_bytes;
		keys[i].type = BTRFS_EXTENT_ITEM_KEY;
		data_size[i] = sizeof(struct btrfs_extent_item);
		i++;

		keys[i].objectid = op->bytenr;
		keys[i].offset = op->parent;
		keys[i].type = BTRFS_EXTENT_REF_KEY;
		data_size[i] = sizeof(struct btrfs_extent_ref);
		i++;
	}

	op = list_entry(cur, struct pending_extent_op, list);
	i = 0;
	while (i < total) {
		int c;
		ret = btrfs_insert_some_items(trans, extent_root, path,
					      keys+i, data_size+i, total-i);
		BUG_ON(ret < 0);

		if (last && ret > 1)
			BUG();

		leaf = path->nodes[0];
		for (c = 0; c < ret; c++) {
			int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;

			/*
			 * if the first item we inserted was a backref, then
			 * the EXTENT_ITEM will be the odd c's, else it will
			 * be the even c's
			 */
			if ((ref_first && (c % 2)) ||
			    (!ref_first && !(c % 2))) {
				struct btrfs_extent_item *itm;

				itm = btrfs_item_ptr(leaf, path->slots[0] + c,
						     struct btrfs_extent_item);
				btrfs_set_extent_refs(path->nodes[0], itm, 1);
				op->del++;
			} else {
				struct btrfs_extent_ref *ref;

				ref = btrfs_item_ptr(leaf, path->slots[0] + c,
						     struct btrfs_extent_ref);
				btrfs_set_ref_root(leaf, ref, ref_root);
				btrfs_set_ref_generation(leaf, ref,
							 op->generation);
				btrfs_set_ref_objectid(leaf, ref, op->level);
				btrfs_set_ref_num_refs(leaf, ref, 1);
				op->del++;
			}

			/*
			 * using del to see when its ok to free up the
			 * pending_extent_op.  In the case where we insert the
			 * last item on the list in order to help do batching
			 * we need to not free the extent op until we actually
			 * insert the extent_item
			 */
			if (op->del == 2) {
				unlock_extent(&info->extent_ins, op->bytenr,
					      op->bytenr + op->num_bytes - 1,
					      GFP_NOFS);
				cur = cur->next;
				list_del_init(&op->list);
				kfree(op);
				if (cur != insert_list)
					op = list_entry(cur,
						struct pending_extent_op,
						list);
			}
		}
		btrfs_mark_buffer_dirty(leaf);
		btrfs_release_path(extent_root, path);

		/*
		 * Ok backref's and items usually go right next to eachother,
		 * but if we could only insert 1 item that means that we
		 * inserted on the end of a leaf, and we have no idea what may
		 * be on the next leaf so we just play it safe.  In order to
		 * try and help this case we insert the last thing on our
		 * insert list so hopefully it will end up being the last
		 * thing on the leaf and everything else will be before it,
		 * which will let us insert a whole bunch of items at the same
		 * time.
		 */
		if (ret == 1 && !last && (i + ret < total)) {
			/*
			 * last: where we will pick up the next time around
			 * i: our current key to insert, will be total - 1
			 * cur: the current op we are screwing with
			 * op: duh
			 */
			last = i + ret;
			i = total - 1;
			cur = insert_list->prev;
			op = list_entry(cur, struct pending_extent_op, list);
		} else if (last) {
			/*
			 * ok we successfully inserted the last item on the
			 * list, lets reset everything
			 *
			 * i: our current key to insert, so where we left off
			 *    last time
			 * last: done with this
			 * cur: the op we are messing with
			 * op: duh
			 * total: since we inserted the last key, we need to
			 *        decrement total so we dont overflow
			 */
			i = last;
			last = 0;
			total--;
			if (i < total) {
				cur = insert_list->next;
				op = list_entry(cur, struct pending_extent_op,
						list);
			}
		} else {
			i += ret;
		}

		cond_resched();
	}
	ret = 0;
	kfree(keys);
	kfree(data_size);
	return ret;
}

static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 ref_root, u64 ref_generation,
					  u64 owner_objectid)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = parent;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
	if (ret == 0) {
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		btrfs_set_ref_root(leaf, ref, ref_root);
		btrfs_set_ref_generation(leaf, ref, ref_generation);
		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
		btrfs_set_ref_num_refs(leaf, ref, 1);
	} else if (ret == -EEXIST) {
		u64 existing_owner;
		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		if (btrfs_ref_root(leaf, ref) != ref_root ||
		    btrfs_ref_generation(leaf, ref) != ref_generation) {
			ret = -EIO;
			WARN_ON(1);
			goto out;
		}

		num_refs = btrfs_ref_num_refs(leaf, ref);
		BUG_ON(num_refs == 0);
		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

		existing_owner = btrfs_ref_objectid(leaf, ref);
		if (existing_owner != owner_objectid &&
		    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
			btrfs_set_ref_objectid(leaf, ref,
					BTRFS_MULTIPLE_OBJECTIDS);
		}
		ret = 0;
	} else {
		goto out;
	}
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}

static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *ref;
	u32 num_refs;
	int ret = 0;

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	num_refs = btrfs_ref_num_refs(leaf, ref);
	BUG_ON(num_refs == 0);
	num_refs -= 1;
	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		btrfs_set_ref_num_refs(leaf, ref, num_refs);
		btrfs_mark_buffer_dirty(leaf);
	}
	btrfs_release_path(root, path);
	return ret;
}

#ifdef BIO_RW_DISCARD
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
}
#endif

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
	int ret;
	u64 map_length = num_bytes;
	struct btrfs_multi_bio *multi = NULL;

	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      bytenr, &map_length, &multi, 0);
	if (!ret) {
		struct btrfs_bio_stripe *stripe = multi->stripes;
		int i;

		if (map_length > num_bytes)
			map_length = num_bytes;

		for (i = 0; i < multi->num_stripes; i++, stripe++) {
			btrfs_issue_discard(stripe->dev->bdev,
					    stripe->physical,
					    map_length);
		}
		kfree(multi);
	}

	return ret;
#else
	return 0;
#endif
}

static noinline int free_extents(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root,
				 struct list_head *del_list)
{
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	struct list_head *cur;
	struct pending_extent_op *op;
	struct btrfs_extent_item *ei;
	int ret, num_to_del, extent_slot = 0, found_extent = 0;
	u32 refs;
	u64 bytes_freed = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = 1;

search:
	/* search for the backref for the current ref we want to delete */
	cur = del_list->next;
	op = list_entry(cur, struct pending_extent_op, list);
	ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
				    op->orig_parent,
				    extent_root->root_key.objectid,
				    op->orig_generation, op->level, 1);
	if (ret) {
		printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
		       "root %llu gen %llu owner %u\n",
		       (unsigned long long)op->bytenr,
		       (unsigned long long)extent_root->root_key.objectid,
		       (unsigned long long)op->orig_generation, op->level);
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		goto out;
	}

	extent_slot = path->slots[0];
	num_to_del = 1;
	found_extent = 0;

	/*
	 * if we aren't the first item on the leaf we can move back one and see
	 * if our ref is right next to our extent item
	 */
	if (likely(extent_slot)) {
		extent_slot--;
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      extent_slot);
		if (found_key.objectid == op->bytenr &&
		    found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    found_key.offset == op->num_bytes) {
			num_to_del++;
			found_extent = 1;
		}
	}

	/*
	 * if we didn't find the extent we need to delete the backref and then
	 * search for the extent item key so we can update its ref count
	 */
	if (!found_extent) {
		key.objectid = op->bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = op->num_bytes;

		ret = remove_extent_backref(trans, extent_root, path);
		BUG_ON(ret);
		btrfs_release_path(extent_root, path);
		ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
		BUG_ON(ret);
		extent_slot = path->slots[0];
	}

	/* this is where we update the ref count for the extent */
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs == 0);
	refs--;
	btrfs_set_extent_refs(leaf, ei, refs);

	btrfs_mark_buffer_dirty(leaf);

	/*
	 * This extent needs deleting.  The reason cur_slot is extent_slot +
	 * num_to_del is because extent_slot points to the slot where the extent
	 * is, and if the backref was not right next to the extent we will be
	 * deleting at least 1 item, and will want to start searching at the
	 * slot directly next to extent_slot.  However if we did find the
	 * backref next to the extent item them we will be deleting at least 2
	 * items and will want to start searching directly after the ref slot
	 */
	if (!refs) {
		struct list_head *pos, *n, *end;