Skip to content
Snippets Groups Projects
alloc.c 180 KiB
Newer Older
  • Learn to ignore specific revisions
  • 		eb_el->l_recs[i] = root_el->l_recs[i];
    
    
    	status = ocfs2_journal_dirty(handle, new_eb_bh);
    	if (status < 0) {
    		mlog_errno(status);
    		goto bail;
    	}
    
    
    	status = ocfs2_journal_access(handle, inode, et->et_root_bh,
    
    				      OCFS2_JOURNAL_ACCESS_WRITE);
    	if (status < 0) {
    		mlog_errno(status);
    		goto bail;
    	}
    
    
    	new_clusters = ocfs2_sum_rightmost_rec(eb_el);
    
    
    	/* update root_bh now */
    	le16_add_cpu(&root_el->l_tree_depth, 1);
    	root_el->l_recs[0].e_cpos = 0;
    	root_el->l_recs[0].e_blkno = eb->h_blkno;
    	root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
    	for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
    		memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
    	root_el->l_next_free_rec = cpu_to_le16(1);
    
    
    	/* If this is our 1st tree depth shift, then last_eb_blk
    	 * becomes the allocated extent block */
    
    	if (root_el->l_tree_depth == cpu_to_le16(1))
    
    		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
    
    	status = ocfs2_journal_dirty(handle, et->et_root_bh);
    
    	if (status < 0) {
    		mlog_errno(status);
    		goto bail;
    	}
    
    	*ret_new_eb_bh = new_eb_bh;
    	new_eb_bh = NULL;
    	status = 0;
    bail:
    	if (new_eb_bh)
    		brelse(new_eb_bh);
    
    	mlog_exit(status);
    	return status;
    }
    
    /*
     * Should only be called when there is no space left in any of the
     * leaf nodes. What we want to do is find the lowest tree depth
     * non-leaf extent block with room for new records. There are three
     * valid results of this search:
     *
     * 1) a lowest extent block is found, then we pass it back in
     *    *lowest_eb_bh and return '0'
     *
    
     * 2) the search fails to find anything, but the root_el has room. We
    
     *    pass NULL back in *lowest_eb_bh, but still return '0'
     *
    
     * 3) the search fails to find anything AND the root_el is full, in
    
     *    which case we return > 0
     *
     * return status < 0 indicates an error.
     */
    static int ocfs2_find_branch_target(struct ocfs2_super *osb,
    				    struct inode *inode,
    
    				    struct ocfs2_extent_tree *et,
    
    				    struct buffer_head **target_bh)
    {
    	int status = 0, i;
    	u64 blkno;
    	struct ocfs2_extent_block *eb;
    	struct ocfs2_extent_list  *el;
    	struct buffer_head *bh = NULL;
    	struct buffer_head *lowest_bh = NULL;
    
    	mlog_entry_void();
    
    	*target_bh = NULL;
    
    
    	el = et->et_root_el;
    
    
    	while(le16_to_cpu(el->l_tree_depth) > 1) {
    		if (le16_to_cpu(el->l_next_free_rec) == 0) {
    
    			ocfs2_error(inode->i_sb, "Dinode %llu has empty "
    
    				    "extent list (next_free_rec == 0)",
    
    				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
    
    			status = -EIO;
    			goto bail;
    		}
    		i = le16_to_cpu(el->l_next_free_rec) - 1;
    		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
    		if (!blkno) {
    
    			ocfs2_error(inode->i_sb, "Dinode %llu has extent "
    
    				    "list where extent # %d has no physical "
    				    "block start",
    
    				    (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
    
    			status = -EIO;
    			goto bail;
    		}
    
    		if (bh) {
    			brelse(bh);
    			bh = NULL;
    		}
    
    		status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
    					  inode);
    		if (status < 0) {
    			mlog_errno(status);
    			goto bail;
    		}
    
    
    		eb = (struct ocfs2_extent_block *) bh->b_data;
    		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
    			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
    			status = -EIO;
    			goto bail;
    		}
    		el = &eb->h_list;
    
    		if (le16_to_cpu(el->l_next_free_rec) <
    		    le16_to_cpu(el->l_count)) {
    			if (lowest_bh)
    				brelse(lowest_bh);
    			lowest_bh = bh;
    			get_bh(lowest_bh);
    		}
    	}
    
    	/* If we didn't find one and the fe doesn't have any room,
    	 * then return '1' */
    
    	el = et->et_root_el;
    
    	if (!lowest_bh && (el->l_next_free_rec == el->l_count))
    
    		status = 1;
    
    	*target_bh = lowest_bh;
    bail:
    	if (bh)
    		brelse(bh);
    
    	mlog_exit(status);
    	return status;
    }
    
    
    /*
     * Grow a b-tree so that it has more records.
     *
     * We might shift the tree depth in which case existing paths should
     * be considered invalid.
     *
     * Tree depth after the grow is returned via *final_depth.
    
     *
     * *last_eb_bh will be updated by ocfs2_add_branch().
    
     */
    static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
    
    			   struct ocfs2_extent_tree *et, int *final_depth,
    
    			   struct buffer_head **last_eb_bh,
    
    			   struct ocfs2_alloc_context *meta_ac)
    {
    	int ret, shift;
    
    	struct ocfs2_extent_list *el = et->et_root_el;
    
    	int depth = le16_to_cpu(el->l_tree_depth);
    
    	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
    	struct buffer_head *bh = NULL;
    
    	BUG_ON(meta_ac == NULL);
    
    
    	shift = ocfs2_find_branch_target(osb, inode, et, &bh);
    
    	if (shift < 0) {
    		ret = shift;
    		mlog_errno(ret);
    		goto out;
    	}
    
    	/* We traveled all the way to the bottom of the allocation tree
    	 * and didn't find room for any more extents - we need to add
    	 * another tree level */
    	if (shift) {
    		BUG_ON(bh);
    		mlog(0, "need to shift tree depth (current = %d)\n", depth);
    
    		/* ocfs2_shift_tree_depth will return us a buffer with
    		 * the new extent block (so we can pass that to
    		 * ocfs2_add_branch). */
    
    		ret = ocfs2_shift_tree_depth(osb, handle, inode, et,
    
    					     meta_ac, &bh);
    		if (ret < 0) {
    			mlog_errno(ret);
    			goto out;
    		}
    		depth++;
    
    		if (depth == 1) {
    			/*
    			 * Special case: we have room now if we shifted from
    			 * tree_depth 0, so no more work needs to be done.
    			 *
    			 * We won't be calling add_branch, so pass
    			 * back *last_eb_bh as the new leaf. At depth
    			 * zero, it should always be null so there's
    			 * no reason to brelse.
    			 */
    			BUG_ON(*last_eb_bh);
    			get_bh(bh);
    			*last_eb_bh = bh;
    
    			goto out;
    
    	}
    
    	/* call ocfs2_add_branch to add the final part of the tree with
    	 * the new data. */
    	mlog(0, "add branch. bh = %p\n", bh);
    
    	ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh,
    
    			       meta_ac);
    	if (ret < 0) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    out:
    	if (final_depth)
    		*final_depth = depth;
    	brelse(bh);
    	return ret;
    }
    
    
    /*
     * This function will discard the rightmost extent record.
     */
    static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
    {
    	int next_free = le16_to_cpu(el->l_next_free_rec);
    	int count = le16_to_cpu(el->l_count);
    	unsigned int num_bytes;
    
    	BUG_ON(!next_free);
    	/* This will cause us to go off the end of our extent list. */
    	BUG_ON(next_free >= count);
    
    	num_bytes = sizeof(struct ocfs2_extent_rec) * next_free;
    
    	memmove(&el->l_recs[1], &el->l_recs[0], num_bytes);
    }
    
    static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
    			      struct ocfs2_extent_rec *insert_rec)
    {
    	int i, insert_index, next_free, has_empty, num_bytes;
    	u32 insert_cpos = le32_to_cpu(insert_rec->e_cpos);
    	struct ocfs2_extent_rec *rec;
    
    	next_free = le16_to_cpu(el->l_next_free_rec);
    	has_empty = ocfs2_is_empty_extent(&el->l_recs[0]);
    
    	BUG_ON(!next_free);
    
    	/* The tree code before us didn't allow enough room in the leaf. */
    
    Julia Lawall's avatar
    Julia Lawall committed
    	BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
    
    
    	/*
    	 * The easiest way to approach this is to just remove the
    	 * empty extent and temporarily decrement next_free.
    	 */
    	if (has_empty) {
    		/*
    		 * If next_free was 1 (only an empty extent), this
    		 * loop won't execute, which is fine. We still want
    		 * the decrement above to happen.
    		 */
    		for(i = 0; i < (next_free - 1); i++)
    			el->l_recs[i] = el->l_recs[i+1];
    
    		next_free--;
    	}
    
    	/*
    	 * Figure out what the new record index should be.
    	 */
    	for(i = 0; i < next_free; i++) {
    		rec = &el->l_recs[i];
    
    		if (insert_cpos < le32_to_cpu(rec->e_cpos))
    			break;
    	}
    	insert_index = i;
    
    	mlog(0, "ins %u: index %d, has_empty %d, next_free %d, count %d\n",
    	     insert_cpos, insert_index, has_empty, next_free, le16_to_cpu(el->l_count));
    
    	BUG_ON(insert_index < 0);
    	BUG_ON(insert_index >= le16_to_cpu(el->l_count));
    	BUG_ON(insert_index > next_free);
    
    	/*
    	 * No need to memmove if we're just adding to the tail.
    	 */
    	if (insert_index != next_free) {
    		BUG_ON(next_free >= le16_to_cpu(el->l_count));
    
    		num_bytes = next_free - insert_index;
    		num_bytes *= sizeof(struct ocfs2_extent_rec);
    		memmove(&el->l_recs[insert_index + 1],
    			&el->l_recs[insert_index],
    			num_bytes);
    	}
    
    	/*
    	 * Either we had an empty extent, and need to re-increment or
    	 * there was no empty extent on a non full rightmost leaf node,
    	 * in which case we still need to increment.
    	 */
    	next_free++;
    	el->l_next_free_rec = cpu_to_le16(next_free);
    	/*
    	 * Make sure none of the math above just messed up our tree.
    	 */
    	BUG_ON(le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count));
    
    	el->l_recs[insert_index] = *insert_rec;
    
    }
    
    
    static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el)
    {
    	int size, num_recs = le16_to_cpu(el->l_next_free_rec);
    
    	BUG_ON(num_recs == 0);
    
    	if (ocfs2_is_empty_extent(&el->l_recs[0])) {
    		num_recs--;
    		size = num_recs * sizeof(struct ocfs2_extent_rec);
    		memmove(&el->l_recs[0], &el->l_recs[1], size);
    		memset(&el->l_recs[num_recs], 0,
    		       sizeof(struct ocfs2_extent_rec));
    		el->l_next_free_rec = cpu_to_le16(num_recs);
    	}
    }
    
    
    /*
     * Create an empty extent record .
     *
     * l_next_free_rec may be updated.
     *
     * If an empty extent already exists do nothing.
     */
    static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el)
    {
    	int next_free = le16_to_cpu(el->l_next_free_rec);
    
    
    	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
    
    
    	if (next_free == 0)
    		goto set_and_inc;
    
    	if (ocfs2_is_empty_extent(&el->l_recs[0]))
    		return;
    
    	mlog_bug_on_msg(el->l_count == el->l_next_free_rec,
    			"Asked to create an empty extent in a full list:\n"
    			"count = %u, tree depth = %u",
    			le16_to_cpu(el->l_count),
    			le16_to_cpu(el->l_tree_depth));
    
    	ocfs2_shift_records_right(el);
    
    set_and_inc:
    	le16_add_cpu(&el->l_next_free_rec, 1);
    	memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
    }
    
    /*
     * For a rotation which involves two leaf nodes, the "root node" is
     * the lowest level tree node which contains a path to both leafs. This
     * resulting set of information can be used to form a complete "subtree"
     *
     * This function is passed two full paths from the dinode down to a
     * pair of adjacent leaves. It's task is to figure out which path
     * index contains the subtree root - this can be the root index itself
     * in a worst-case rotation.
     *
     * The array index of the subtree root is passed back.
     */
    static int ocfs2_find_subtree_root(struct inode *inode,
    				   struct ocfs2_path *left,
    				   struct ocfs2_path *right)
    {
    	int i = 0;
    
    	/*
    	 * Check that the caller passed in two paths from the same tree.
    	 */
    	BUG_ON(path_root_bh(left) != path_root_bh(right));
    
    	do {
    		i++;
    
    		/*
    		 * The caller didn't pass two adjacent paths.
    		 */
    		mlog_bug_on_msg(i > left->p_tree_depth,
    				"Inode %lu, left depth %u, right depth %u\n"
    				"left leaf blk %llu, right leaf blk %llu\n",
    				inode->i_ino, left->p_tree_depth,
    				right->p_tree_depth,
    				(unsigned long long)path_leaf_bh(left)->b_blocknr,
    				(unsigned long long)path_leaf_bh(right)->b_blocknr);
    	} while (left->p_node[i].bh->b_blocknr ==
    		 right->p_node[i].bh->b_blocknr);
    
    	return i - 1;
    }
    
    typedef void (path_insert_t)(void *, struct buffer_head *);
    
    /*
     * Traverse a btree path in search of cpos, starting at root_el.
     *
     * This code can be called with a cpos larger than the tree, in which
     * case it will return the rightmost path.
     */
    static int __ocfs2_find_path(struct inode *inode,
    			     struct ocfs2_extent_list *root_el, u32 cpos,
    			     path_insert_t *func, void *data)
    {
    	int i, ret = 0;
    	u32 range;
    	u64 blkno;
    	struct buffer_head *bh = NULL;
    	struct ocfs2_extent_block *eb;
    	struct ocfs2_extent_list *el;
    	struct ocfs2_extent_rec *rec;
    	struct ocfs2_inode_info *oi = OCFS2_I(inode);
    
    	el = root_el;
    	while (el->l_tree_depth) {
    		if (le16_to_cpu(el->l_next_free_rec) == 0) {
    			ocfs2_error(inode->i_sb,
    				    "Inode %llu has empty extent list at "
    				    "depth %u\n",
    				    (unsigned long long)oi->ip_blkno,
    				    le16_to_cpu(el->l_tree_depth));
    			ret = -EROFS;
    			goto out;
    
    		}
    
    		for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) {
    			rec = &el->l_recs[i];
    
    			/*
    			 * In the case that cpos is off the allocation
    			 * tree, this should just wind up returning the
    			 * rightmost record.
    			 */
    			range = le32_to_cpu(rec->e_cpos) +
    
    				ocfs2_rec_clusters(el, rec);
    
    			if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
    			    break;
    		}
    
    		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
    		if (blkno == 0) {
    			ocfs2_error(inode->i_sb,
    				    "Inode %llu has bad blkno in extent list "
    				    "at depth %u (index %d)\n",
    				    (unsigned long long)oi->ip_blkno,
    				    le16_to_cpu(el->l_tree_depth), i);
    			ret = -EROFS;
    			goto out;
    		}
    
    		brelse(bh);
    		bh = NULL;
    		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
    				       &bh, OCFS2_BH_CACHED, inode);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		eb = (struct ocfs2_extent_block *) bh->b_data;
    		el = &eb->h_list;
    		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
    			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
    			ret = -EIO;
    			goto out;
    		}
    
    		if (le16_to_cpu(el->l_next_free_rec) >
    		    le16_to_cpu(el->l_count)) {
    			ocfs2_error(inode->i_sb,
    				    "Inode %llu has bad count in extent list "
    				    "at block %llu (next free=%u, count=%u)\n",
    				    (unsigned long long)oi->ip_blkno,
    				    (unsigned long long)bh->b_blocknr,
    				    le16_to_cpu(el->l_next_free_rec),
    				    le16_to_cpu(el->l_count));
    			ret = -EROFS;
    			goto out;
    		}
    
    		if (func)
    			func(data, bh);
    	}
    
    out:
    	/*
    	 * Catch any trailing bh that the loop didn't handle.
    	 */
    	brelse(bh);
    
    	return ret;
    }
    
    /*
     * Given an initialized path (that is, it has a valid root extent
     * list), this function will traverse the btree in search of the path
     * which would contain cpos.
     *
     * The path traveled is recorded in the path structure.
     *
     * Note that this will not do any comparisons on leaf node extent
     * records, so it will work fine in the case that we just added a tree
     * branch.
     */
    struct find_path_data {
    	int index;
    	struct ocfs2_path *path;
    };
    static void find_path_ins(void *data, struct buffer_head *bh)
    {
    	struct find_path_data *fp = data;
    
    	get_bh(bh);
    	ocfs2_path_insert_eb(fp->path, fp->index, bh);
    	fp->index++;
    }
    static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
    			   u32 cpos)
    {
    	struct find_path_data data;
    
    	data.index = 1;
    	data.path = path;
    	return __ocfs2_find_path(inode, path_root_el(path), cpos,
    				 find_path_ins, &data);
    }
    
    static void find_leaf_ins(void *data, struct buffer_head *bh)
    {
    	struct ocfs2_extent_block *eb =(struct ocfs2_extent_block *)bh->b_data;
    	struct ocfs2_extent_list *el = &eb->h_list;
    	struct buffer_head **ret = data;
    
    	/* We want to retain only the leaf block. */
    	if (le16_to_cpu(el->l_tree_depth) == 0) {
    		get_bh(bh);
    		*ret = bh;
    	}
    }
    /*
     * Find the leaf block in the tree which would contain cpos. No
     * checking of the actual leaf is done.
     *
     * Some paths want to call this instead of allocating a path structure
     * and calling ocfs2_find_path().
     *
     * This function doesn't handle non btree extent lists.
     */
    
    int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
    		    u32 cpos, struct buffer_head **leaf_bh)
    
    {
    	int ret;
    	struct buffer_head *bh = NULL;
    
    	ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	*leaf_bh = bh;
    out:
    	return ret;
    }
    
    /*
     * Adjust the adjacent records (left_rec, right_rec) involved in a rotation.
     *
     * Basically, we've moved stuff around at the bottom of the tree and
     * we need to fix up the extent records above the changes to reflect
     * the new changes.
     *
     * left_rec: the record on the left.
     * left_child_el: is the child list pointed to by left_rec
     * right_rec: the record to the right of left_rec
     * right_child_el: is the child list pointed to by right_rec
     *
     * By definition, this only works on interior nodes.
     */
    static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
    				  struct ocfs2_extent_list *left_child_el,
    				  struct ocfs2_extent_rec *right_rec,
    				  struct ocfs2_extent_list *right_child_el)
    {
    	u32 left_clusters, right_end;
    
    	/*
    	 * Interior nodes never have holes. Their cpos is the cpos of
    	 * the leftmost record in their child list. Their cluster
    	 * count covers the full theoretical range of their child list
    	 * - the range between their cpos and the cpos of the record
    	 * immediately to their right.
    	 */
    	left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
    
    	if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
    		BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
    		left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
    	}
    
    	left_clusters -= le32_to_cpu(left_rec->e_cpos);
    
    	left_rec->e_int_clusters = cpu_to_le32(left_clusters);
    
    
    	/*
    	 * Calculate the rightmost cluster count boundary before
    
    	 * moving cpos - we will need to adjust clusters after
    
    	 * updating e_cpos to keep the same highest cluster count.
    	 */
    	right_end = le32_to_cpu(right_rec->e_cpos);
    
    	right_end += le32_to_cpu(right_rec->e_int_clusters);
    
    
    	right_rec->e_cpos = left_rec->e_cpos;
    	le32_add_cpu(&right_rec->e_cpos, left_clusters);
    
    	right_end -= le32_to_cpu(right_rec->e_cpos);
    
    	right_rec->e_int_clusters = cpu_to_le32(right_end);
    
    }
    
    /*
     * Adjust the adjacent root node records involved in a
     * rotation. left_el_blkno is passed in as a key so that we can easily
     * find it's index in the root list.
     */
    static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
    				      struct ocfs2_extent_list *left_el,
    				      struct ocfs2_extent_list *right_el,
    				      u64 left_el_blkno)
    {
    	int i;
    
    	BUG_ON(le16_to_cpu(root_el->l_tree_depth) <=
    	       le16_to_cpu(left_el->l_tree_depth));
    
    	for(i = 0; i < le16_to_cpu(root_el->l_next_free_rec) - 1; i++) {
    		if (le64_to_cpu(root_el->l_recs[i].e_blkno) == left_el_blkno)
    			break;
    	}
    
    	/*
    	 * The path walking code should have never returned a root and
    	 * two paths which are not adjacent.
    	 */
    	BUG_ON(i >= (le16_to_cpu(root_el->l_next_free_rec) - 1));
    
    	ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el,
    				      &root_el->l_recs[i + 1], right_el);
    }
    
    /*
     * We've changed a leaf block (in right_path) and need to reflect that
     * change back up the subtree.
     *
     * This happens in multiple places:
     *   - When we've moved an extent record from the left path leaf to the right
     *     path leaf to make room for an empty extent in the left path leaf.
     *   - When our insert into the right path leaf is at the leftmost edge
     *     and requires an update of the path immediately to it's left. This
     *     can occur at the end of some types of rotation and appending inserts.
    
     *   - When we've adjusted the last extent record in the left path leaf and the
     *     1st extent record in the right path leaf during cross extent block merge.
    
     */
    static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
    				       struct ocfs2_path *left_path,
    				       struct ocfs2_path *right_path,
    				       int subtree_index)
    {
    	int ret, i, idx;
    	struct ocfs2_extent_list *el, *left_el, *right_el;
    	struct ocfs2_extent_rec *left_rec, *right_rec;
    	struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
    
    	/*
    	 * Update the counts and position values within all the
    	 * interior nodes to reflect the leaf rotation we just did.
    	 *
    	 * The root node is handled below the loop.
    	 *
    	 * We begin the loop with right_el and left_el pointing to the
    	 * leaf lists and work our way up.
    	 *
    	 * NOTE: within this loop, left_el and right_el always refer
    	 * to the *child* lists.
    	 */
    	left_el = path_leaf_el(left_path);
    	right_el = path_leaf_el(right_path);
    	for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
    		mlog(0, "Adjust records at index %u\n", i);
    
    		/*
    		 * One nice property of knowing that all of these
    		 * nodes are below the root is that we only deal with
    		 * the leftmost right node record and the rightmost
    		 * left node record.
    		 */
    		el = left_path->p_node[i].el;
    		idx = le16_to_cpu(left_el->l_next_free_rec) - 1;
    		left_rec = &el->l_recs[idx];
    
    		el = right_path->p_node[i].el;
    		right_rec = &el->l_recs[0];
    
    		ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
    					      right_el);
    
    		ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
    		if (ret)
    			mlog_errno(ret);
    
    		ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
    		if (ret)
    			mlog_errno(ret);
    
    		/*
    		 * Setup our list pointers now so that the current
    		 * parents become children in the next iteration.
    		 */
    		left_el = left_path->p_node[i].el;
    		right_el = right_path->p_node[i].el;
    	}
    
    	/*
    	 * At the root node, adjust the two adjacent records which
    	 * begin our path to the leaves.
    	 */
    
    	el = left_path->p_node[subtree_index].el;
    	left_el = left_path->p_node[subtree_index + 1].el;
    	right_el = right_path->p_node[subtree_index + 1].el;
    
    	ocfs2_adjust_root_records(el, left_el, right_el,
    				  left_path->p_node[subtree_index + 1].bh->b_blocknr);
    
    	root_bh = left_path->p_node[subtree_index].bh;
    
    	ret = ocfs2_journal_dirty(handle, root_bh);
    	if (ret)
    		mlog_errno(ret);
    }
    
    static int ocfs2_rotate_subtree_right(struct inode *inode,
    				      handle_t *handle,
    				      struct ocfs2_path *left_path,
    				      struct ocfs2_path *right_path,
    				      int subtree_index)
    {
    	int ret, i;
    	struct buffer_head *right_leaf_bh;
    	struct buffer_head *left_leaf_bh = NULL;
    	struct buffer_head *root_bh;
    	struct ocfs2_extent_list *right_el, *left_el;
    	struct ocfs2_extent_rec move_rec;
    
    	left_leaf_bh = path_leaf_bh(left_path);
    	left_el = path_leaf_el(left_path);
    
    	if (left_el->l_next_free_rec != left_el->l_count) {
    		ocfs2_error(inode->i_sb,
    			    "Inode %llu has non-full interior leaf node %llu"
    			    "(next free = %u)",
    			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
    			    (unsigned long long)left_leaf_bh->b_blocknr,
    			    le16_to_cpu(left_el->l_next_free_rec));
    		return -EROFS;
    	}
    
    	/*
    	 * This extent block may already have an empty record, so we
    	 * return early if so.
    	 */
    	if (ocfs2_is_empty_extent(&left_el->l_recs[0]))
    		return 0;
    
    	root_bh = left_path->p_node[subtree_index].bh;
    	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
    
    	ret = ocfs2_journal_access(handle, inode, root_bh,
    				   OCFS2_JOURNAL_ACCESS_WRITE);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
    		ret = ocfs2_journal_access(handle, inode,
    					   right_path->p_node[i].bh,
    					   OCFS2_JOURNAL_ACCESS_WRITE);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		ret = ocfs2_journal_access(handle, inode,
    					   left_path->p_node[i].bh,
    					   OCFS2_JOURNAL_ACCESS_WRITE);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    	}
    
    	right_leaf_bh = path_leaf_bh(right_path);
    	right_el = path_leaf_el(right_path);
    
    	/* This is a code error, not a disk corruption. */
    	mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
    			"because rightmost leaf block %llu is empty\n",
    			(unsigned long long)OCFS2_I(inode)->ip_blkno,
    			(unsigned long long)right_leaf_bh->b_blocknr);
    
    	ocfs2_create_empty_extent(right_el);
    
    	ret = ocfs2_journal_dirty(handle, right_leaf_bh);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	/* Do the copy now. */
    	i = le16_to_cpu(left_el->l_next_free_rec) - 1;
    	move_rec = left_el->l_recs[i];
    	right_el->l_recs[0] = move_rec;
    
    	/*
    	 * Clear out the record we just copied and shift everything
    	 * over, leaving an empty extent in the left leaf.
    	 *
    	 * We temporarily subtract from next_free_rec so that the
    	 * shift will lose the tail record (which is now defunct).
    	 */
    	le16_add_cpu(&left_el->l_next_free_rec, -1);
    	ocfs2_shift_records_right(left_el);
    	memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
    	le16_add_cpu(&left_el->l_next_free_rec, 1);
    
    	ret = ocfs2_journal_dirty(handle, left_leaf_bh);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
    				subtree_index);
    
    out:
    	return ret;
    }
    
    /*
     * Given a full path, determine what cpos value would return us a path
     * containing the leaf immediately to the left of the current one.
     *
     * Will return zero if the path passed in is already the leftmost path.
     */
    static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
    					 struct ocfs2_path *path, u32 *cpos)
    {
    	int i, j, ret = 0;
    	u64 blkno;
    	struct ocfs2_extent_list *el;
    
    
    	BUG_ON(path->p_tree_depth == 0);
    
    
    	*cpos = 0;
    
    	blkno = path_leaf_bh(path)->b_blocknr;
    
    	/* Start at the tree node just above the leaf and work our way up. */
    	i = path->p_tree_depth - 1;
    	while (i >= 0) {
    		el = path->p_node[i].el;
    
    		/*
    		 * Find the extent record just before the one in our
    		 * path.
    		 */
    		for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
    			if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
    				if (j == 0) {
    					if (i == 0) {
    						/*
    						 * We've determined that the
    						 * path specified is already
    						 * the leftmost one - return a
    						 * cpos of zero.
    						 */
    						goto out;
    					}
    					/*
    					 * The leftmost record points to our
    					 * leaf - we need to travel up the
    					 * tree one level.
    					 */
    					goto next_node;
    				}
    
    				*cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos);
    
    				*cpos = *cpos + ocfs2_rec_clusters(el,
    							   &el->l_recs[j - 1]);
    				*cpos = *cpos - 1;
    
    				goto out;
    			}
    		}
    
    		/*
    		 * If we got here, we never found a valid node where
    		 * the tree indicated one should be.
    		 */
    		ocfs2_error(sb,
    			    "Invalid extent tree at extent block %llu\n",
    			    (unsigned long long)blkno);
    		ret = -EROFS;
    		goto out;
    
    next_node:
    		blkno = path->p_node[i].bh->b_blocknr;
    		i--;
    	}
    
    out:
    	return ret;
    }
    
    
    /*
     * Extend the transaction by enough credits to complete the rotation,
     * and still leave at least the original number of credits allocated
     * to this transaction.
     */
    
    static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
    
    					   int op_credits,
    
    					   struct ocfs2_path *path)
    {
    
    	int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
    
    
    	if (handle->h_buffer_credits < credits)
    		return ocfs2_extend_trans(handle, credits);
    
    	return 0;
    }
    
    /*
     * Trap the case where we're inserting into the theoretical range past
     * the _actual_ left leaf range. Otherwise, we'll rotate a record
     * whose cpos is less than ours into the right leaf.
     *
     * It's only necessary to look at the rightmost record of the left
     * leaf because the logic that calls us should ensure that the
     * theoretical ranges in the path components above the leaves are
     * correct.
     */
    static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path,
    						 u32 insert_cpos)
    {
    	struct ocfs2_extent_list *left_el;
    	struct ocfs2_extent_rec *rec;
    	int next_free;
    
    	left_el = path_leaf_el(left_path);
    	next_free = le16_to_cpu(left_el->l_next_free_rec);
    	rec = &left_el->l_recs[next_free - 1];
    
    	if (insert_cpos > le32_to_cpu(rec->e_cpos))
    		return 1;
    	return 0;
    }
    
    
    static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
    {
    	int next_free = le16_to_cpu(el->l_next_free_rec);
    	unsigned int range;
    	struct ocfs2_extent_rec *rec;
    
    	if (next_free == 0)
    		return 0;
    
    	rec = &el->l_recs[0];
    	if (ocfs2_is_empty_extent(rec)) {
    		/* Empty list. */
    		if (next_free == 1)
    			return 0;
    		rec = &el->l_recs[1];