Skip to content
Snippets Groups Projects
alloc.c 181 KiB
Newer Older
  • Learn to ignore specific revisions
  • 			return 0;
    		rec = &el->l_recs[1];
    	}
    
    	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
    	if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
    		return 1;
    	return 0;
    }
    
    
    /*
     * Rotate all the records in a btree right one record, starting at insert_cpos.
     *
     * The path to the rightmost leaf should be passed in.
     *
     * The array is assumed to be large enough to hold an entire path (tree depth).
     *
     * Upon succesful return from this function:
     *
     * - The 'right_path' array will contain a path to the leaf block
     *   whose range contains e_cpos.
     * - That leaf block will have a single empty extent in list index 0.
     * - In the case that the rotation requires a post-insert update,
     *   *ret_left_path will contain a valid path which can be passed to
     *   ocfs2_insert_path().
     */
    static int ocfs2_rotate_tree_right(struct inode *inode,
    				   handle_t *handle,
    
    				   enum ocfs2_split_type split,
    
    				   u32 insert_cpos,
    				   struct ocfs2_path *right_path,
    				   struct ocfs2_path **ret_left_path)
    {
    
    	int ret, start, orig_credits = handle->h_buffer_credits;
    
    	u32 cpos;
    	struct ocfs2_path *left_path = NULL;
    
    	*ret_left_path = NULL;
    
    	left_path = ocfs2_new_path(path_root_bh(right_path),
    				   path_root_el(right_path));
    	if (!left_path) {
    		ret = -ENOMEM;
    		mlog_errno(ret);
    		goto out;
    	}
    
    	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	mlog(0, "Insert: %u, first left path cpos: %u\n", insert_cpos, cpos);
    
    	/*
    	 * What we want to do here is:
    	 *
    	 * 1) Start with the rightmost path.
    	 *
    	 * 2) Determine a path to the leaf block directly to the left
    	 *    of that leaf.
    	 *
    	 * 3) Determine the 'subtree root' - the lowest level tree node
    	 *    which contains a path to both leaves.
    	 *
    	 * 4) Rotate the subtree.
    	 *
    	 * 5) Find the next subtree by considering the left path to be
    	 *    the new right path.
    	 *
    	 * The check at the top of this while loop also accepts
    	 * insert_cpos == cpos because cpos is only a _theoretical_
    	 * value to get us the left path - insert_cpos might very well
    	 * be filling that hole.
    	 *
    	 * Stop at a cpos of '0' because we either started at the
    	 * leftmost branch (i.e., a tree with one branch and a
    	 * rotation inside of it), or we've gone as far as we can in
    	 * rotating subtrees.
    	 */
    	while (cpos && insert_cpos <= cpos) {
    		mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
    		     insert_cpos, cpos);
    
    		ret = ocfs2_find_path(inode, left_path, cpos);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		mlog_bug_on_msg(path_leaf_bh(left_path) ==
    				path_leaf_bh(right_path),
    				"Inode %lu: error during insert of %u "
    				"(left path cpos %u) results in two identical "
    				"paths ending at %llu\n",
    				inode->i_ino, insert_cpos, cpos,
    				(unsigned long long)
    				path_leaf_bh(left_path)->b_blocknr);
    
    
    		if (split == SPLIT_NONE &&
    		    ocfs2_rotate_requires_path_adjustment(left_path,
    
    							  insert_cpos)) {
    
    			/*
    			 * We've rotated the tree as much as we
    			 * should. The rest is up to
    			 * ocfs2_insert_path() to complete, after the
    			 * record insertion. We indicate this
    			 * situation by returning the left path.
    			 *
    			 * The reason we don't adjust the records here
    			 * before the record insert is that an error
    			 * later might break the rule where a parent
    			 * record e_cpos will reflect the actual
    			 * e_cpos of the 1st nonempty record of the
    			 * child list.
    			 */
    			*ret_left_path = left_path;
    			goto out_ret_path;
    		}
    
    		start = ocfs2_find_subtree_root(inode, left_path, right_path);
    
    		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
    		     start,
    		     (unsigned long long) right_path->p_node[start].bh->b_blocknr,
    		     right_path->p_tree_depth);
    
    		ret = ocfs2_extend_rotate_transaction(handle, start,
    
    						      orig_credits, right_path);
    
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		ret = ocfs2_rotate_subtree_right(inode, handle, left_path,
    						 right_path, start);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    
    		if (split != SPLIT_NONE &&
    		    ocfs2_leftmost_rec_contains(path_leaf_el(right_path),
    						insert_cpos)) {
    			/*
    			 * A rotate moves the rightmost left leaf
    			 * record over to the leftmost right leaf
    			 * slot. If we're doing an extent split
    			 * instead of a real insert, then we have to
    			 * check that the extent to be split wasn't
    			 * just moved over. If it was, then we can
    			 * exit here, passing left_path back -
    			 * ocfs2_split_extent() is smart enough to
    			 * search both leaves.
    			 */
    			*ret_left_path = left_path;
    			goto out_ret_path;
    		}
    
    
    		/*
    		 * There is no need to re-read the next right path
    		 * as we know that it'll be our current left
    		 * path. Optimize by copying values instead.
    		 */
    		ocfs2_mv_path(right_path, left_path);
    
    		ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path,
    						    &cpos);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    	}
    
    out:
    	ocfs2_free_path(left_path);
    
    out_ret_path:
    	return ret;
    }
    
    
    static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
    				      struct ocfs2_path *path)
    
    	struct ocfs2_extent_rec *rec;
    
    	struct ocfs2_extent_list *el;
    	struct ocfs2_extent_block *eb;
    	u32 range;
    
    	/* Path should always be rightmost. */
    	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
    	BUG_ON(eb->h_next_leaf_blk != 0ULL);
    
    	el = &eb->h_list;
    	BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
    	idx = le16_to_cpu(el->l_next_free_rec) - 1;
    	rec = &el->l_recs[idx];
    	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
    
    	for (i = 0; i < path->p_tree_depth; i++) {
    		el = path->p_node[i].el;
    		idx = le16_to_cpu(el->l_next_free_rec) - 1;
    		rec = &el->l_recs[idx];
    
    		rec->e_int_clusters = cpu_to_le32(range);
    		le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos));
    
    		ocfs2_journal_dirty(handle, path->p_node[i].bh);
    
    static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
    			      struct ocfs2_cached_dealloc_ctxt *dealloc,
    			      struct ocfs2_path *path, int unlink_start)
    
    	int ret, i;
    	struct ocfs2_extent_block *eb;
    	struct ocfs2_extent_list *el;
    	struct buffer_head *bh;
    
    	for(i = unlink_start; i < path_num_items(path); i++) {
    		bh = path->p_node[i].bh;
    
    		eb = (struct ocfs2_extent_block *)bh->b_data;
    		/*
    		 * Not all nodes might have had their final count
    		 * decremented by the caller - handle this here.
    		 */
    		el = &eb->h_list;
    		if (le16_to_cpu(el->l_next_free_rec) > 1) {
    			mlog(ML_ERROR,
    			     "Inode %llu, attempted to remove extent block "
    			     "%llu with %u records\n",
    			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
    			     (unsigned long long)le64_to_cpu(eb->h_blkno),
    			     le16_to_cpu(el->l_next_free_rec));
    
    			ocfs2_journal_dirty(handle, bh);
    			ocfs2_remove_from_cache(inode, bh);
    			continue;
    		}
    
    		el->l_next_free_rec = 0;
    		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
    
    		ocfs2_journal_dirty(handle, bh);
    
    		ret = ocfs2_cache_extent_block_free(dealloc, eb);
    		if (ret)
    			mlog_errno(ret);
    
    		ocfs2_remove_from_cache(inode, bh);
    	}
    
    static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
    				 struct ocfs2_path *left_path,
    				 struct ocfs2_path *right_path,
    				 int subtree_index,
    				 struct ocfs2_cached_dealloc_ctxt *dealloc)
    
    	int i;
    	struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
    	struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
    
    	struct ocfs2_extent_list *el;
    
    	struct ocfs2_extent_block *eb;
    
    	el = path_leaf_el(left_path);
    
    	eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data;
    
    	for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
    		if (root_el->l_recs[i].e_blkno == eb->h_blkno)
    			break;
    
    	BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec));
    
    	memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
    	le16_add_cpu(&root_el->l_next_free_rec, -1);
    
    	eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
    	eb->h_next_leaf_blk = 0;
    
    	ocfs2_journal_dirty(handle, root_bh);
    	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
    
    	ocfs2_unlink_path(inode, handle, dealloc, right_path,
    			  subtree_index + 1);
    }
    
    static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
    				     struct ocfs2_path *left_path,
    				     struct ocfs2_path *right_path,
    				     int subtree_index,
    				     struct ocfs2_cached_dealloc_ctxt *dealloc,
    
    				     int *deleted,
    				     struct ocfs2_extent_tree *et)
    
    {
    	int ret, i, del_right_subtree = 0, right_has_empty = 0;
    
    	struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
    
    	struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
    	struct ocfs2_extent_block *eb;
    
    	*deleted = 0;
    
    	right_leaf_el = path_leaf_el(right_path);
    	left_leaf_el = path_leaf_el(left_path);
    	root_bh = left_path->p_node[subtree_index].bh;
    	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
    
    	if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0]))
    		return 0;
    
    	eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data;
    	if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) {
    
    		 * It's legal for us to proceed if the right leaf is
    		 * the rightmost one and it has an empty extent. There
    		 * are two cases to handle - whether the leaf will be
    		 * empty after removal or not. If the leaf isn't empty
    		 * then just remove the empty extent up front. The
    		 * next block will handle empty leaves by flagging
    		 * them for unlink.
    		 *
    		 * Non rightmost leaves will throw -EAGAIN and the
    		 * caller can manually move the subtree and retry.
    
    		if (eb->h_next_leaf_blk != 0ULL)
    			return -EAGAIN;
    
    		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
    			ret = ocfs2_journal_access(handle, inode,
    						   path_leaf_bh(right_path),
    						   OCFS2_JOURNAL_ACCESS_WRITE);
    
    			if (ret) {
    				mlog_errno(ret);
    				goto out;
    			}
    
    
    			ocfs2_remove_empty_extent(right_leaf_el);
    		} else
    			right_has_empty = 1;
    
    	if (eb->h_next_leaf_blk == 0ULL &&
    	    le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) {
    		/*
    		 * We have to update i_last_eb_blk during the meta
    		 * data delete.
    		 */
    
    		ret = ocfs2_journal_access(handle, inode, et_root_bh,
    
    					   OCFS2_JOURNAL_ACCESS_WRITE);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		del_right_subtree = 1;
    	}
    
    	/*
    	 * Getting here with an empty extent in the right path implies
    	 * that it's the rightmost path and will be deleted.
    	 */
    	BUG_ON(right_has_empty && !del_right_subtree);
    
    	ret = ocfs2_journal_access(handle, inode, root_bh,
    				   OCFS2_JOURNAL_ACCESS_WRITE);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
    		ret = ocfs2_journal_access(handle, inode,
    					   right_path->p_node[i].bh,
    					   OCFS2_JOURNAL_ACCESS_WRITE);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		ret = ocfs2_journal_access(handle, inode,
    					   left_path->p_node[i].bh,
    					   OCFS2_JOURNAL_ACCESS_WRITE);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    	}
    
    	if (!right_has_empty) {
    		/*
    		 * Only do this if we're moving a real
    		 * record. Otherwise, the action is delayed until
    		 * after removal of the right path in which case we
    		 * can do a simple shift to remove the empty extent.
    		 */
    		ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]);
    		memset(&right_leaf_el->l_recs[0], 0,
    		       sizeof(struct ocfs2_extent_rec));
    	}
    	if (eb->h_next_leaf_blk == 0ULL) {
    		/*
    		 * Move recs over to get rid of empty extent, decrease
    		 * next_free. This is allowed to remove the last
    		 * extent in our leaf (setting l_next_free_rec to
    		 * zero) - the delete code below won't care.
    		 */
    		ocfs2_remove_empty_extent(right_leaf_el);
    	}
    
    	ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
    	if (ret)
    		mlog_errno(ret);
    	ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
    	if (ret)
    		mlog_errno(ret);
    
    	if (del_right_subtree) {
    		ocfs2_unlink_subtree(inode, handle, left_path, right_path,
    				     subtree_index, dealloc);
    		ocfs2_update_edge_lengths(inode, handle, left_path);
    
    		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
    
    		ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
    
    
    		/*
    		 * Removal of the extent in the left leaf was skipped
    		 * above so we could delete the right path
    		 * 1st.
    		 */
    		if (right_has_empty)
    			ocfs2_remove_empty_extent(left_leaf_el);
    
    
    		ret = ocfs2_journal_dirty(handle, et_root_bh);
    
    		if (ret)
    			mlog_errno(ret);
    
    		*deleted = 1;
    	} else
    		ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
    					   subtree_index);
    
    out:
    	return ret;
    }
    
    /*
     * Given a full path, determine what cpos value would return us a path
     * containing the leaf immediately to the right of the current one.
     *
     * Will return zero if the path passed in is already the rightmost path.
     *
     * This looks similar, but is subtly different to
     * ocfs2_find_cpos_for_left_leaf().
     */
    static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
    					  struct ocfs2_path *path, u32 *cpos)
    {
    	int i, j, ret = 0;
    	u64 blkno;
    	struct ocfs2_extent_list *el;
    
    	*cpos = 0;
    
    	if (path->p_tree_depth == 0)
    		return 0;
    
    	blkno = path_leaf_bh(path)->b_blocknr;
    
    	/* Start at the tree node just above the leaf and work our way up. */
    	i = path->p_tree_depth - 1;
    	while (i >= 0) {
    		int next_free;
    
    		el = path->p_node[i].el;
    
    		/*
    		 * Find the extent record just after the one in our
    		 * path.
    		 */
    		next_free = le16_to_cpu(el->l_next_free_rec);
    		for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
    			if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
    				if (j == (next_free - 1)) {
    					if (i == 0) {
    						/*
    						 * We've determined that the
    						 * path specified is already
    						 * the rightmost one - return a
    						 * cpos of zero.
    						 */
    						goto out;
    					}
    					/*
    					 * The rightmost record points to our
    					 * leaf - we need to travel up the
    					 * tree one level.
    					 */
    					goto next_node;
    				}
    
    				*cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos);
    				goto out;
    			}
    		}
    
    		/*
    		 * If we got here, we never found a valid node where
    		 * the tree indicated one should be.
    		 */
    		ocfs2_error(sb,
    			    "Invalid extent tree at extent block %llu\n",
    			    (unsigned long long)blkno);
    		ret = -EROFS;
    		goto out;
    
    next_node:
    		blkno = path->p_node[i].bh->b_blocknr;
    		i--;
    	}
    
    out:
    	return ret;
    }
    
    static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
    					    handle_t *handle,
    					    struct buffer_head *bh,
    					    struct ocfs2_extent_list *el)
    {
    	int ret;
    
    	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
    		return 0;
    
    	ret = ocfs2_journal_access(handle, inode, bh,
    				   OCFS2_JOURNAL_ACCESS_WRITE);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	ocfs2_remove_empty_extent(el);
    
    	ret = ocfs2_journal_dirty(handle, bh);
    	if (ret)
    		mlog_errno(ret);
    
    out:
    	return ret;
    }
    
    static int __ocfs2_rotate_tree_left(struct inode *inode,
    				    handle_t *handle, int orig_credits,
    				    struct ocfs2_path *path,
    				    struct ocfs2_cached_dealloc_ctxt *dealloc,
    
    				    struct ocfs2_path **empty_extent_path,
    				    struct ocfs2_extent_tree *et)
    
    {
    	int ret, subtree_root, deleted;
    	u32 right_cpos;
    	struct ocfs2_path *left_path = NULL;
    	struct ocfs2_path *right_path = NULL;
    
    	BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
    
    	*empty_extent_path = NULL;
    
    	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path,
    					     &right_cpos);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	left_path = ocfs2_new_path(path_root_bh(path),
    				   path_root_el(path));
    	if (!left_path) {
    		ret = -ENOMEM;
    		mlog_errno(ret);
    		goto out;
    	}
    
    	ocfs2_cp_path(left_path, path);
    
    	right_path = ocfs2_new_path(path_root_bh(path),
    				    path_root_el(path));
    	if (!right_path) {
    		ret = -ENOMEM;
    		mlog_errno(ret);
    		goto out;
    	}
    
    	while (right_cpos) {
    		ret = ocfs2_find_path(inode, right_path, right_cpos);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		subtree_root = ocfs2_find_subtree_root(inode, left_path,
    						       right_path);
    
    		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
    		     subtree_root,
    		     (unsigned long long)
    		     right_path->p_node[subtree_root].bh->b_blocknr,
    		     right_path->p_tree_depth);
    
    		ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
    						      orig_credits, left_path);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    
    		/*
    		 * Caller might still want to make changes to the
    		 * tree root, so re-add it to the journal here.
    		 */
    		ret = ocfs2_journal_access(handle, inode,
    					   path_root_bh(left_path),
    					   OCFS2_JOURNAL_ACCESS_WRITE);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    
    		ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
    						right_path, subtree_root,
    
    						dealloc, &deleted, et);
    
    		if (ret == -EAGAIN) {
    			/*
    			 * The rotation has to temporarily stop due to
    			 * the right subtree having an empty
    			 * extent. Pass it back to the caller for a
    			 * fixup.
    			 */
    			*empty_extent_path = right_path;
    			right_path = NULL;
    			goto out;
    		}
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		/*
    		 * The subtree rotate might have removed records on
    		 * the rightmost edge. If so, then rotation is
    		 * complete.
    		 */
    		if (deleted)
    			break;
    
    		ocfs2_mv_path(left_path, right_path);
    
    		ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
    						     &right_cpos);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    	}
    
    out:
    	ocfs2_free_path(right_path);
    	ocfs2_free_path(left_path);
    
    	return ret;
    }
    
    static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
    
    				struct ocfs2_path *path,
    				struct ocfs2_cached_dealloc_ctxt *dealloc,
    				struct ocfs2_extent_tree *et)
    
    {
    	int ret, subtree_index;
    	u32 cpos;
    	struct ocfs2_path *left_path = NULL;
    	struct ocfs2_extent_block *eb;
    	struct ocfs2_extent_list *el;
    
    
    
    	ret = et->eops->sanity_check(inode, et);
    	if (ret)
    		goto out;
    
    	/*
    	 * There's two ways we handle this depending on
    	 * whether path is the only existing one.
    	 */
    	ret = ocfs2_extend_rotate_transaction(handle, 0,
    					      handle->h_buffer_credits,
    					      path);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	ret = ocfs2_journal_access_path(inode, handle, path);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	if (cpos) {
    		/*
    		 * We have a path to the left of this one - it needs
    		 * an update too.
    		 */
    		left_path = ocfs2_new_path(path_root_bh(path),
    					   path_root_el(path));
    		if (!left_path) {
    			ret = -ENOMEM;
    			mlog_errno(ret);
    			goto out;
    		}
    
    		ret = ocfs2_find_path(inode, left_path, cpos);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		ret = ocfs2_journal_access_path(inode, handle, left_path);
    		if (ret) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		subtree_index = ocfs2_find_subtree_root(inode, left_path, path);
    
    		ocfs2_unlink_subtree(inode, handle, left_path, path,
    				     subtree_index, dealloc);
    		ocfs2_update_edge_lengths(inode, handle, left_path);
    
    		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
    
    		ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
    
    	} else {
    		/*
    		 * 'path' is also the leftmost path which
    		 * means it must be the only one. This gets
    		 * handled differently because we want to
    		 * revert the inode back to having extents
    		 * in-line.
    		 */
    		ocfs2_unlink_path(inode, handle, dealloc, path, 1);
    
    
    		el->l_tree_depth = 0;
    		el->l_next_free_rec = 0;
    		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
    
    
    		ocfs2_set_last_eb_blk(et, 0);
    
    	}
    
    	ocfs2_journal_dirty(handle, path_root_bh(path));
    
    out:
    	ocfs2_free_path(left_path);
    	return ret;
    }
    
    /*
     * Left rotation of btree records.
     *
     * In many ways, this is (unsurprisingly) the opposite of right
     * rotation. We start at some non-rightmost path containing an empty
     * extent in the leaf block. The code works its way to the rightmost
     * path by rotating records to the left in every subtree.
     *
     * This is used by any code which reduces the number of extent records
     * in a leaf. After removal, an empty record should be placed in the
     * leftmost list position.
     *
     * This won't handle a length update of the rightmost path records if
     * the rightmost tree leaf record is removed so the caller is
     * responsible for detecting and correcting that.
     */
    static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
    				  struct ocfs2_path *path,
    
    				  struct ocfs2_cached_dealloc_ctxt *dealloc,
    				  struct ocfs2_extent_tree *et)
    
    {
    	int ret, orig_credits = handle->h_buffer_credits;
    	struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
    	struct ocfs2_extent_block *eb;
    	struct ocfs2_extent_list *el;
    
    	el = path_leaf_el(path);
    	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
    		return 0;
    
    	if (path->p_tree_depth == 0) {
    rightmost_no_delete:
    		/*
    
    		 * Inline extents. This is trivially handled, so do
    
    		 * it up front.
    		 */
    		ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
    						       path_leaf_bh(path),
    						       path_leaf_el(path));
    		if (ret)
    			mlog_errno(ret);
    		goto out;
    	}
    
    	/*
    	 * Handle rightmost branch now. There's several cases:
    	 *  1) simple rotation leaving records in there. That's trivial.
    	 *  2) rotation requiring a branch delete - there's no more
    	 *     records left. Two cases of this:
    	 *     a) There are branches to the left.
    	 *     b) This is also the leftmost (the only) branch.
    	 *
    	 *  1) is handled via ocfs2_rotate_rightmost_leaf_left()
    	 *  2a) we need the left branch so that we can update it with the unlink
    	 *  2b) we need to bring the inode back to inline extents.
    	 */
    
    	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
    	el = &eb->h_list;
    	if (eb->h_next_leaf_blk == 0) {
    		/*
    		 * This gets a bit tricky if we're going to delete the
    		 * rightmost path. Get the other cases out of the way
    		 * 1st.
    		 */
    		if (le16_to_cpu(el->l_next_free_rec) > 1)
    			goto rightmost_no_delete;
    
    		if (le16_to_cpu(el->l_next_free_rec) == 0) {
    			ret = -EIO;
    			ocfs2_error(inode->i_sb,
    				    "Inode %llu has empty extent block at %llu",
    				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
    				    (unsigned long long)le64_to_cpu(eb->h_blkno));
    			goto out;
    		}
    
    		/*
    		 * XXX: The caller can not trust "path" any more after
    		 * this as it will have been deleted. What do we do?
    		 *
    		 * In theory the rotate-for-merge code will never get
    		 * here because it'll always ask for a rotate in a
    		 * nonempty list.
    		 */
    
    		ret = ocfs2_remove_rightmost_path(inode, handle, path,
    
    		if (ret)
    			mlog_errno(ret);
    		goto out;
    	}
    
    	/*
    	 * Now we can loop, remembering the path we get from -EAGAIN
    	 * and restarting from there.
    	 */
    try_rotate:
    	ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
    
    				       dealloc, &restart_path, et);
    
    	if (ret && ret != -EAGAIN) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	while (ret == -EAGAIN) {
    		tmp_path = restart_path;
    		restart_path = NULL;
    
    		ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
    					       tmp_path, dealloc,
    
    					       &restart_path, et);
    
    		if (ret && ret != -EAGAIN) {
    			mlog_errno(ret);
    			goto out;
    		}
    
    		ocfs2_free_path(tmp_path);
    		tmp_path = NULL;
    
    		if (ret == 0)
    			goto try_rotate;
    	}
    
    out:
    	ocfs2_free_path(tmp_path);
    	ocfs2_free_path(restart_path);
    	return ret;
    }
    
    static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
    				int index)
    {
    	struct ocfs2_extent_rec *rec = &el->l_recs[index];
    	unsigned int size;
    
    	if (rec->e_leaf_clusters == 0) {
    		/*
    		 * We consumed all of the merged-from record. An empty
    		 * extent cannot exist anywhere but the 1st array
    		 * position, so move things over if the merged-from
    		 * record doesn't occupy that position.
    		 *
    		 * This creates a new empty extent so the caller
    		 * should be smart enough to have removed any existing
    		 * ones.
    		 */
    		if (index > 0) {
    			BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
    			size = index * sizeof(struct ocfs2_extent_rec);
    			memmove(&el->l_recs[1], &el->l_recs[0], size);
    		}
    
    		/*
    		 * Always memset - the caller doesn't check whether it
    		 * created an empty extent, so there could be junk in
    		 * the other fields.
    		 */
    		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
    	}
    }
    
    
    static int ocfs2_get_right_path(struct inode *inode,
    				struct ocfs2_path *left_path,
    				struct ocfs2_path **ret_right_path)
    {
    	int ret;
    	u32 right_cpos;
    	struct ocfs2_path *right_path = NULL;
    	struct ocfs2_extent_list *left_el;
    
    	*ret_right_path = NULL;
    
    	/* This function shouldn't be called for non-trees. */
    	BUG_ON(left_path->p_tree_depth == 0);
    
    	left_el = path_leaf_el(left_path);
    	BUG_ON(left_el->l_next_free_rec != left_el->l_count);
    
    	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
    					     &right_cpos);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	/* This function shouldn't be called for the rightmost leaf. */
    	BUG_ON(right_cpos == 0);
    
    	right_path = ocfs2_new_path(path_root_bh(left_path),
    				    path_root_el(left_path));
    	if (!right_path) {
    		ret = -ENOMEM;
    		mlog_errno(ret);
    		goto out;
    	}
    
    	ret = ocfs2_find_path(inode, right_path, right_cpos);
    	if (ret) {
    		mlog_errno(ret);
    		goto out;
    	}
    
    	*ret_right_path = right_path;
    out:
    	if (ret)
    		ocfs2_free_path(right_path);
    	return ret;
    }
    
    
    /*
     * Remove split_rec clusters from the record at index and merge them
    
     * onto the beginning of the record "next" to it.
     * For index < l_count - 1, the next means the extent rec at index + 1.
     * For index == l_count - 1, the "next" means the 1st extent rec of the
     * next extent block.
    
    static int ocfs2_merge_rec_right(struct inode *inode,
    				 struct ocfs2_path *left_path,
    				 handle_t *handle,
    				 struct ocfs2_extent_rec *split_rec,
    				 int index)
    
    	int ret, next_free, i;
    
    	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);