Skip to content
Snippets Groups Projects
alloc.c 186 KiB
Newer Older
  • Learn to ignore specific revisions
  • /* -*- mode: c; c-basic-offset: 8; -*-
     * vim: noexpandtab sw=8 ts=8 sts=0:
     *
     * alloc.c
     *
     * Extent allocs and frees
     *
     * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
     *
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public
     * License as published by the Free Software Foundation; either
     * version 2 of the License, or (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * General Public License for more details.
     *
     * You should have received a copy of the GNU General Public
     * License along with this program; if not, write to the
     * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     * Boston, MA 021110-1307, USA.
     */
    
    #include <linux/fs.h>
    #include <linux/types.h>
    #include <linux/slab.h>
    #include <linux/highmem.h>
    
    #include <linux/swap.h>
    
    
    #define MLOG_MASK_PREFIX ML_DISK_ALLOC
    #include <cluster/masklog.h>
    
    #include "ocfs2.h"
    
    #include "alloc.h"
    
    #include "aops.h"
    
    Joel Becker's avatar
    Joel Becker committed
    #include "blockcheck.h"
    
    #include "dlmglue.h"
    #include "extent_map.h"
    #include "inode.h"
    #include "journal.h"
    #include "localalloc.h"
    #include "suballoc.h"
    #include "sysfile.h"
    #include "file.h"
    #include "super.h"
    #include "uptodate.h"
    
    #include "buffer_head_io.h"
    
    
    /*
     * Operations for a specific extent tree type.
     *
     * To implement an on-disk btree (extent tree) type in ocfs2, add
     * an ocfs2_extent_tree_operations structure and the matching
    
     * ocfs2_init_<thingy>_extent_tree() function.  That's pretty much it
    
     * for the allocation portion of the extent tree.
     */
    
    struct ocfs2_extent_tree_operations {
    
    	/*
    	 * last_eb_blk is the block number of the right most leaf extent
    	 * block.  Most on-disk structures containing an extent tree store
    	 * this value for fast access.  The ->eo_set_last_eb_blk() and
    	 * ->eo_get_last_eb_blk() operations access this value.  They are
    	 *  both required.
    	 */
    
    	void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et,
    				   u64 blkno);
    	u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et);
    
    
    	/*
    	 * The on-disk structure usually keeps track of how many total
    	 * clusters are stored in this extent tree.  This function updates
    	 * that value.  new_clusters is the delta, and must be
    	 * added to the total.  Required.
    	 */
    
    	void (*eo_update_clusters)(struct inode *inode,
    				   struct ocfs2_extent_tree *et,
    				   u32 new_clusters);
    
    
    	/*
    	 * If ->eo_insert_check() exists, it is called before rec is
    	 * inserted into the extent tree.  It is optional.
    	 */
    
    	int (*eo_insert_check)(struct inode *inode,
    			       struct ocfs2_extent_tree *et,
    			       struct ocfs2_extent_rec *rec);
    
    	int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et);
    
    	/*
    	 * --------------------------------------------------------------
    	 * The remaining are internal to ocfs2_extent_tree and don't have
    	 * accessor functions
    	 */
    
    	/*
    	 * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el.
    	 * It is required.
    	 */
    
    	void (*eo_fill_root_el)(struct ocfs2_extent_tree *et);
    
    
    	/*
    	 * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if
    	 * it exists.  If it does not, et->et_max_leaf_clusters is set
    	 * to 0 (unlimited).  Optional.
    	 */
    
    	void (*eo_fill_max_leaf_clusters)(struct inode *inode,
    					  struct ocfs2_extent_tree *et);
    
    /*
     * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check
     * in the methods.
     */
    static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
    static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
    					 u64 blkno);
    static void ocfs2_dinode_update_clusters(struct inode *inode,
    					 struct ocfs2_extent_tree *et,
    					 u32 clusters);
    static int ocfs2_dinode_insert_check(struct inode *inode,
    				     struct ocfs2_extent_tree *et,
    				     struct ocfs2_extent_rec *rec);
    static int ocfs2_dinode_sanity_check(struct inode *inode,
    				     struct ocfs2_extent_tree *et);
    static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
    static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
    	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
    	.eo_get_last_eb_blk	= ocfs2_dinode_get_last_eb_blk,
    	.eo_update_clusters	= ocfs2_dinode_update_clusters,
    	.eo_insert_check	= ocfs2_dinode_insert_check,
    	.eo_sanity_check	= ocfs2_dinode_sanity_check,
    	.eo_fill_root_el	= ocfs2_dinode_fill_root_el,
    };
    
    static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
    					 u64 blkno)
    {
    
    	struct ocfs2_dinode *di = et->et_object;
    
    	BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
    
    	di->i_last_eb_blk = cpu_to_le64(blkno);
    }
    
    static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
    {
    
    	struct ocfs2_dinode *di = et->et_object;
    
    	BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
    
    	return le64_to_cpu(di->i_last_eb_blk);
    }
    
    static void ocfs2_dinode_update_clusters(struct inode *inode,
    					 struct ocfs2_extent_tree *et,
    					 u32 clusters)
    {
    
    	struct ocfs2_dinode *di = et->et_object;
    
    
    	le32_add_cpu(&di->i_clusters, clusters);
    	spin_lock(&OCFS2_I(inode)->ip_lock);
    	OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
    	spin_unlock(&OCFS2_I(inode)->ip_lock);
    }
    
    
    static int ocfs2_dinode_insert_check(struct inode *inode,
    				     struct ocfs2_extent_tree *et,
    				     struct ocfs2_extent_rec *rec)
    {
    	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
    
    	BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
    	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
    			(OCFS2_I(inode)->ip_clusters != rec->e_cpos),
    			"Device %s, asking for sparse allocation: inode %llu, "
    			"cpos %u, clusters %u\n",
    			osb->dev_str,
    			(unsigned long long)OCFS2_I(inode)->ip_blkno,
    			rec->e_cpos,
    			OCFS2_I(inode)->ip_clusters);
    
    	return 0;
    }
    
    
    static int ocfs2_dinode_sanity_check(struct inode *inode,
    				     struct ocfs2_extent_tree *et)
    {
    
    	struct ocfs2_dinode *di = et->et_object;
    
    	BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
    
    	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
    
    static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
    {
    	struct ocfs2_dinode *di = et->et_object;
    
    	et->et_root_el = &di->id2.i_list;
    }
    
    
    static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
    {
    	struct ocfs2_xattr_value_root *xv = et->et_object;
    
    	et->et_root_el = &xv->xr_list;
    }
    
    
    static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
    					      u64 blkno)
    {
    	struct ocfs2_xattr_value_root *xv =
    
    		(struct ocfs2_xattr_value_root *)et->et_object;
    
    
    	xv->xr_last_eb_blk = cpu_to_le64(blkno);
    }
    
    static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
    {
    	struct ocfs2_xattr_value_root *xv =
    
    		(struct ocfs2_xattr_value_root *) et->et_object;
    
    
    	return le64_to_cpu(xv->xr_last_eb_blk);
    }
    
    static void ocfs2_xattr_value_update_clusters(struct inode *inode,
    					      struct ocfs2_extent_tree *et,
    					      u32 clusters)
    {
    	struct ocfs2_xattr_value_root *xv =
    
    		(struct ocfs2_xattr_value_root *)et->et_object;
    
    
    	le32_add_cpu(&xv->xr_clusters, clusters);
    }
    
    
    static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
    
    	.eo_set_last_eb_blk	= ocfs2_xattr_value_set_last_eb_blk,
    	.eo_get_last_eb_blk	= ocfs2_xattr_value_get_last_eb_blk,
    	.eo_update_clusters	= ocfs2_xattr_value_update_clusters,
    
    	.eo_fill_root_el	= ocfs2_xattr_value_fill_root_el,
    
    static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
    {
    	struct ocfs2_xattr_block *xb = et->et_object;
    
    	et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
    }
    
    
    static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode,
    						    struct ocfs2_extent_tree *et)
    {
    	et->et_max_leaf_clusters =
    		ocfs2_clusters_for_bytes(inode->i_sb,
    					 OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
    }
    
    
    static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
    					     u64 blkno)
    {
    
    	struct ocfs2_xattr_block *xb = et->et_object;
    
    	struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
    
    	xt->xt_last_eb_blk = cpu_to_le64(blkno);
    }
    
    static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
    {
    
    	struct ocfs2_xattr_block *xb = et->et_object;
    
    	struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
    
    	return le64_to_cpu(xt->xt_last_eb_blk);
    }
    
    static void ocfs2_xattr_tree_update_clusters(struct inode *inode,
    					     struct ocfs2_extent_tree *et,
    					     u32 clusters)
    {
    
    	struct ocfs2_xattr_block *xb = et->et_object;
    
    
    	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
    }
    
    static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
    
    	.eo_set_last_eb_blk	= ocfs2_xattr_tree_set_last_eb_blk,
    	.eo_get_last_eb_blk	= ocfs2_xattr_tree_get_last_eb_blk,
    	.eo_update_clusters	= ocfs2_xattr_tree_update_clusters,
    
    	.eo_fill_root_el	= ocfs2_xattr_tree_fill_root_el,
    
    	.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
    
    static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
    				     struct inode *inode,
    				     struct buffer_head *bh,
    
    				     ocfs2_journal_access_func access,
    
    				     void *obj,
    				     struct ocfs2_extent_tree_operations *ops)
    
    	et->et_root_bh = bh;
    
    	et->et_root_journal_access = access;
    
    	if (!obj)
    		obj = (void *)bh->b_data;
    	et->et_object = obj;
    
    	if (!et->et_ops->eo_fill_max_leaf_clusters)
    		et->et_max_leaf_clusters = 0;
    	else
    		et->et_ops->eo_fill_max_leaf_clusters(inode, et);
    
    void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
    				   struct inode *inode,
    				   struct buffer_head *bh)
    
    	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di,
    				 NULL, &ocfs2_dinode_et_ops);
    
    void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
    
    	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb,
    				 NULL, &ocfs2_xattr_tree_et_ops);
    
    void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
    					struct inode *inode,
    					struct buffer_head *bh,
    					struct ocfs2_xattr_value_root *xv)
    
    	__ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access, xv,
    
    static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
    					    u64 new_last_eb_blk)
    
    	et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk);
    
    static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
    
    	return et->et_ops->eo_get_last_eb_blk(et);
    
    static inline void ocfs2_et_update_clusters(struct inode *inode,
    					    struct ocfs2_extent_tree *et,
    					    u32 clusters)
    {
    
    	et->et_ops->eo_update_clusters(inode, et, clusters);
    
    static inline int ocfs2_et_root_journal_access(handle_t *handle,
    					       struct inode *inode,
    					       struct ocfs2_extent_tree *et,
    					       int type)
    {
    	return et->et_root_journal_access(handle, inode, et->et_root_bh,
    					  type);
    }
    
    
    static inline int ocfs2_et_insert_check(struct inode *inode,
    					struct ocfs2_extent_tree *et,
    					struct ocfs2_extent_rec *rec)
    {
    	int ret = 0;
    
    	if (et->et_ops->eo_insert_check)
    		ret = et->et_ops->eo_insert_check(inode, et, rec);
    	return ret;
    }
    
    
    static inline int ocfs2_et_sanity_check(struct inode *inode,
    					struct ocfs2_extent_tree *et)
    
    	int ret = 0;
    
    	if (et->et_ops->eo_sanity_check)
    		ret = et->et_ops->eo_sanity_check(inode, et);
    	return ret;
    
    static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
    
    static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
    					 struct ocfs2_extent_block *eb);
    
    /*
     * Structures which describe a path through a btree, and functions to
     * manipulate them.
     *
     * The idea here is to be as generic as possible with the tree
     * manipulation code.
     */
    struct ocfs2_path_item {
    	struct buffer_head		*bh;
    	struct ocfs2_extent_list	*el;
    };
    
    #define OCFS2_MAX_PATH_DEPTH	5
    
    struct ocfs2_path {
    
    	int				p_tree_depth;
    	ocfs2_journal_access_func	p_root_access;
    	struct ocfs2_path_item		p_node[OCFS2_MAX_PATH_DEPTH];
    
    #define path_root_bh(_path) ((_path)->p_node[0].bh)
    #define path_root_el(_path) ((_path)->p_node[0].el)
    
    #define path_root_access(_path)((_path)->p_root_access)
    
    #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
    #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
    #define path_num_items(_path) ((_path)->p_tree_depth + 1)
    
    /*
     * Reset the actual path elements so that we can re-use the structure
     * to build another path. Generally, this involves freeing the buffer
     * heads.
     */
    static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
    {
    	int i, start = 0, depth = 0;
    	struct ocfs2_path_item *node;
    
    	if (keep_root)
    		start = 1;
    
    	for(i = start; i < path_num_items(path); i++) {
    		node = &path->p_node[i];
    
    		brelse(node->bh);
    		node->bh = NULL;
    		node->el = NULL;
    	}
    
    	/*
    	 * Tree depth may change during truncate, or insert. If we're
    	 * keeping the root extent list, then make sure that our path
    	 * structure reflects the proper depth.
    	 */
    	if (keep_root)
    		depth = le16_to_cpu(path_root_el(path)->l_tree_depth);
    
    	else
    		path_root_access(path) = NULL;
    
    
    	path->p_tree_depth = depth;
    }
    
    static void ocfs2_free_path(struct ocfs2_path *path)
    {
    	if (path) {
    		ocfs2_reinit_path(path, 0);
    		kfree(path);
    	}
    }
    
    
    /*
     * All the elements of src into dest. After this call, src could be freed
     * without affecting dest.
     *
     * Both paths should have the same root. Any non-root elements of dest
     * will be freed.
     */
    static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
    {
    	int i;
    
    	BUG_ON(path_root_bh(dest) != path_root_bh(src));
    	BUG_ON(path_root_el(dest) != path_root_el(src));
    
    	BUG_ON(path_root_access(dest) != path_root_access(src));
    
    
    	ocfs2_reinit_path(dest, 1);
    
    	for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
    		dest->p_node[i].bh = src->p_node[i].bh;
    		dest->p_node[i].el = src->p_node[i].el;
    
    		if (dest->p_node[i].bh)
    			get_bh(dest->p_node[i].bh);
    	}
    }
    
    
    /*
     * Make the *dest path the same as src and re-initialize src path to
     * have a root only.
     */
    static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
    {
    	int i;
    
    	BUG_ON(path_root_bh(dest) != path_root_bh(src));
    
    	BUG_ON(path_root_access(dest) != path_root_access(src));
    
    
    	for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
    		brelse(dest->p_node[i].bh);
    
    		dest->p_node[i].bh = src->p_node[i].bh;
    		dest->p_node[i].el = src->p_node[i].el;
    
    		src->p_node[i].bh = NULL;
    		src->p_node[i].el = NULL;
    	}
    }
    
    /*
     * Insert an extent block at given index.
     *
     * This will not take an additional reference on eb_bh.
     */
    static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
    					struct buffer_head *eb_bh)
    {
    	struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)eb_bh->b_data;
    
    	/*
    	 * Right now, no root bh is an extent block, so this helps
    	 * catch code errors with dinode trees. The assertion can be
    	 * safely removed if we ever need to insert extent block
    	 * structures at the root.
    	 */
    	BUG_ON(index == 0);
    
    	path->p_node[index].bh = eb_bh;
    	path->p_node[index].el = &eb->h_list;
    }
    
    static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
    
    					 struct ocfs2_extent_list *root_el,
    					 ocfs2_journal_access_func access)
    
    {
    	struct ocfs2_path *path;
    
    	BUG_ON(le16_to_cpu(root_el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH);
    
    	path = kzalloc(sizeof(*path), GFP_NOFS);
    	if (path) {
    		path->p_tree_depth = le16_to_cpu(root_el->l_tree_depth);
    		get_bh(root_bh);
    		path_root_bh(path) = root_bh;
    		path_root_el(path) = root_el;
    
    		path_root_access(path) = access;
    
    static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
    {
    
    	return ocfs2_new_path(path_root_bh(path), path_root_el(path),
    			      path_root_access(path));
    
    }
    
    static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
    {
    
    	return ocfs2_new_path(et->et_root_bh, et->et_root_el,
    			      et->et_root_journal_access);
    }
    
    /*
     * Journal the buffer at depth idx.  All idx>0 are extent_blocks,
     * otherwise it's the root_access function.
     *
     * I don't like the way this function's name looks next to
     * ocfs2_journal_access_path(), but I don't have a better one.
     */
    static int ocfs2_path_bh_journal_access(handle_t *handle,
    					struct inode *inode,
    					struct ocfs2_path *path,
    					int idx)
    {
    	ocfs2_journal_access_func access = path_root_access(path);
    
    	if (!access)
    		access = ocfs2_journal_access;
    
    	if (idx)
    		access = ocfs2_journal_access_eb;
    
    	return access(handle, inode, path->p_node[idx].bh,
    		      OCFS2_JOURNAL_ACCESS_WRITE);
    
    /*
     * Convenience function to journal all components in a path.
     */
    static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
    				     struct ocfs2_path *path)
    {
    	int i, ret = 0;
    
    	if (!path)
    		goto out;
    
    	for(i = 0; i < path_num_items(path); i++) {
    
    		ret = ocfs2_path_bh_journal_access(handle, inode, path, i);
    
    		if (ret < 0) {
    			mlog_errno(ret);
    			goto out;
    		}
    	}
    
    out:
    	return ret;
    }
    
    
    /*
     * Return the index of the extent record which contains cluster #v_cluster.
     * -1 is returned if it was not found.
     *
     * Should work fine on interior and exterior nodes.
     */
    int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
    {
    	int ret = -1;
    	int i;
    	struct ocfs2_extent_rec *rec;
    	u32 rec_end, rec_start, clusters;
    
    	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
    		rec = &el->l_recs[i];
    
    		rec_start = le32_to_cpu(rec->e_cpos);
    		clusters = ocfs2_rec_clusters(el, rec);
    
    		rec_end = rec_start + clusters;
    
    		if (v_cluster >= rec_start && v_cluster < rec_end) {
    			ret = i;
    			break;
    		}
    	}
    
    	return ret;
    }
    
    
    enum ocfs2_contig_type {
    	CONTIG_NONE = 0,
    	CONTIG_LEFT,
    
    	CONTIG_RIGHT,
    	CONTIG_LEFTRIGHT,
    
    
    /*
     * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
     * ocfs2_extent_contig only work properly against leaf nodes!
     */
    
    static int ocfs2_block_extent_contig(struct super_block *sb,
    				     struct ocfs2_extent_rec *ext,
    				     u64 blkno)
    
    	u64 blk_end = le64_to_cpu(ext->e_blkno);
    
    	blk_end += ocfs2_clusters_to_blocks(sb,
    				    le16_to_cpu(ext->e_leaf_clusters));
    
    	return blkno == blk_end;
    
    static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
    				  struct ocfs2_extent_rec *right)
    {
    
    	u32 left_range;
    
    	left_range = le32_to_cpu(left->e_cpos) +
    		le16_to_cpu(left->e_leaf_clusters);
    
    	return (left_range == le32_to_cpu(right->e_cpos));
    
    }
    
    static enum ocfs2_contig_type
    	ocfs2_extent_contig(struct inode *inode,
    			    struct ocfs2_extent_rec *ext,
    			    struct ocfs2_extent_rec *insert_rec)
    {
    	u64 blkno = le64_to_cpu(insert_rec->e_blkno);
    
    
    	/*
    	 * Refuse to coalesce extent records with different flag
    	 * fields - we don't want to mix unwritten extents with user
    	 * data.
    	 */
    	if (ext->e_flags != insert_rec->e_flags)
    		return CONTIG_NONE;
    
    
    	if (ocfs2_extents_adjacent(ext, insert_rec) &&
    	    ocfs2_block_extent_contig(inode->i_sb, ext, blkno))
    			return CONTIG_RIGHT;
    
    	blkno = le64_to_cpu(ext->e_blkno);
    	if (ocfs2_extents_adjacent(insert_rec, ext) &&
    	    ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno))
    		return CONTIG_LEFT;
    
    	return CONTIG_NONE;
    }
    
    /*
     * NOTE: We can have pretty much any combination of contiguousness and
     * appending.
     *
     * The usefulness of APPEND_TAIL is more in that it lets us know that
     * we'll have to update the path to that leaf.
     */
    enum ocfs2_append_type {
    	APPEND_NONE = 0,
    	APPEND_TAIL,
    };
    
    
    enum ocfs2_split_type {
    	SPLIT_NONE = 0,
    	SPLIT_LEFT,
    	SPLIT_RIGHT,
    };
    
    
    struct ocfs2_insert_type {
    
    	enum ocfs2_split_type	ins_split;
    
    	enum ocfs2_append_type	ins_appending;
    	enum ocfs2_contig_type	ins_contig;
    	int			ins_contig_index;
    	int			ins_tree_depth;
    };
    
    
    struct ocfs2_merge_ctxt {
    	enum ocfs2_contig_type	c_contig_type;
    	int			c_has_empty_extent;
    	int			c_split_covers_rec;
    };
    
    
    static int ocfs2_validate_extent_block(struct super_block *sb,
    				       struct buffer_head *bh)
    {
    
    Joel Becker's avatar
    Joel Becker committed
    	int rc;
    
    	struct ocfs2_extent_block *eb =
    		(struct ocfs2_extent_block *)bh->b_data;
    
    
    	mlog(0, "Validating extent block %llu\n",
    	     (unsigned long long)bh->b_blocknr);
    
    
    Joel Becker's avatar
    Joel Becker committed
    	BUG_ON(!buffer_uptodate(bh));
    
    	/*
    	 * If the ecc fails, we return the error but otherwise
    	 * leave the filesystem running.  We know any error is
    	 * local to this block.
    	 */
    	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
    
    	if (rc) {
    		mlog(ML_ERROR, "Checksum failed for extent block %llu\n",
    		     (unsigned long long)bh->b_blocknr);
    
    Joel Becker's avatar
    Joel Becker committed
    		return rc;
    
    Joel Becker's avatar
    Joel Becker committed
    
    	/*
    	 * Errors after here are fatal.
    	 */
    
    
    	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
    		ocfs2_error(sb,
    			    "Extent block #%llu has bad signature %.*s",
    			    (unsigned long long)bh->b_blocknr, 7,
    			    eb->h_signature);
    		return -EINVAL;
    	}
    
    	if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
    		ocfs2_error(sb,
    			    "Extent block #%llu has an invalid h_blkno "
    			    "of %llu",
    			    (unsigned long long)bh->b_blocknr,
    			    (unsigned long long)le64_to_cpu(eb->h_blkno));
    		return -EINVAL;
    	}
    
    	if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
    		ocfs2_error(sb,
    			    "Extent block #%llu has an invalid "
    			    "h_fs_generation of #%u",
    			    (unsigned long long)bh->b_blocknr,
    			    le32_to_cpu(eb->h_fs_generation));
    		return -EINVAL;
    	}
    
    	return 0;
    }
    
    int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
    			    struct buffer_head **bh)
    {
    	int rc;
    	struct buffer_head *tmp = *bh;
    
    
    	rc = ocfs2_read_block(inode, eb_blkno, &tmp,
    			      ocfs2_validate_extent_block);
    
    
    	/* If ocfs2_read_block() got us a new bh, pass it up. */
    
    /*
     * How many free extents have we got before we need more meta data?
     */
    int ocfs2_num_free_extents(struct ocfs2_super *osb,
    			   struct inode *inode,
    
    	struct ocfs2_extent_list *el = NULL;
    
    	struct ocfs2_extent_block *eb;
    	struct buffer_head *eb_bh = NULL;
    
    	u64 last_eb_blk = 0;
    
    	el = et->et_root_el;
    	last_eb_blk = ocfs2_et_get_last_eb_blk(et);
    
    		retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
    
    		if (retval < 0) {
    			mlog_errno(retval);
    			goto bail;
    		}
    		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
    		el = &eb->h_list;
    
    
    	BUG_ON(el->l_tree_depth != 0);
    
    	retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
    bail:
    
    	brelse(eb_bh);
    
    
    	mlog_exit(retval);
    	return retval;
    }
    
    /* expects array to already be allocated
     *
     * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
     * l_count for you
     */
    static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
    
    				     struct inode *inode,
    				     int wanted,
    				     struct ocfs2_alloc_context *meta_ac,
    				     struct buffer_head *bhs[])
    {
    	int count, status, i;
    	u16 suballoc_bit_start;
    	u32 num_got;
    	u64 first_blkno;
    	struct ocfs2_extent_block *eb;
    
    	mlog_entry_void();
    
    	count = 0;
    	while (count < wanted) {
    		status = ocfs2_claim_metadata(osb,
    					      handle,
    					      meta_ac,
    					      wanted - count,
    					      &suballoc_bit_start,
    					      &num_got,
    					      &first_blkno);
    		if (status < 0) {
    			mlog_errno(status);
    			goto bail;
    		}
    
    		for(i = count;  i < (num_got + count); i++) {
    			bhs[i] = sb_getblk(osb->sb, first_blkno);
    			if (bhs[i] == NULL) {
    				status = -EIO;
    				mlog_errno(status);
    				goto bail;
    			}
    			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
    
    
    			status = ocfs2_journal_access_eb(handle, inode, bhs[i],
    							 OCFS2_JOURNAL_ACCESS_CREATE);
    
    			if (status < 0) {
    				mlog_errno(status);
    				goto bail;
    			}
    
    			memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
    			eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
    			/* Ok, setup the minimal stuff here. */
    			strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
    			eb->h_blkno = cpu_to_le64(first_blkno);
    			eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
    			eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
    			eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
    			eb->h_list.l_count =
    				cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
    
    			suballoc_bit_start++;
    			first_blkno++;
    
    			/* We'll also be dirtied by the caller, so
    			 * this isn't absolutely necessary. */
    			status = ocfs2_journal_dirty(handle, bhs[i]);
    			if (status < 0) {
    				mlog_errno(status);
    				goto bail;
    			}
    		}
    
    		count += num_got;
    	}
    
    	status = 0;
    bail:
    	if (status < 0) {
    		for(i = 0; i < wanted; i++) {
    
    			brelse(bhs[i]);
    
    			bhs[i] = NULL;
    		}
    	}
    	mlog_exit(status);
    	return status;
    }
    
    
    /*
     * Helper function for ocfs2_add_branch() and ocfs2_shift_tree_depth().
     *
     * Returns the sum of the rightmost extent rec logical offset and
     * cluster count.
     *
     * ocfs2_add_branch() uses this to determine what logical cluster
     * value should be populated into the leftmost new branch records.
     *
     * ocfs2_shift_tree_depth() uses this to determine the # clusters
     * value for the new topmost tree record.
     */
    static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list  *el)
    {
    	int i;
    
    	i = le16_to_cpu(el->l_next_free_rec) - 1;
    
    	return le32_to_cpu(el->l_recs[i].e_cpos) +
    
    		ocfs2_rec_clusters(el, &el->l_recs[i]);
    
    /*
     * Add an entire tree branch to our inode. eb_bh is the extent block
     * to start at, if we don't want to start the branch at the dinode
     * structure.
     *
     * last_eb_bh is required as we have to update it's next_leaf pointer
     * for the new last extent block.
     *
     * the new branch will be 'empty' in the sense that every block will
    
     * contain a single record with cluster count == 0.
    
     */
    static int ocfs2_add_branch(struct ocfs2_super *osb,
    
    			    struct inode *inode,
    
    			    struct ocfs2_extent_tree *et,
    
    			    struct buffer_head *eb_bh,
    
    			    struct buffer_head **last_eb_bh,
    
    			    struct ocfs2_alloc_context *meta_ac)
    {
    	int status, new_blocks, i;
    	u64 next_blkno, new_last_eb_blk;
    	struct buffer_head *bh;
    	struct buffer_head **new_eb_bhs = NULL;
    	struct ocfs2_extent_block *eb;
    	struct ocfs2_extent_list  *eb_el;
    	struct ocfs2_extent_list  *el;
    
    	u32 new_cpos;
    
    	BUG_ON(!last_eb_bh || !*last_eb_bh);
    
    
    	if (eb_bh) {
    		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
    		el = &eb->h_list;
    	} else
    
    		el = et->et_root_el;
    
    
    	/* we never add a branch to a leaf. */
    	BUG_ON(!el->l_tree_depth);