Skip to content
Snippets Groups Projects
xfs_log.c 105 KiB
Newer Older
  • Learn to ignore specific revisions
  • Linus Torvalds's avatar
    Linus Torvalds committed
    /*
    
     * Copyright (c) 2000-2005 Silicon Graphics, Inc.
     * All Rights Reserved.
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     *
    
     * This program is free software; you can redistribute it and/or
     * modify it under the terms of the GNU General Public License as
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     * published by the Free Software Foundation.
     *
    
     * This program is distributed in the hope that it would be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     *
    
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write the Free Software Foundation,
     * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     */
    #include "xfs.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include "xfs_types.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include "xfs_log.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include "xfs_trans.h"
    
    #include "xfs_sb.h"
    #include "xfs_ag.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include "xfs_mount.h"
    #include "xfs_error.h"
    #include "xfs_log_priv.h"
    #include "xfs_buf_item.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include "xfs_alloc_btree.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include "xfs_log_recover.h"
    #include "xfs_trans_priv.h"
    
    #include "xfs_dinode.h"
    #include "xfs_inode.h"
    #include "xfs_rw.h"
    
    #include "xfs_trace.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    kmem_zone_t	*xfs_log_ticket_zone;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /* Local miscellaneous function prototypes */
    
    STATIC int	 xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				    xlog_in_core_t **, xfs_lsn_t *);
    STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,
    				xfs_buftarg_t	*log_target,
    				xfs_daddr_t	blk_offset,
    				int		num_bblks);
    
    STATIC int	 xlog_space_left(struct log *log, atomic64_t *head);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
    
    STATIC void	 xlog_dealloc_log(xlog_t *log);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /* local state machine functions */
    STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
    STATIC void xlog_state_do_callback(xlog_t *log,int aborted, xlog_in_core_t *iclog);
    STATIC int  xlog_state_get_iclog_space(xlog_t		*log,
    				       int		len,
    				       xlog_in_core_t	**iclog,
    				       xlog_ticket_t	*ticket,
    				       int		*continued_write,
    				       int		*logoffsetp);
    STATIC int  xlog_state_release_iclog(xlog_t		*log,
    				     xlog_in_core_t	*iclog);
    STATIC void xlog_state_switch_iclogs(xlog_t		*log,
    				     xlog_in_core_t *iclog,
    				     int		eventual_size);
    STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog);
    
    /* local functions to manipulate grant head */
    STATIC int  xlog_grant_log_space(xlog_t		*log,
    				 xlog_ticket_t	*xtic);
    
    STATIC void xlog_grant_push_ail(struct log	*log,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				int		need_bytes);
    STATIC void xlog_regrant_reserve_log_space(xlog_t	 *log,
    					   xlog_ticket_t *ticket);
    STATIC int xlog_regrant_write_log_space(xlog_t		*log,
    					 xlog_ticket_t  *ticket);
    STATIC void xlog_ungrant_log_space(xlog_t	 *log,
    				   xlog_ticket_t *ticket);
    
    
    STATIC void	xlog_verify_dest_ptr(xlog_t *log, char *ptr);
    
    STATIC void	xlog_verify_grant_tail(struct log *log);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
    				  int count, boolean_t syncing);
    STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
    				     xfs_lsn_t tail_lsn);
    #else
    #define xlog_verify_dest_ptr(a,b)
    
    #define xlog_verify_grant_tail(a)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #define xlog_verify_iclog(a,b,c,d)
    #define xlog_verify_tail_lsn(a,b,c)
    #endif
    
    
    STATIC int	xlog_iclogs_empty(xlog_t *log);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    xlog_grant_sub_space(
    	struct log	*log,
    
    	int64_t	head_val = atomic64_read(head);
    	int64_t new, old;
    
    		xlog_crack_grant_head_val(head_val, &cycle, &space);
    
    		space -= bytes;
    		if (space < 0) {
    			space += log->l_logsize;
    			cycle--;
    		}
    
    		old = head_val;
    		new = xlog_assign_grant_head_val(cycle, space);
    		head_val = atomic64_cmpxchg(head, old, new);
    	} while (head_val != old);
    
    xlog_grant_add_space(
    	struct log	*log,
    
    	int64_t	head_val = atomic64_read(head);
    	int64_t new, old;
    
    		xlog_crack_grant_head_val(head_val, &cycle, &space);
    
    		tmp = log->l_logsize - space;
    		if (tmp > bytes)
    			space += bytes;
    		else {
    			space = bytes - tmp;
    			cycle++;
    		}
    
    		old = head_val;
    		new = xlog_assign_grant_head_val(cycle, space);
    		head_val = atomic64_cmpxchg(head, old, new);
    	} while (head_val != old);
    
    /*
     * Clear the per-ticket region bookkeeping: the number of recorded regions,
     * their running length sum and the op-header count.  The overflow
     * accumulator (t_res_o_flow) is deliberately left untouched here.
     */
    static void
    xlog_tic_reset_res(xlog_ticket_t *tic)
    {
    	tic->t_res_num_ophdrs = 0;
    	tic->t_res_arr_sum = 0;
    	tic->t_res_num = 0;
    }
    
    /*
     * Record one region (length + type) in the ticket's region array and add
     * its length to the running sum.
     *
     * The array holds XLOG_TIC_LEN_MAX entries.  Once it is full, the current
     * running sum is folded into t_res_o_flow and recording restarts at slot 0,
     * overwriting the oldest entries.
     */
    static void
    xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
    {
    	if (tic->t_res_num == XLOG_TIC_LEN_MAX) {
    		/* array exhausted: bank the sum so far and wrap around */
    		tic->t_res_o_flow += tic->t_res_arr_sum;
    		tic->t_res_arr_sum = 0;
    		tic->t_res_num = 0;
    	}

    	tic->t_res_arr_sum += len;
    	tic->t_res_arr[tic->t_res_num].r_type = type;
    	tic->t_res_arr[tic->t_res_num].r_len = len;
    	tic->t_res_num++;
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * NOTES:
     *
     *	1. The currblock field gets updated at startup and after in-core logs
     *		are marked with WANT_SYNC.
     */
    
    /*
     * This routine is called when a user of a log manager ticket is done with
     * the reservation.  If the ticket was ever used, then a commit record for
     * the associated transaction is written out as a log operation header with
     * no data.  The flag XLOG_TIC_INITED is set when the first write occurs with
     * a given ticket.  If the ticket was one with a permanent reservation, then
     * a few operations are done differently.  Permanent reservation tickets by
     * default don't release the reservation.  They just commit the current
     * transaction with the belief that the reservation is still needed.  A flag
     * must be passed in before permanent reservations are actually released.
     * When these type of tickets are not released, they need to be set into
     * the inited state again.  By doing this, a start record will be written
     * out when the next write occurs.
     */
    /*
     * Finish with a log ticket.
     *
     * mp     - mount whose log (mp->m_log) the ticket belongs to
     * ticket - ticket being completed
     * iclog  - passed through to xlog_commit_record()
     * flags  - XFS_LOG_REL_PERM_RESERV forces release of a permanent
     *          reservation
     *
     * Returns the lsn filled in by xlog_commit_record(), 0 if no commit record
     * was written, or (xfs_lsn_t)-1 if the log is shut down or writing the
     * commit record failed.
     */
    xfs_lsn_t
    xfs_log_done(
    	struct xfs_mount	*mp,
    	struct xlog_ticket	*ticket,
    	struct xlog_in_core	**iclog,
    	uint			flags)
    {
    	struct log		*log = mp->m_log;
    	xfs_lsn_t		lsn = 0;

    	if (XLOG_FORCED_SHUTDOWN(log) ||
    	    /*
    	     * If nothing was ever written, don't write out commit record.
    	     * If we get an error, just continue and give back the log ticket.
    	     */
    	    (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
    	     (xlog_commit_record(log, ticket, iclog, &lsn)))) {
    		lsn = (xfs_lsn_t) -1;
    		/* on failure a permanent reservation is released as well */
    		if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
    			flags |= XFS_LOG_REL_PERM_RESERV;
    		}
    	}

    	if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
    	    (flags & XFS_LOG_REL_PERM_RESERV)) {
    		trace_xfs_log_done_nonperm(log, ticket);

    		/*
    		 * Release ticket if not permanent reservation or a specific
    		 * request has been made to release a permanent reservation.
    		 */
    		xlog_ungrant_log_space(log, ticket);
    		xfs_log_ticket_put(ticket);
    	} else {
    		trace_xfs_log_done_perm(log, ticket);

    		xlog_regrant_reserve_log_space(log, ticket);
    		/* If this ticket was a permanent reservation and we aren't
    		 * trying to release it, reset the inited flags; so next time
    		 * we write, a start record will be written out.
    		 */
    		ticket->t_flags |= XLOG_TIC_INITED;
    	}

    	return lsn;
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * Attaches a new iclog I/O completion callback routine during
     * transaction commit.  If the log is in error state, a non-zero
     * return code is handed back and the caller is responsible for
     * executing the callback at an appropriate time.
     */
    /*
     * Append callback @cb to the tail of @iclog's callback list.
     *
     * The list is manipulated under iclog->ic_callback_lock.  When the iclog
     * has XLOG_STATE_IOERROR set nothing is queued and the (non-zero) masked
     * state is returned; otherwise 0 is returned.  @mp is not used here.
     */
    int
    xfs_log_notify(
    	struct xfs_mount	*mp,
    	struct xlog_in_core	*iclog,
    	xfs_log_callback_t	*cb)
    {
    	int	abortflg;

    	spin_lock(&iclog->ic_callback_lock);
    	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
    	if (!abortflg) {
    		ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
    			      (iclog->ic_state == XLOG_STATE_WANT_SYNC));
    		/* link cb in at the tail and advance the tail pointer */
    		cb->cb_next = NULL;
    		*(iclog->ic_callback_tail) = cb;
    		iclog->ic_callback_tail = &(cb->cb_next);
    	}
    	spin_unlock(&iclog->ic_callback_lock);
    	return abortflg;
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    int
    
    xfs_log_release_iclog(
    	struct xfs_mount	*mp,
    	struct xlog_in_core	*iclog)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	if (xlog_state_release_iclog(mp->m_log, iclog)) {
    
    		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    	return 0;
    }
    
    /*
     *  1. Reserve an amount of on-disk log space and return a ticket corresponding
     *	to the reservation.
     *  2. Potentially, push buffers at tail of log to disk.
     *
     * Each reservation is going to reserve extra space for a log record header.
     * When writes happen to the on-disk log, we don't subtract the length of the
     * log record header from any reservation.  By wasting space in each
     * reservation, we prevent over allocation problems.
     */
    int
    xfs_log_reserve(
    	struct xfs_mount	*mp,
    	int			unit_bytes,
    	int			cnt,
    	struct xlog_ticket	**ticket,
    	__uint8_t		client,
    	uint			flags,
    	uint			t_type)
    {
    	struct log		*log = mp->m_log;
    	struct xlog_ticket	*tic;

    	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);

    	if (XLOG_FORCED_SHUTDOWN(log))
    		return XFS_ERROR(EIO);

    	XFS_STATS_INC(xs_try_logspace);

    	if (*ticket == NULL) {
    		/*
    		 * No ticket yet: allocate one (may sleep, may fail), hand it
    		 * back to the caller and grant space for the full
    		 * unit_res * cnt reservation.
    		 */
    		tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, flags,
    					KM_SLEEP|KM_MAYFAIL);
    		if (!tic)
    			return XFS_ERROR(ENOMEM);

    		tic->t_trans_type = t_type;
    		*ticket = tic;

    		trace_xfs_log_reserve(log, tic);

    		xlog_grant_push_ail(log, (tic->t_unit_res * tic->t_cnt));
    		return xlog_grant_log_space(log, tic);
    	}

    	/* Re-using an existing ticket is only valid for permanent ones. */
    	ASSERT(flags & XFS_LOG_PERM_RESERV);
    	tic = *ticket;

    	/*
    	 * This is a new transaction on the ticket, so bump the transaction
    	 * ID; the next transaction then has a different TID in the log and
    	 * chains of rolling transactions are easy to spot.
    	 */
    	tic->t_tid++;

    	trace_xfs_log_reserve(log, tic);

    	xlog_grant_push_ail(log, tic->t_unit_res);
    	return xlog_regrant_write_log_space(log, tic);
    }	/* xfs_log_reserve */
    
    
    /*
     * Mount a log filesystem
     *
     * mp		- ubiquitous xfs mount point structure
     * log_target	- buftarg of on-disk log device
     * blk_offset	- Start block # where block size is 512 bytes (BBSIZE)
     * num_bblocks	- Number of BBSIZE blocks in on-disk log
     *
     * Return error or zero.
     */
    int
    
    xfs_log_mount(
    	xfs_mount_t	*mp,
    	xfs_buftarg_t	*log_target,
    	xfs_daddr_t	blk_offset,
    	int		num_bblks)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
    
    		xfs_notice(mp, "Mounting Filesystem");
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	else {
    
    		xfs_notice(mp,
    "Mounting filesystem in no-recovery mode.  Filesystem will be inconsistent.");
    
    		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    	mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
    
    	if (IS_ERR(mp->m_log)) {
    		error = -PTR_ERR(mp->m_log);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/*
    	 * Initialize the AIL now we have a log.
    	 */
    	error = xfs_trans_ail_init(mp);
    	if (error) {
    
    		xfs_warn(mp, "AIL initialisation failed: error %d", error);
    
    		goto out_free_log;
    
    	mp->m_log->l_ailp = mp->m_ail;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/*
    	 * skip log recovery on a norecovery mount.  pretend it all
    	 * just worked.
    	 */
    	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
    
    		int	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    		if (readonly)
    
    			mp->m_flags &= ~XFS_MOUNT_RDONLY;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    		if (readonly)
    
    			mp->m_flags |= XFS_MOUNT_RDONLY;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (error) {
    
    			xfs_warn(mp, "log mount/recovery failed: error %d",
    				error);
    
    			goto out_destroy_ail;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    	}
    
    	/* Normal transactions can now occur */
    	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
    
    
    	/*
    	 * Now the log has been fully initialised and we know where our
    	 * space grant counters are, we can initialise the permanent ticket
    	 * needed for delayed logging to work.
    	 */
    	xlog_cil_init_post_recovery(mp->m_log);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return 0;
    
    
    out_destroy_ail:
    	xfs_trans_ail_destroy(mp);
    out_free_log:
    	xlog_dealloc_log(mp->m_log);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * Finish the recovery of the file system.  This is separate from
     * the xfs_log_mount() call, because it depends on the code in
     * xfs_mountfs() to read in the root and real-time bitmap inodes
     * between calling xfs_log_mount() and here.
     *
     * mp		- ubiquitous xfs mount point structure
     */
    int
    xfs_log_mount_finish(xfs_mount_t *mp)
    {
    	/*
    	 * A norecovery mount has nothing to finish; it is only expected on
    	 * read-only mounts.
    	 */
    	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
    		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
    		return 0;
    	}

    	/* Otherwise the result of the recovery finish step is the result. */
    	return xlog_recover_finish(mp->m_log);
    }
    
    /*
     * Final log writes as part of unmount.
     *
     * Mark the filesystem clean as unmount happens.  Note that during relocation
     * this routine needs to be executed as part of source-bag while the
     * deallocation must not be done until source-end.
     */
    
    /*
     * Unmount record used to have a string "Unmount filesystem--" in the
     * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
     * We just write the magic number now since that particular field isn't
     * currently architecture converted and "nUmount" is a bit foo.
     * As far as I know, there weren't any dependencies on the old behaviour.
     */
    
    int
    xfs_log_unmount_write(xfs_mount_t *mp)
    {
    	xlog_t		 *log = mp->m_log;
    	xlog_in_core_t	 *iclog;
    #ifdef DEBUG
    	xlog_in_core_t	 *first_iclog;
    #endif
    
    	xlog_ticket_t	*tic = NULL;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	xfs_lsn_t	 lsn;
    	int		 error;
    
    	/*
    	 * Don't write out unmount record on read-only mounts.
    	 * Or, if we are doing a forced umount (typically because of IO errors).
    	 */
    
    	if (mp->m_flags & XFS_MOUNT_RDONLY)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		return 0;
    
    
    	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
    
    	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    #ifdef DEBUG
    	first_iclog = iclog = log->l_iclog;
    	do {
    		if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
    			ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE);
    			ASSERT(iclog->ic_offset == 0);
    		}
    		iclog = iclog->ic_next;
    	} while (iclog != first_iclog);
    #endif
    	if (! (XLOG_FORCED_SHUTDOWN(log))) {
    
    		error = xfs_log_reserve(mp, 600, 1, &tic,
    					XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (!error) {
    
    			/* the data section must be 32 bit size aligned */
    			struct {
    			    __uint16_t magic;
    			    __uint16_t pad1;
    			    __uint32_t pad2; /* may as well make it 64 bits */
    			} magic = {
    				.magic = XLOG_UNMOUNT_TYPE,
    			};
    			struct xfs_log_iovec reg = {
    
    				.i_addr = &magic,
    
    				.i_len = sizeof(magic),
    				.i_type = XLOG_REG_TYPE_UNMOUNT,
    			};
    			struct xfs_log_vec vec = {
    				.lv_niovecs = 1,
    				.lv_iovecp = &reg,
    			};
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			/* remove inited flag */
    
    			tic->t_flags = 0;
    			error = xlog_write(log, &vec, tic, &lsn,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    					   NULL, XLOG_UNMOUNT_TRANS);
    			/*
    			 * At this point, we're umounting anyway,
    			 * so there's no point in transitioning log state
    			 * to IOERROR. Just continue...
    			 */
    		}
    
    
    		if (error)
    			xfs_alert(mp, "%s: unmount record failed", __func__);
    
    Eric Sandeen's avatar
    Eric Sandeen committed
    		spin_lock(&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		iclog = log->l_iclog;
    
    		atomic_inc(&iclog->ic_refcnt);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		xlog_state_want_sync(log, iclog);
    
    		spin_unlock(&log->l_icloglock);
    
    		error = xlog_state_release_iclog(log, iclog);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Eric Sandeen's avatar
    Eric Sandeen committed
    		spin_lock(&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
    		      iclog->ic_state == XLOG_STATE_DIRTY)) {
    			if (!XLOG_FORCED_SHUTDOWN(log)) {
    
    				xlog_wait(&iclog->ic_force_wait,
    							&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			} else {
    
    Eric Sandeen's avatar
    Eric Sandeen committed
    				spin_unlock(&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			}
    		} else {
    
    Eric Sandeen's avatar
    Eric Sandeen committed
    			spin_unlock(&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    
    			trace_xfs_log_umount_write(log, tic);
    
    			xlog_ungrant_log_space(log, tic);
    
    			xfs_log_ticket_put(tic);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else {
    		/*
    		 * We're already in forced_shutdown mode, couldn't
    		 * even attempt to write out the unmount transaction.
    		 *
    		 * Go through the motions of sync'ing and releasing
    		 * the iclog, even though no I/O will actually happen,
    
    		 * we need to wait for other log I/Os that may already
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		 * be in progress.  Do this as a separate section of
    		 * code so we'll know if we ever get stuck here that
    		 * we're in this odd situation of trying to unmount
    		 * a file system that went into forced_shutdown as
    		 * the result of an unmount..
    		 */
    
    Eric Sandeen's avatar
    Eric Sandeen committed
    		spin_lock(&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		iclog = log->l_iclog;
    
    		atomic_inc(&iclog->ic_refcnt);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    		xlog_state_want_sync(log, iclog);
    
    		spin_unlock(&log->l_icloglock);
    
    		error =  xlog_state_release_iclog(log, iclog);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Eric Sandeen's avatar
    Eric Sandeen committed
    		spin_lock(&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    		if ( ! (   iclog->ic_state == XLOG_STATE_ACTIVE
    			|| iclog->ic_state == XLOG_STATE_DIRTY
    			|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
    
    
    				xlog_wait(&iclog->ic_force_wait,
    							&log->l_icloglock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		} else {
    
    Eric Sandeen's avatar
    Eric Sandeen committed
    			spin_unlock(&log->l_icloglock);
    
    	return error;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }	/* xfs_log_unmount_write */
    
    /*
     * Deallocate log structures for unmount/relocation.
    
     *
     * We need to stop the aild from running before we destroy
     * and deallocate the log as the aild references the log.
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     */
    void
    
    xfs_log_unmount(xfs_mount_t *mp)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	xfs_trans_ail_destroy(mp);
    
    /*
     * Initialise the generic fields of a log item: record the owning mount,
     * its AIL, the item type and ops vector, clear the log vector pointer and
     * set up the AIL and CIL list heads as empty lists.
     */
    void
    xfs_log_item_init(
    	struct xfs_mount	*mp,
    	struct xfs_log_item	*item,
    	int			type,
    	struct xfs_item_ops	*ops)
    {
    	item->li_mountp = mp;
    	item->li_ailp = mp->m_ail;
    	item->li_type = type;
    	item->li_ops = ops;
    	item->li_lv = NULL;

    	INIT_LIST_HEAD(&item->li_ail);
    	INIT_LIST_HEAD(&item->li_cil);
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Write region vectors to log.  The write happens using the space reservation
     * of the ticket (tic).  It is not a requirement that all writes for a given
    
     * transaction occur with one call to xfs_log_write(). However, it is important
     * to note that the transaction reservation code makes an assumption about the
     * number of log headers a transaction requires that may be violated if you
     * don't pass all the transaction vectors in one call....
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     */
    int
    
    xfs_log_write(
    	struct xfs_mount	*mp,
    	struct xfs_log_iovec	reg[],
    	int			nentries,
    	struct xlog_ticket	*tic,
    	xfs_lsn_t		*start_lsn)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct log		*log = mp->m_log;
    	int			error;
    
    	struct xfs_log_vec	vec = {
    		.lv_niovecs = nentries,
    		.lv_iovecp = reg,
    	};
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (XLOG_FORCED_SHUTDOWN(log))
    		return XFS_ERROR(EIO);
    
    
    	error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
    
    		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    void
    xfs_log_move_tail(xfs_mount_t	*mp,
    		  xfs_lsn_t	tail_lsn)
    {
    	xlog_ticket_t	*tic;
    	xlog_t		*log = mp->m_log;
    
    	int		need_bytes, free_bytes;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (XLOG_FORCED_SHUTDOWN(log))
    		return;
    
    
    	if (tail_lsn == 0)
    		tail_lsn = atomic64_read(&log->l_last_sync_lsn);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/* tail_lsn == 1 implies that we weren't passed a valid value.  */
    	if (tail_lsn != 1)
    		atomic64_set(&log->l_tail_lsn, tail_lsn);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (!list_empty_careful(&log->l_writeq)) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #ifdef DEBUG
    		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
    			panic("Recovery problem");
    #endif
    
    		spin_lock(&log->l_grant_write_lock);
    
    		free_bytes = xlog_space_left(log, &log->l_grant_write_head);
    
    		list_for_each_entry(tic, &log->l_writeq, t_queue) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
    
    			if (free_bytes < tic->t_unit_res && tail_lsn != 1)
    				break;
    			tail_lsn = 0;
    			free_bytes -= tic->t_unit_res;
    
    			trace_xfs_log_regrant_write_wake_up(log, tic);
    
    			wake_up(&tic->t_wait);
    
    		spin_unlock(&log->l_grant_write_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    	if (!list_empty_careful(&log->l_reserveq)) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #ifdef DEBUG
    		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
    			panic("Recovery problem");
    #endif
    
    		spin_lock(&log->l_grant_reserve_lock);
    
    		free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
    
    		list_for_each_entry(tic, &log->l_reserveq, t_queue) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			if (tic->t_flags & XLOG_TIC_PERM_RESERV)
    				need_bytes = tic->t_unit_res*tic->t_cnt;
    			else
    				need_bytes = tic->t_unit_res;
    			if (free_bytes < need_bytes && tail_lsn != 1)
    				break;
    			tail_lsn = 0;
    			free_bytes -= need_bytes;
    
    			trace_xfs_log_grant_wake_up(log, tic);
    
    			wake_up(&tic->t_wait);
    
    		spin_unlock(&log->l_grant_reserve_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * Determine if we have a transaction that has gone to disk
    
     * that needs to be covered. To begin the transition to the idle state
     * firstly the log needs to be idle (no AIL and nothing in the iclogs).
     * If we are then in a state where covering is needed, the caller is informed
     * that dummy transactions are required to move the log into the idle state.
     *
     * Because this is called as part of the sync process, we should also indicate
     * that dummy transactions should be issued in anything but the covered or
     * idle states. This ensures that the log tail is accurately reflected in
     * the log at the end of the sync, hence if a crash occurs it avoids replay
     * of transactions where the metadata is already on disk.
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     */
    /*
     * Returns 1 when the caller should issue a dummy transaction to cover the
     * log, 0 otherwise.  A filesystem that is not writable never needs
     * covering.  The covered state is examined and advanced under l_icloglock.
     */
    int
    xfs_log_need_covered(xfs_mount_t *mp)
    {
    	int		needed = 0;
    	xlog_t		*log = mp->m_log;

    	if (!xfs_fs_writable(mp))
    		return 0;

    	spin_lock(&log->l_icloglock);
    	switch (log->l_covered_state) {
    	case XLOG_STATE_COVER_DONE:
    	case XLOG_STATE_COVER_DONE2:
    	case XLOG_STATE_COVER_IDLE:
    		break;
    	case XLOG_STATE_COVER_NEED:
    	case XLOG_STATE_COVER_NEED2:
    		/*
    		 * Only advance NEED -> DONE (NEED2 -> DONE2) once the AIL is
    		 * empty and nothing is left in the iclogs.
    		 */
    		if (!xfs_ail_min_lsn(log->l_ailp) &&
    		    xlog_iclogs_empty(log)) {
    			if (log->l_covered_state == XLOG_STATE_COVER_NEED)
    				log->l_covered_state = XLOG_STATE_COVER_DONE;
    			else
    				log->l_covered_state = XLOG_STATE_COVER_DONE2;
    		}
    		/* FALLTHRU */
    	default:
    		/* anything but the covered or idle states wants a dummy */
    		needed = 1;
    		break;
    	}
    	spin_unlock(&log->l_icloglock);
    	return needed;
    }
    
    /******************************************************************************
     *
     *	local routines
     *
     ******************************************************************************
     */
    
    /* xfs_ail_min_lsn returns 0 when there is nothing in the AIL.
     * The log manager must keep track of the last LR which was committed
     * to disk.  The lsn of this LR will become the new tail_lsn whenever
     * xfs_ail_min_lsn returns 0.  If we don't do this, we run into
     * the situation where stuff could be written into the log but nothing
     * was ever in the AIL when asked.  Eventually, we panic since the
     * tail hits the head.
     *
     * We may be holding the log iclog lock upon entering this routine.
     */
    xfs_lsn_t
    
    xlog_assign_tail_lsn(
    	struct xfs_mount	*mp)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	xfs_lsn_t		tail_lsn;
    	struct log		*log = mp->m_log;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	tail_lsn = xfs_ail_min_lsn(mp->m_ail);
    
    	if (!tail_lsn)
    		tail_lsn = atomic64_read(&log->l_last_sync_lsn);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	atomic64_set(&log->l_tail_lsn, tail_lsn);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return tail_lsn;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     * Return the space in the log between the tail and the head, in bytes.
     *
     * The head is decoded into a cycle number and a byte offset by
     * xlog_crack_grant_head(); the tail is read from log->l_tail_lsn and its
     * offset is rescaled with BBTOB() before the two are compared.
     *
     * Cases, in the order they are tested:
     *   1. Same cycle, head at or beyond the tail:
     *	log->l_logsize - (head_bytes - tail_bytes).
     *   2. Head more than one cycle ahead of the tail: 0.
     *   3. Head exactly one cycle ahead of the tail:
     *	tail_bytes - head_bytes.
     *   4. Anything else (head behind the tail): an alert is logged,
     *	ASSERT(0) fires, and log->l_logsize is returned.
     *
     * In the special case where the reserve head has wrapped past the tail,
     * this calculation is no longer valid.  In this case, just return 0 which
     * means there is no space in the log.  This works for all places where
     * this function is called with the reserve head.  Of course, if the write
     * head were to ever wrap the tail, we should blow up.  Rather than catch
     * this case here, we depend on other ASSERTions in other parts of the
     * code.   XXXmiken
     *
     * This code also handles the case where the reservation head is behind
     * the tail.  The details of this case are described below, but the end
     * result is that we return the size of the log as the amount of space left.
     */
    
    STATIC int
    
    xlog_space_left(
    	struct log	*log,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	int		free_bytes;
    	int		tail_bytes;
    	int		tail_cycle;
    	int		head_cycle;
    	int		head_bytes;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/* Split the grant head into its cycle and byte components. */
    	xlog_crack_grant_head(head, &head_cycle, &head_bytes);
    
    	/*
    	 * Same for the tail LSN.  The offset is then rescaled with BBTOB()
    	 * (presumably basic blocks to bytes, so it is comparable with
    	 * head_bytes -- confirm against the macro definition).
    	 */
    	xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes);
    	tail_bytes = BBTOB(tail_bytes);
    
    	/* Case 1: the used span is head - tail, the rest of the log is free. */
    	if (tail_cycle == head_cycle && head_bytes >= tail_bytes)
    		free_bytes = log->l_logsize - (head_bytes - tail_bytes);
    	else if (tail_cycle + 1 < head_cycle)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		return 0;
    
    	else if (tail_cycle < head_cycle) {
    		ASSERT(tail_cycle == (head_cycle - 1));
    		free_bytes = tail_bytes - head_bytes;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else {
    		/*
    		 * The reservation head is behind the tail.
    		 * In this case we just want to return the size of the
    		 * log as the amount of space left.
    		 */
    
    		xfs_alert(log->l_mp,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			"xlog_space_left: head behind tail\n"
    			"  tail_cycle = %d, tail_bytes = %d\n"
    			"  GH   cycle = %d, GH   bytes = %d",
    
    			tail_cycle, tail_bytes, head_cycle, head_bytes);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		ASSERT(0);
    		free_bytes = log->l_logsize;
    	}
    	return free_bytes;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /*
     * Completion handler for log buffer I/O.
     *
     * The log manager needs its own routine, in order to control what
     * happens with the buffer after the write completes.
     *
     * bp: the buffer whose I/O just finished.  Its first private field holds
     *     the in-core log it belongs to; its second private field is expected
     *     to be 2 on entry and is set to 1 here.
     */
    void
    xlog_iodone(xfs_buf_t *bp)
    {
    	xlog_in_core_t	*iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
    	xlog_t		*log;
    	int		aborted;

    	ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
    	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
    	log = iclog->ic_log;

    	/*
    	 * Race to shutdown the filesystem if we see an error.
    	 */
    	if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), log->l_mp,
    			XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
    		xfs_ioerror_alert("xlog_iodone", log->l_mp, bp, XFS_BUF_ADDR(bp));
    		XFS_BUF_STALE(bp);
    		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);

    		/*
    		 * XFS_LI_ABORTED is handed on to the trans-committed
    		 * callbacks so they learn that the log commit did not
    		 * succeed.
    		 */
    		aborted = XFS_LI_ABORTED;
    	} else {
    		/* No error on this buffer, but the iclog may already be failed. */
    		aborted = (iclog->ic_state & XLOG_STATE_IOERROR) ?
    				XFS_LI_ABORTED : 0;
    	}

    	/* log I/O is always issued ASYNC */
    	ASSERT(XFS_BUF_ISASYNC(bp));
    	xlog_state_done_syncing(iclog, aborted);

    	/*
    	 * do not reference the buffer (bp) here as we could race
    	 * with it being freed after writing the unmount record to the
    	 * log.
    	 */
    }	/* xlog_iodone */
    
    /*
     * Select the number and size of the in-core log record buffers and
     * record the choice in the log structure (l_iclog_bufs, l_iclog_size,
     * l_iclog_size_log, l_iclog_hsize, l_iclog_heads).  Nothing is returned;
     * zero-valued m_logbufs / m_logbsize in the mount structure are filled in
     * with the values chosen here.
     *
     * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
     *
     * If the filesystem blocksize is too large, we may need to choose a
     * larger size since the directory code currently logs entire blocks.
     */
    
    STATIC void
    xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
    			   xlog_t	*log)
    {
    	int size;
    	int xhdrs;
    
    
    	/* No usable buffer count in the mount: use XLOG_MAX_ICLOGS. */
    	if (mp->m_logbufs <= 0)
    		log->l_iclog_bufs = XLOG_MAX_ICLOGS;
    	else
    
    		log->l_iclog_bufs = mp->m_logbufs;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/*
    	 * Buffer size passed in from mount system call.
    	 */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		size = log->l_iclog_size = mp->m_logbsize;
    		/*
    		 * Count the right shifts needed to bring size down to 1,
    		 * i.e. floor(log2(size)).  The loop never ends if size is 0.
    		 * NOTE(review): relies on m_logbsize being positive here --
    		 * confirm the guard.
    		 */
    		log->l_iclog_size_log = 0;
    		while (size != 1) {
    			log->l_iclog_size_log++;
    			size >>= 1;
    		}
    
    
    		if (xfs_sb_version_haslogv2(&mp->m_sb)) {
    
    Malcolm Parsons's avatar
    Malcolm Parsons committed
    			/* # headers = size / 32k
    			 * one header holds cycles from 32k of data
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			 */
    
    			/* Round up so a partial trailing chunk gets a header too. */
    			xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
    			if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
    				xhdrs++;
    			log->l_iclog_hsize = xhdrs << BBSHIFT;
    			log->l_iclog_heads = xhdrs;
    		} else {
    			/* Single BBSIZE header: buffer must not exceed the big record size. */
    			ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
    			log->l_iclog_hsize = BBSIZE;
    			log->l_iclog_heads = 1;
    		}
    
    Malcolm Parsons's avatar
    Malcolm Parsons committed
    	/* All machines use 32kB buffers by default. */
    
    	log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
    	log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* the default log size is 16k or 32k which is one header sector */
    	log->l_iclog_hsize = BBSIZE;
    	log->l_iclog_heads = 1;
    
    
    done:
    	/* are we being asked to make the sizes selected above visible? */
    
    	/* A zero in the mount structure is replaced by the value chosen above. */
    	if (mp->m_logbufs == 0)
    		mp->m_logbufs = log->l_iclog_bufs;
    	if (mp->m_logbsize == 0)
    		mp->m_logbsize = log->l_iclog_size;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }	/* xlog_get_iclog_buffer_size */
    
    
    /*
     * This routine initializes some of the log structure for a given mount point.
     * Its primary purpose is to fill in enough, so recovery can occur.  However,
     * some other stuff may be filled in too.
     */
    STATIC xlog_t *
    xlog_alloc_log(xfs_mount_t	*mp,
    	       xfs_buftarg_t	*log_target,
    	       xfs_daddr_t	blk_offset,
    	       int		num_bblks)
    {
    	xlog_t			*log;
    	xlog_rec_header_t	*head;
    	xlog_in_core_t		**iclogp;
    	xlog_in_core_t		*iclog, *prev_iclog=NULL;
    	xfs_buf_t		*bp;
    	int			i;
    
    	int			error = ENOMEM;
    
    	uint			log2_size = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);