			page_cache_release(page);
    			*pagep = NULL;
    
    			/*
    			 * prepare_write() may have instantiated a few blocks
    			 * outside i_size.  Trim these off again. Don't need
    			 * i_size_read because we hold i_mutex.
    			 */
    			if (pos + len > inode->i_size)
    				vmtruncate(inode, inode->i_size);
    		}
    	}
    
    out:
    	return status;
    }
    EXPORT_SYMBOL(block_write_begin);
    
    int block_write_end(struct file *file, struct address_space *mapping,
    			loff_t pos, unsigned len, unsigned copied,
    			struct page *page, void *fsdata)
    {
    	struct inode *inode = mapping->host;
    	unsigned start;
    
    	start = pos & (PAGE_CACHE_SIZE - 1);
    
    	if (unlikely(copied < len)) {
    		/*
    		 * The buffers that were written will now be uptodate, so we
    		 * don't have to worry about a readpage reading them and
    		 * overwriting a partial write. However if we have encountered
    		 * a short write and only partially written into a buffer, it
    		 * will not be marked uptodate, so a readpage might come in and
    		 * destroy our partial write.
    		 *
    		 * Do the simplest thing, and just treat any short write to a
    		 * non uptodate page as a zero-length write, and force the
    		 * caller to redo the whole thing.
    		 */
    		if (!PageUptodate(page))
    			copied = 0;
    
    		page_zero_new_buffers(page, start+copied, start+len);
    	}
    	flush_dcache_page(page);
    
    	/* This could be a short (even 0-length) commit */
    	__block_commit_write(inode, page, start, start+copied);
    
    	return copied;
    }
    EXPORT_SYMBOL(block_write_end);
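
/*
 * Illustrative sketch, not part of buffer.c: the generic write path (see
 * generic_perform_write() in mm/filemap.c) treats a 0 return from
 * ->write_end as "redo this segment", not as an error.  Roughly, inside
 * the copy loop:
 */
#if 0
		status = a_ops->write_end(file, mapping, pos, bytes, copied,
						page, fsdata);
		if (status < 0)
			break;
		copied = status;
		pos += copied;
		written += copied;
		if (unlikely(copied == 0)) {
			/*
			 * A short copy into a !uptodate page was rejected
			 * above; fault the user pages back in and retry
			 * the same range.
			 */
			continue;
		}
#endif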
    
    int generic_write_end(struct file *file, struct address_space *mapping,
    			loff_t pos, unsigned len, unsigned copied,
    			struct page *page, void *fsdata)
    {
	struct inode *inode = mapping->host;
	int i_size_changed = 0;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
		i_size_changed = 1;
	}

	unlock_page(page);
	page_cache_release(page);

    	/*
    	 * Don't mark the inode dirty under page lock. First, it unnecessarily
    	 * makes the holding time of page lock longer. Second, it forces lock
    	 * ordering of page lock and transaction start for journaling
    	 * filesystems.
    	 */
    	if (i_size_changed)
    		mark_inode_dirty(inode);
    
    
    	return copied;
    }
    EXPORT_SYMBOL(generic_write_end);
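
/*
 * Illustrative sketch, not part of buffer.c: minimal address_space
 * operations for a simple block-based filesystem built from these
 * helpers.  "foo_get_block" is a hypothetical get_block_t.
 */
#if 0
static int foo_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	*pagep = NULL;
	return block_write_begin(file, mapping, pos, len, flags,
					pagep, fsdata, foo_get_block);
}

static const struct address_space_operations foo_aops = {
	.write_begin	= foo_write_begin,
	.write_end	= generic_write_end,
};
#endif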
    
    
    /*
     * block_is_partially_uptodate checks whether buffers within a page are
     * uptodate or not.
     *
     * Returns true if all buffers which correspond to a file portion
     * we want to read are uptodate.
     */
    int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
    					unsigned long from)
    {
    	struct inode *inode = page->mapping->host;
    	unsigned block_start, block_end, blocksize;
    	unsigned to;
    	struct buffer_head *bh, *head;
    	int ret = 1;
    
    	if (!page_has_buffers(page))
    		return 0;
    
    	blocksize = 1 << inode->i_blkbits;
    	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
    	to = from + to;
    	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
    		return 0;
    
    	head = page_buffers(page);
    	bh = head;
    	block_start = 0;
    	do {
    		block_end = block_start + blocksize;
    		if (block_end > from && block_start < to) {
    			if (!buffer_uptodate(bh)) {
    				ret = 0;
    				break;
    			}
    			if (block_end >= to)
    				break;
    		}
    		block_start = block_end;
    		bh = bh->b_this_page;
    	} while (bh != head);
    
    	return ret;
    }
    EXPORT_SYMBOL(block_is_partially_uptodate);
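
/*
 * Illustrative sketch, not part of buffer.c: a filesystem opts in by
 * pointing its aops at this helper; the generic read path then consults
 * it before deciding to issue a readpage for a partially-uptodate page.
 * "foo_readpage" is hypothetical.
 */
#if 0
static const struct address_space_operations foo_aops = {
	.readpage		= foo_readpage,
	.is_partially_uptodate	= block_is_partially_uptodate,
};
#endif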
    
    
    /*
     * Generic "read page" function for block devices that have the normal
     * get_block functionality. This is most of the block device filesystems.
     * Reads the page asynchronously --- the unlock_buffer() and
     * set/clear_buffer_uptodate() functions propagate buffer state into the
     * page struct once IO has completed.
     */
    int block_read_full_page(struct page *page, get_block_t *get_block)
    {
    	struct inode *inode = page->mapping->host;
    	sector_t iblock, lblock;
    	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
    	unsigned int blocksize;
    	int nr, i;
    	int fully_mapped = 1;
    
    
	BUG_ON(!PageLocked(page));
    	blocksize = 1 << inode->i_blkbits;
    	if (!page_has_buffers(page))
    		create_empty_buffers(page, blocksize, 0);
    	head = page_buffers(page);
    
    	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
    	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
    	bh = head;
    	nr = 0;
    	i = 0;
    
    	do {
    		if (buffer_uptodate(bh))
    			continue;
    
		if (!buffer_mapped(bh)) {
			int err = 0;

			fully_mapped = 0;
			if (iblock < lblock) {
				WARN_ON(bh->b_size != blocksize);
				err = get_block(inode, iblock, bh, 0);
				if (err)
					SetPageError(page);
    			}
			if (!buffer_mapped(bh)) {
				zero_user(page, i * blocksize, blocksize);
				if (!err)
					set_buffer_uptodate(bh);
				continue;
    			}
    			/*
    			 * get_block() might have updated the buffer
    			 * synchronously
    			 */
    			if (buffer_uptodate(bh))
    				continue;
    		}
    		arr[nr++] = bh;
    	} while (i++, iblock++, (bh = bh->b_this_page) != head);
    
    	if (fully_mapped)
    		SetPageMappedToDisk(page);
    
    	if (!nr) {
    		/*
    		 * All buffers are uptodate - we can set the page uptodate
    		 * as well. But not if get_block() returned an error.
    		 */
    		if (!PageError(page))
    			SetPageUptodate(page);
    		unlock_page(page);
    		return 0;
    	}
    
    	/* Stage two: lock the buffers */
    	for (i = 0; i < nr; i++) {
    		bh = arr[i];
    		lock_buffer(bh);
    		mark_buffer_async_read(bh);
    	}
    
    	/*
    	 * Stage 3: start the IO.  Check for uptodateness
    	 * inside the buffer lock in case another process reading
    	 * the underlying blockdev brought it uptodate (the sct fix).
    	 */
    	for (i = 0; i < nr; i++) {
    		bh = arr[i];
    		if (buffer_uptodate(bh))
    			end_buffer_async_read(bh, 1);
    		else
    			submit_bh(READ, bh);
    	}
    	return 0;
    }
    
    EXPORT_SYMBOL(block_read_full_page);
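
/*
 * Illustrative sketch, not part of buffer.c: the usual way a filesystem
 * uses block_read_full_page() is as the body of its ->readpage.
 * "foo_get_block" is hypothetical.
 */
#if 0
static int foo_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, foo_get_block);
}
#endif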
    
/*
 * utility function for filesystems that need to do work on expanding
 * truncates.  Uses filesystem pagecache writes to allow the filesystem to
 * deal with the hole.
 */
    
int generic_cont_expand_simple(struct inode *inode, loff_t size)
    {
    	struct address_space *mapping = inode->i_mapping;
    	struct page *page;
    
	void *fsdata;
    	int err;
    
    
    	err = inode_newsize_ok(inode, size);
	if (err)
		goto out;
    
    
	err = pagecache_write_begin(NULL, mapping, size, 0,
				AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
				&page, &fsdata);
	if (err)
		goto out;

	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
    	BUG_ON(err > 0);
    
    out:
    	return err;
    }
    
    EXPORT_SYMBOL(generic_cont_expand_simple);
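
/*
 * Illustrative sketch, not part of buffer.c: an expanding truncate in a
 * filesystem's ->setattr might use the helper like this (hypothetical
 * "foo_" names; error handling trimmed to the relevant call).
 */
#if 0
static int foo_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int err = 0;

	if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
		err = generic_cont_expand_simple(inode, attr->ia_size);
	if (err)
		return err;
	return inode_setattr(inode, attr);
}
#endif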
    
    static int cont_expand_zero(struct file *file, struct address_space *mapping,
			    loff_t pos, loff_t *bytes)
{
    	struct inode *inode = mapping->host;
	unsigned blocksize = 1 << inode->i_blkbits;
	struct page *page;
    	void *fsdata;
    	pgoff_t index, curidx;
    	loff_t curpos;
    	unsigned zerofrom, offset, len;
	int err = 0;

	index = pos >> PAGE_CACHE_SHIFT;
    	offset = pos & ~PAGE_CACHE_MASK;
    
    	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;
		if (zerofrom & (blocksize-1)) {
    			*bytes |= (blocksize-1);
    			(*bytes)++;
		}

		len = PAGE_CACHE_SIZE - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
    						AOP_FLAG_UNINTERRUPTIBLE,
    						&page, &fsdata);
    		if (err)
    			goto out;
    
    		err = pagecache_write_end(file, mapping, curpos, len, len,
    						page, fsdata);
    		if (err < 0)
    			goto out;
    		BUG_ON(err != len);
		err = 0;

		balance_dirty_pages_ratelimited(mapping);
	}

	/* page covers the boundary, find the boundary offset */
    	if (index == curidx) {
		zerofrom = curpos & ~PAGE_CACHE_MASK;

		/* if we will expand the thing last block will be filled */
		if (offset <= zerofrom) {
    			goto out;
    		}
		if (zerofrom & (blocksize-1)) {
			*bytes |= (blocksize-1);
			(*bytes)++;
		}
		len = offset - zerofrom;

		err = pagecache_write_begin(file, mapping, curpos, len,
    						AOP_FLAG_UNINTERRUPTIBLE,
    						&page, &fsdata);
    		if (err)
    			goto out;
    
    		err = pagecache_write_end(file, mapping, curpos, len, len,
    						page, fsdata);
    		if (err < 0)
    			goto out;
    		BUG_ON(err != len);
		err = 0;
	}

out:
    	return err;
    }
    
    /*
 * For moronic filesystems that do not allow holes in files.
     * We may have to extend the file.
     */
    int cont_write_begin(struct file *file, struct address_space *mapping,
    			loff_t pos, unsigned len, unsigned flags,
    			struct page **pagep, void **fsdata,
    			get_block_t *get_block, loff_t *bytes)
    {
    	struct inode *inode = mapping->host;
    	unsigned blocksize = 1 << inode->i_blkbits;
    	unsigned zerofrom;
    	int err;
    
    	err = cont_expand_zero(file, mapping, pos, bytes);
    	if (err)
    		goto out;
    
    	zerofrom = *bytes & ~PAGE_CACHE_MASK;
    	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
    		*bytes |= (blocksize-1);
		(*bytes)++;
	}

	*pagep = NULL;
    	err = block_write_begin(file, mapping, pos, len,
    				flags, pagep, fsdata, get_block);
    
out:
	return err;
}
    
    EXPORT_SYMBOL(cont_write_begin);
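
/*
 * Illustrative sketch, not part of buffer.c: a hole-less filesystem
 * (FAT is the classic user) passes a pointer to its "bytes allocated so
 * far" counter, so cont_expand_zero() can fill the gap first.  The
 * "foo_" and "FOO_I" names are hypothetical.
 */
#if 0
static int foo_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;

	*pagep = NULL;
	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
				foo_get_block, &FOO_I(inode)->mmu_private);
}
#endif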
    
    
    int block_prepare_write(struct page *page, unsigned from, unsigned to,
    			get_block_t *get_block)
    {
    	struct inode *inode = page->mapping->host;
    	int err = __block_prepare_write(inode, page, from, to, get_block);
    	if (err)
    		ClearPageUptodate(page);
    	return err;
    }
    
    EXPORT_SYMBOL(block_prepare_write);
    
    
    int block_commit_write(struct page *page, unsigned from, unsigned to)
    {
    	struct inode *inode = page->mapping->host;
    	__block_commit_write(inode,page,from,to);
    	return 0;
    }
    
    EXPORT_SYMBOL(block_commit_write);
    
    /*
     * block_page_mkwrite() is not allowed to change the file size as it gets
     * called from a page fault handler when a page is first dirtied. Hence we must
     * be careful to check for EOF conditions here. We set the page up correctly
     * for a written page which means we get ENOSPC checking when writing into
     * holes and correct delalloc and unwritten extent mapping on filesystems that
     * support these features.
     *
     * We are not allowed to take the i_mutex here so we have to play games to
     * protect against truncate races as the page could now be beyond EOF.  Because
     * vmtruncate() writes the inode size before removing pages, once we have the
     * page lock we can determine safely if the page is beyond EOF. If it is not
     * beyond EOF, then the page is guaranteed safe against truncation until we
     * unlock the page.
     */
int
block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
		   get_block_t get_block)
{
	struct page *page = vmf->page;
	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
	unsigned long end;
	loff_t size;
	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */

	lock_page(page);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	    (page_offset(page) > size)) {
		/* page got truncated out from underneath us */
		unlock_page(page);
		goto out;
	}
    
    	/* page is wholly or partially inside EOF */
    	if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
    		end = size & ~PAGE_CACHE_MASK;
    	else
    		end = PAGE_CACHE_SIZE;
    
	ret = block_prepare_write(page, 0, end, get_block);
	if (!ret)
		ret = block_commit_write(page, 0, end);

	if (unlikely(ret)) {
		unlock_page(page);
		if (ret == -ENOMEM)
			ret = VM_FAULT_OOM;
		else /* -ENOSPC, -EIO, etc */
			ret = VM_FAULT_SIGBUS;
	} else
		ret = VM_FAULT_LOCKED;

out:
	return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);
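
/*
 * Illustrative sketch, not part of buffer.c: wiring block_page_mkwrite()
 * into a file's vm_operations so that the first write fault on a shared
 * mapping allocates blocks.  "foo_" names are hypothetical.
 */
#if 0
static int foo_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return block_page_mkwrite(vma, vmf, foo_get_block);
}

static const struct vm_operations_struct foo_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= foo_page_mkwrite,
};
#endif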
    
/*
 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
 * immediately, while under the page lock.  So it needs a special end_io
     * handler which does not touch the bh after unlocking it.
     */
    static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
}

    /*
 * Attach the singly-linked list of buffers created by nobh_write_begin to
 * the page (converting it to a circular linked list and taking care of page
 * dirty races).
     */
    static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
    {
    	struct buffer_head *bh;
    
    	BUG_ON(!PageLocked(page));
    
    	spin_lock(&page->mapping->private_lock);
    	bh = head;
    	do {
    		if (PageDirty(page))
    			set_buffer_dirty(bh);
    		if (!bh->b_this_page)
    			bh->b_this_page = head;
    		bh = bh->b_this_page;
    	} while (bh != head);
    	attach_page_buffers(page, head);
    	spin_unlock(&page->mapping->private_lock);
    }
    
    
    /*
     * On entry, the page is fully not uptodate.
     * On exit the page is fully uptodate in the areas outside (from,to)
     */
    
    int nobh_write_begin(struct file *file, struct address_space *mapping,
    			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata,
			get_block_t *get_block)
{
	struct inode *inode = mapping->host;
    	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *head, *bh;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	unsigned block_in_page;
	unsigned block_start, block_end;
	sector_t block_in_file;
    	int nr_reads = 0;
    	int ret = 0;
	int is_mapped_to_disk = 1;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
    		return -ENOMEM;
    	*pagep = page;
    	*fsdata = NULL;
    
    	if (page_has_buffers(page)) {
    		unlock_page(page);
    		page_cache_release(page);
    		*pagep = NULL;
    		return block_write_begin(file, mapping, pos, len, flags, pagep,
    					fsdata, get_block);
    	}
    
    	if (PageMappedToDisk(page))
    		return 0;
    
    
    	/*
    	 * Allocate buffers so that we can keep track of state, and potentially
    	 * attach them to the page if an error occurs. In the common case of
    	 * no error, they will just be freed again without ever being attached
    	 * to the page (which is all OK, because we're under the page lock).
    	 *
    	 * Be careful: the buffer linked list is a NULL terminated one, rather
    	 * than the circular one we're used to.
    	 */
	head = alloc_page_buffers(page, blocksize, 0);
	if (!head) {
    		ret = -ENOMEM;
    		goto out_release;
    	}
    
    	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
    
    	/*
    	 * We loop across all blocks in the page, whether or not they are
    	 * part of the affected region.  This is so we can discover if the
    	 * page is fully mapped-to-disk.
    	 */
    
	for (block_start = 0, block_in_page = 0, bh = head;
		  block_start < PAGE_CACHE_SIZE;
		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
		int create;

		block_end = block_start + blocksize;
		bh->b_state = 0;
		create = 1;
		if (block_start >= to)
			create = 0;
		ret = get_block(inode, block_in_file + block_in_page,
					bh, create);
    
		if (ret)
			goto failed;

		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
    
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
		}
		if (buffer_new(bh) || !buffer_mapped(bh)) {
			zero_user_segments(page, block_start, from,
							to, block_end);
			continue;
    		}
    
		if (buffer_uptodate(bh))
			continue;	/* reiserfs does this */
		if (block_start < from || block_end > to) {
			lock_buffer(bh);
			bh->b_end_io = end_buffer_read_nobh;
			submit_bh(READ, bh);
			nr_reads++;
		}
    	}
    
    	if (nr_reads) {
    		/*
    		 * The page is locked, so these buffers are protected from
    		 * any VM or truncate activity.  Hence we don't need to care
    		 * for the buffer_head refcounts.
    		 */
    
		for (bh = head; bh; bh = bh->b_this_page) {
			wait_on_buffer(bh);
    			if (!buffer_uptodate(bh))
    				ret = -EIO;
    		}
    		if (ret)
    			goto failed;
    	}
    
    	if (is_mapped_to_disk)
    		SetPageMappedToDisk(page);
    
    
	*fsdata = head; /* to be released by nobh_write_end */

	return 0;
    
    failed:
    
	BUG_ON(!ret);

	/*
	 * Error recovery is a bit difficult. We need to zero out blocks that
	 * were newly allocated, and dirty them to ensure they get written out.
	 * Buffers need to be attached to the page at this point, otherwise
	 * the handling of potential IO errors during writeout would be hard
	 * (could try doing synchronous writeout, but what if that fails too?)
	 */
    
	attach_nobh_buffers(page, head);
	page_zero_new_buffers(page, from, to);

    out_release:
    	unlock_page(page);
    	page_cache_release(page);
    	*pagep = NULL;
    
    	if (pos + len > inode->i_size)
    		vmtruncate(inode, inode->i_size);
    
	return ret;
}

EXPORT_SYMBOL(nobh_write_begin);
    
    int nobh_write_end(struct file *file, struct address_space *mapping,
    			loff_t pos, unsigned len, unsigned copied,
    			struct page *page, void *fsdata)
    
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *head = fsdata;
	struct buffer_head *bh;

	BUG_ON(fsdata != NULL && page_has_buffers(page));

	if (unlikely(copied < len) && head)
		attach_nobh_buffers(page, head);
	if (page_has_buffers(page))
		return generic_write_end(file, mapping, pos, len,
					copied, page, fsdata);

	SetPageUptodate(page);
	set_page_dirty(page);
	if (pos+copied > inode->i_size) {
		i_size_write(inode, pos+copied);
    		mark_inode_dirty(inode);
    	}
    
    	unlock_page(page);
    	page_cache_release(page);
    
    	while (head) {
    		bh = head;
    		head = head->b_this_page;
    		free_buffer_head(bh);
    	}
    
    	return copied;
    
}

EXPORT_SYMBOL(nobh_write_end);
    
    /*
 * nobh_writepage() - based on block_write_full_page() except
     * that it tries to operate without attaching bufferheads to
     * the page.
     */
    int nobh_writepage(struct page *page, get_block_t *get_block,
    			struct writeback_control *wbc)
    {
    	struct inode * const inode = page->mapping->host;
    	loff_t i_size = i_size_read(inode);
    	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
    	unsigned offset;
    	int ret;
    
    	/* Is the page fully inside i_size? */
    	if (page->index < end_index)
    		goto out;
    
    	/* Is the page fully outside i_size? (truncate in progress) */
    	offset = i_size & (PAGE_CACHE_SIZE-1);
    	if (page->index >= end_index+1 || !offset) {
    		/*
    		 * The page may have dirty, unmapped buffers.  For example,
    		 * they may have been added in ext3_writepage().  Make them
    		 * freeable here, so the page does not leak.
    		 */
    #if 0
    		/* Not really sure about this  - do we need this ? */
    		if (page->mapping->a_ops->invalidatepage)
    			page->mapping->a_ops->invalidatepage(page, offset);
    #endif
    		unlock_page(page);
    		return 0; /* don't care */
    	}
    
    	/*
    	 * The page straddles i_size.  It must be zeroed out on each and every
    	 * writepage invocation because it may be mmapped.  "A file is mapped
    	 * in multiples of the page size.  For a file that is not a multiple of
    	 * the  page size, the remaining memory is zeroed when mapped, and
    	 * writes to that region are not written out to the file."
    	 */
    
    	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
    
    out:
    	ret = mpage_writepage(page, get_block, wbc);
	if (ret == -EAGAIN)
		ret = __block_write_full_page(inode, page, get_block, wbc,
					      end_buffer_async_write);
    	return ret;
    }
    EXPORT_SYMBOL(nobh_writepage);
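
/*
 * Illustrative sketch, not part of buffer.c: the nobh helpers are meant
 * to be used together as a filesystem's aops, e.g. with a writepage
 * wrapper like this ("foo_" names are hypothetical):
 */
#if 0
static int foo_writepage(struct page *page, struct writeback_control *wbc)
{
	return nobh_writepage(page, foo_get_block, wbc);
}

static const struct address_space_operations foo_nobh_aops = {
	.writepage	= foo_writepage,
	.write_end	= nobh_write_end,
	/* plus ->readpage, and a ->write_begin wrapping nobh_write_begin */
};
#endif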
    
    
int nobh_truncate_page(struct address_space *mapping,
			loff_t from, get_block_t *get_block)
    {
    	pgoff_t index = from >> PAGE_CACHE_SHIFT;
    	unsigned offset = from & (PAGE_CACHE_SIZE-1);
    
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head map_bh;
	int err;

    	blocksize = 1 << inode->i_blkbits;
    	length = offset & (blocksize - 1);
    
    	/* Block boundary? Nothing to do */
    	if (!length)
    		return 0;
    
    	length = blocksize - length;
    	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
    
	page = grab_cache_page(mapping, index);
    	err = -ENOMEM;
    
	if (!page)
		goto out;

    	if (page_has_buffers(page)) {
    has_buffers:
    		unlock_page(page);
    		page_cache_release(page);
    		return block_truncate_page(mapping, from, get_block);
    	}
    
    	/* Find the buffer that contains "offset" */
    	pos = blocksize;
    	while (offset >= pos) {
    		iblock++;
    		pos += blocksize;
    	}
    
    
	map_bh.b_size = blocksize;
	map_bh.b_state = 0;
    	err = get_block(inode, iblock, &map_bh, 0);
    	if (err)
    		goto unlock;
    	/* unmapped? It's a hole - nothing to do */
    	if (!buffer_mapped(&map_bh))
    		goto unlock;
    
    	/* Ok, it's mapped. Make sure it's up-to-date */
    	if (!PageUptodate(page)) {
    		err = mapping->a_ops->readpage(NULL, page);
    		if (err) {
    			page_cache_release(page);
    			goto out;
    		}
    		lock_page(page);
    		if (!PageUptodate(page)) {
    			err = -EIO;
    			goto unlock;
    		}
    		if (page_has_buffers(page))
    			goto has_buffers;
    
	}
	zero_user(page, offset, length);
	set_page_dirty(page);
	err = 0;
    
unlock:
    	unlock_page(page);
    	page_cache_release(page);
    out:
    
	return err;
    }
    EXPORT_SYMBOL(nobh_truncate_page);
    
    int block_truncate_page(struct address_space *mapping,
    			loff_t from, get_block_t *get_block)
    {
    	pgoff_t index = from >> PAGE_CACHE_SHIFT;
    	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	sector_t iblock;
	unsigned length, pos;
    	struct inode *inode = mapping->host;
    	struct page *page;
    	struct buffer_head *bh;
    	int err;
    
    	blocksize = 1 << inode->i_blkbits;
    	length = offset & (blocksize - 1);
    
    	/* Block boundary? Nothing to do */
    	if (!length)
    		return 0;
    
    	length = blocksize - length;
    
	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);

    	page = grab_cache_page(mapping, index);
    	err = -ENOMEM;
    	if (!page)
    		goto out;
    
    	if (!page_has_buffers(page))
    		create_empty_buffers(page, blocksize, 0);
    
    	/* Find the buffer that contains "offset" */
    	bh = page_buffers(page);
    	pos = blocksize;
    	while (offset >= pos) {
    		bh = bh->b_this_page;
    		iblock++;
    		pos += blocksize;
    	}
    
    	err = 0;
	if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
    		err = get_block(inode, iblock, bh, 0);
    		if (err)
    			goto unlock;
    		/* unmapped? It's a hole - nothing to do */
    		if (!buffer_mapped(bh))
    			goto unlock;
    	}
    
    	/* Ok, it's mapped. Make sure it's up-to-date */
    	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
    		err = -EIO;
    		ll_rw_block(READ, 1, &bh);
    		wait_on_buffer(bh);
    		/* Uhhuh. Read error. Complain and punt. */
    		if (!buffer_uptodate(bh))
    			goto unlock;
    	}
    
    
	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
    	err = 0;
    
    unlock:
    	unlock_page(page);
    	page_cache_release(page);
    out:
    	return err;
    }
    
    EXPORT_SYMBOL(block_truncate_page);
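
/*
 * Illustrative sketch, not part of buffer.c: when shrinking a file, a
 * filesystem zeroes the tail of the new last block so that a later
 * extension does not expose stale data.  "foo_" names are hypothetical.
 */
#if 0
static void foo_truncate(struct inode *inode)
{
	block_truncate_page(inode->i_mapping, inode->i_size, foo_get_block);
	/* ...the filesystem then releases the blocks beyond i_size... */
}
#endif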
    
    
/*
 * The generic ->writepage function for buffer-backed address_spaces.
 * This form passes in the end_io handler used to finish the IO.
 */
    
    int block_write_full_page_endio(struct page *page, get_block_t *get_block,
			struct writeback_control *wbc, bh_end_io_t *handler)
    {
    	struct inode * const inode = page->mapping->host;
    	loff_t i_size = i_size_read(inode);
    	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
    	unsigned offset;
    
    	/* Is the page fully inside i_size? */
	if (page->index < end_index)
		return __block_write_full_page(inode, page, get_block, wbc,
					       handler);
    
    	/* Is the page fully outside i_size? (truncate in progress) */
    	offset = i_size & (PAGE_CACHE_SIZE-1);
    	if (page->index >= end_index+1 || !offset) {
    		/*
    		 * The page may have dirty, unmapped buffers.  For example,
    		 * they may have been added in ext3_writepage().  Make them
    		 * freeable here, so the page does not leak.
    		 */
    
		do_invalidatepage(page, 0);
    		unlock_page(page);
    		return 0; /* don't care */
    	}
    
    	/*
    	 * The page straddles i_size.  It must be zeroed out on each and every
    
	 * writepage invocation because it may be mmapped.  "A file is mapped
    	 * in multiples of the page size.  For a file that is not a multiple of
    	 * the  page size, the remaining memory is zeroed when mapped, and
    	 * writes to that region are not written out to the file."
    	 */
    
	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	return __block_write_full_page(inode, page, get_block, wbc, handler);
    }
    
    EXPORT_SYMBOL(block_write_full_page_endio);
    
    /*
     * The generic ->writepage function for buffer-backed address_spaces
     */
    int block_write_full_page(struct page *page, get_block_t *get_block,
    			struct writeback_control *wbc)
    {
    	return block_write_full_page_endio(page, get_block, wbc,
    					   end_buffer_async_write);
    }
    
    EXPORT_SYMBOL(block_write_full_page);
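
/*
 * Illustrative sketch, not part of buffer.c: a filesystem that needs to
 * run code at write-IO completion can use the _endio variant with its
 * own handler instead of end_buffer_async_write.  "foo_" names are
 * hypothetical.
 */
#if 0
static void foo_end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	/* per-buffer bookkeeping would go here... */
	end_buffer_async_write(bh, uptodate);
}

static int foo_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page_endio(page, foo_get_block, wbc,
					   foo_end_buffer_async_write);
}
#endif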
    
    sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
    			    get_block_t *get_block)
    {
    	struct buffer_head tmp;
    	struct inode *inode = mapping->host;
    	tmp.b_state = 0;
    	tmp.b_blocknr = 0;
    
	tmp.b_size = 1 << inode->i_blkbits;
    	get_block(inode, block, &tmp, 0);
    	return tmp.b_blocknr;
    }
    
    EXPORT_SYMBOL(generic_block_bmap);
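
/*
 * Illustrative sketch, not part of buffer.c: ->bmap (the FIBMAP ioctl)
 * is normally just this helper curried with the filesystem's get_block.
 * "foo_get_block" is hypothetical.
 */
#if 0
static sector_t foo_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, foo_get_block);
}
#endif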
    
static void end_bio_bh_io_sync(struct bio *bio, int err)
    {
    	struct buffer_head *bh = bio->bi_private;
    
    	if (err == -EOPNOTSUPP) {
    		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
    		set_bit(BH_Eopnotsupp, &bh->b_state);
	}

	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
		set_bit(BH_Quiet, &bh->b_state);

    	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
    	bio_put(bio);
    }
    
    int submit_bh(int rw, struct buffer_head * bh)
    {
    	struct bio *bio;
    	int ret = 0;
    
    	BUG_ON(!buffer_locked(bh));
    	BUG_ON(!buffer_mapped(bh));
    	BUG_ON(!bh->b_end_io);
    
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Mask in barrier bit for a write (could be either a WRITE or a
	 * WRITE_SYNC)
	 */
	if (buffer_ordered(bh) && (rw & WRITE))
		rw |= WRITE_BARRIER;

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (rw & WRITE))
		clear_buffer_write_io_error(bh);
    
    	/*
    	 * from here on down, it's all bio -- do the initial mapping,
    	 * submit_bio -> generic_make_request may further map this bio around
    	 */
    	bio = bio_alloc(GFP_NOIO, 1);
    
    	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
    	bio->bi_bdev = bh->b_bdev;
    	bio->bi_io_vec[0].bv_page = bh->b_page;
    	bio->bi_io_vec[0].bv_len = bh->b_size;
    	bio->bi_io_vec[0].bv_offset = bh_offset(bh);
    
    	bio->bi_vcnt = 1;
    	bio->bi_idx = 0;
    	bio->bi_size = bh->b_size;
    
    	bio->bi_end_io = end_bio_bh_io_sync;
    	bio->bi_private = bh;
    
    	bio_get(bio);
    	submit_bio(rw, bio);
    
    	if (bio_flagged(bio, BIO_EOPNOTSUPP))
    		ret = -EOPNOTSUPP;
    
    	bio_put(bio);
    	return ret;
    }
    
    EXPORT_SYMBOL(submit_bh);
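
/*
 * Illustrative sketch, not part of buffer.c: a synchronous read of one
 * buffer via submit_bh(), the pattern used by helpers such as
 * ll_rw_block() plus wait_on_buffer() elsewhere in this file.
 */
#if 0
	lock_buffer(bh);
	if (!buffer_uptodate(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;	/* unlocks bh, drops ref */
		submit_bh(READ, bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			return -EIO;
	} else
		unlock_buffer(bh);
#endif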
    
    
    /**