Newer
Older
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/radix-tree.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/crc32c.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "async-thread.h"
#include "tree-log.h"
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only);
static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
int mark);
static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
struct extent_io_tree *pinned_extents);
static int btrfs_cleanup_transaction(struct btrfs_root *root);
/*
* end_io_wq structs are used to do processing in task context when an IO is
* complete. This is used during reads to verify checksums, and it is used
* by writes to insert metadata for new file extents after IO is complete.
*/
struct end_io_wq {
struct bio *bio;
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
int error;
struct list_head list;
struct btrfs_work work;
/*
* async submit bios are used to offload expensive checksumming
* onto the worker threads. They checksum file and metadata bios
* just before they are sent down the IO stack.
*/
struct async_submit_bio {
struct inode *inode;
struct bio *bio;
struct list_head list;
extent_submit_bio_hook_t *submit_bio_start;
extent_submit_bio_hook_t *submit_bio_done;
/*
* bio_offset is optional, can be used if the pages in the bio
* can't tell us where in the file the bio should go
*/
u64 bio_offset;
struct btrfs_work work;
/*
* Lockdep class keys for extent_buffer->lock's in this root. For a given
* eb, the lockdep key is determined by the btrfs_root it belongs to and
* the level the eb occupies in the tree.
*
* Different roots are used for different purposes and may nest inside each
* other and they require separate keysets. As lockdep keys should be
* static, assign keysets according to the purpose of the root as indicated
* by btrfs_root->objectid. This ensures that all special purpose roots
* have separate keysets.
* Lock-nesting across peer nodes is always done with the immediate parent
* node locked thus preventing deadlock. As lockdep doesn't know this, use
* subclass to avoid triggering lockdep warning in such cases.
* The key is set by the readpage_end_io_hook after the buffer has passed
* csum validation but before the pages are unlocked. It is also set by
* btrfs_init_new_buffer on freshly allocated blocks.
* We also add a check to make sure the highest level of the tree is the
* same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
* needs update as well.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
# error
# endif
static struct btrfs_lockdep_keyset {
u64 id; /* root objectid */
const char *name_stem; /* lock name stem */
char names[BTRFS_MAX_LEVEL + 1][20];
struct lock_class_key keys[BTRFS_MAX_LEVEL + 1];
} btrfs_lockdep_keysets[] = {
{ .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" },
{ .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" },
{ .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" },
{ .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
{ .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
{ .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
{ .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = 0, .name_stem = "tree" },
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
void __init btrfs_init_lockdep(void)
{
int i, j;
/* initialize lockdep class names */
for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
for (j = 0; j < ARRAY_SIZE(ks->names); j++)
snprintf(ks->names[j], sizeof(ks->names[j]),
"btrfs-%s-%02d", ks->name_stem, j);
}
}
void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
int level)
{
struct btrfs_lockdep_keyset *ks;
BUG_ON(level >= ARRAY_SIZE(ks->keys));
/* find the matching keyset, id 0 is the default entry */
for (ks = btrfs_lockdep_keysets; ks->id; ks++)
if (ks->id == objectid)
break;
lockdep_set_class_and_name(&eb->lock,
&ks->keys[level], ks->names[level]);
}
/*
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
*/
static struct extent_map *btree_get_extent(struct inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
int ret;
em = lookup_extent_mapping(em_tree, start, len);
if (em) {
em->bdev =
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
if (!em) {
em = ERR_PTR(-ENOMEM);
goto out;
}
em->start = 0;
em->block_start = 0;
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
u64 failed_start = em->start;
u64 failed_len = em->len;
free_extent_map(em);
em = lookup_extent_mapping(em_tree, start, len);
ret = 0;
} else {
em = lookup_extent_mapping(em_tree, failed_start,
failed_len);
ret = -EIO;
} else if (ret) {
free_extent_map(em);
em = NULL;
if (ret)
em = ERR_PTR(ret);
out:
return em;
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
return crc32c(seed, data, len);
}
void btrfs_csum_final(u32 crc, char *result)
{
put_unaligned_le32(~crc, result);
/*
* compute the csum for a btree block, and either verify it or write it
* into the csum field of the block.
*/
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
int verify)
{
u16 csum_size =
btrfs_super_csum_size(&root->fs_info->super_copy);
char *result = NULL;
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
int err;
u32 crc = ~(u32)0;
unsigned long inline_result;
err = map_private_extent_buffer(buf, offset, 32,
&kaddr, &map_start, &map_len);
return 1;
cur_len = min(len, map_len - (offset - map_start));
crc = btrfs_csum_data(root, kaddr + offset - map_start,
crc, cur_len);
len -= cur_len;
offset += cur_len;
}
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
if (!result)
return 1;
} else {
result = (char *)&inline_result;
}
btrfs_csum_final(crc, result);
if (verify) {
if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
u32 val;
u32 found = 0;
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
"failed on %llu wanted %X found %X "
"level %d\n",
root->fs_info->sb->s_id,
(unsigned long long)buf->start, val, found,
btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
write_extent_buffer(buf, result, 0, csum_size);
if (result != (char *)&inline_result)
kfree(result);
/*
* we can't consider a given block up to date unless the transid of the
* block matches the transid in the parent node's pointer. This is how we
* detect blocks that either didn't get written at all or got written
* in the wrong place.
*/
static int verify_parent_transid(struct extent_io_tree *io_tree,
struct extent_buffer *eb, u64 parent_transid)
{
struct extent_state *cached_state = NULL;
int ret;
if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
return 0;
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
0, &cached_state, GFP_NOFS);
if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
goto out;
}
printk_ratelimited("parent transid verify failed on %llu wanted %llu "
"found %llu\n",
(unsigned long long)eb->start,
(unsigned long long)parent_transid,
(unsigned long long)btrfs_header_generation(eb));
clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS);
/*
* helper to read a given tree block, doing retries as required when
* the checksums don't match and we have alternate mirrors to try.
*/
static int btree_read_extent_buffer_pages(struct btrfs_root *root,
struct extent_buffer *eb,
u64 start, u64 parent_transid)
{
struct extent_io_tree *io_tree;
int ret;
int num_copies = 0;
int mirror_num = 0;
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
while (1) {
ret = read_extent_buffer_pages(io_tree, eb, start,
WAIT_COMPLETE,
btree_get_extent, mirror_num);
if (!ret &&
!verify_parent_transid(io_tree, eb, parent_transid))
/*
* This buffer's crc is fine, but its contents are corrupted, so
* there is no reason to read the other copies, they won't be
* any less wrong.
*/
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
return ret;
num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
eb->start, eb->len);
mirror_num++;
return ret;
}
return -EIO;
}
* checksum a dirty tree block before IO. This has extra checks to make sure
* we only fill in the checksum field in the first page of a multi-page block
static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
unsigned long len;
struct extent_buffer *eb;
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (page->private == EXTENT_PAGE_PRIVATE) {
WARN_ON(1);
}
if (!page->private) {
WARN_ON(1);
eb = alloc_extent_buffer(tree, start, len, page);
if (eb == NULL) {
WARN_ON(1);
goto out;
}
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
btrfs_header_generation(eb));
WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
WARN_ON(1);
goto err;
}
if (eb->first_page != page) {
WARN_ON(1);
goto err;
}
if (!PageUptodate(page)) {
WARN_ON(1);
goto err;
free_extent_buffer(eb);
out:
return 0;
}
static int check_tree_block_fsid(struct btrfs_root *root,
struct extent_buffer *eb)
{
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
u8 fsid[BTRFS_UUID_SIZE];
int ret = 1;
read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
BTRFS_FSID_SIZE);
while (fs_devices) {
if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
ret = 0;
break;
}
fs_devices = fs_devices->seed;
}
return ret;
}
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
#define CORRUPT(reason, eb, root, slot) \
printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
"root=%llu, slot=%d\n", reason, \
(unsigned long long)btrfs_header_bytenr(eb), \
(unsigned long long)root->objectid, slot)
static noinline int check_leaf(struct btrfs_root *root,
struct extent_buffer *leaf)
{
struct btrfs_key key;
struct btrfs_key leaf_key;
u32 nritems = btrfs_header_nritems(leaf);
int slot;
if (nritems == 0)
return 0;
/* Check the 0 item */
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
BTRFS_LEAF_DATA_SIZE(root)) {
CORRUPT("invalid item offset size pair", leaf, root, 0);
return -EIO;
}
/*
* Check to make sure each items keys are in the correct order and their
* offsets make sense. We only have to loop through nritems-1 because
* we check the current slot against the next slot, which verifies the
* next slot's offset+size makes sense and that the current's slot
* offset is correct.
*/
for (slot = 0; slot < nritems - 1; slot++) {
btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
btrfs_item_key_to_cpu(leaf, &key, slot + 1);
/* Make sure the keys are in the right order */
if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
CORRUPT("bad key order", leaf, root, slot);
return -EIO;
}
/*
* Make sure the offset and ends are right, remember that the
* item data starts at the end of the leaf and grows towards the
* front.
*/
if (btrfs_item_offset_nr(leaf, slot) !=
btrfs_item_end_nr(leaf, slot + 1)) {
CORRUPT("slot offset bad", leaf, root, slot);
return -EIO;
}
/*
* Check to make sure that we don't point outside of the leaf,
* just incase all the items are consistent to eachother, but
* all point outside of the leaf.
*/
if (btrfs_item_end_nr(leaf, slot) >
BTRFS_LEAF_DATA_SIZE(root)) {
CORRUPT("slot end outside of leaf", leaf, root, slot);
return -EIO;
}
}
return 0;
}
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
struct extent_io_tree *tree;
u64 found_start;
int found_level;
unsigned long len;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (page->private == EXTENT_PAGE_PRIVATE)
goto out;
if (!page->private)
goto out;
len = page->private >> 2;
eb = alloc_extent_buffer(tree, start, len, page);
if (eb == NULL) {
ret = -EIO;
goto out;
}
found_start = btrfs_header_bytenr(eb);
printk_ratelimited(KERN_INFO "btrfs bad tree block start "
"%llu %llu\n",
(unsigned long long)found_start,
(unsigned long long)eb->start);
goto err;
}
if (eb->first_page != page) {
printk(KERN_INFO "btrfs bad first page %lu %lu\n",
eb->first_page->index, page->index);
printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
ret = csum_tree_block(root, eb, 1);
goto err;
}
/*
* If this is a leaf block and it is corrupt, set the corrupt bit so
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
if (found_level == 0 && check_leaf(root, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
end = eb->start + end - 1;
err:
if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
btree_readahead_hook(root, eb, eb->start, ret);
}
free_extent_buffer(eb);
out:
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
static int btree_io_failed_hook(struct bio *failed_bio,
struct page *page, u64 start, u64 end,
struct extent_state *state)
{
struct extent_io_tree *tree;
unsigned long len;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (page->private == EXTENT_PAGE_PRIVATE)
goto out;
if (!page->private)
goto out;
len = page->private >> 2;
WARN_ON(len == 0);
eb = alloc_extent_buffer(tree, start, len, page);
if (eb == NULL)
goto out;
if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
btree_readahead_hook(root, eb, eb->start, -EIO);
}
out:
return -EIO; /* we fixed nothing */
}
static void end_workqueue_bio(struct bio *bio, int err)
{
struct end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
fs_info = end_io_wq->info;
end_io_wq->error = err;
end_io_wq->work.func = end_workqueue_fn;
end_io_wq->work.flags = 0;
if (bio->bi_rw & REQ_WRITE) {
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
else if (end_io_wq->metadata == 2)
btrfs_queue_worker(&fs_info->endio_freespace_worker,
&end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
} else {
if (end_io_wq->metadata)
btrfs_queue_worker(&fs_info->endio_meta_workers,
&end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_workers,
&end_io_wq->work);
}
/*
* For the metadata arg you want
*
* 0 - if data
* 1 - if normal metadta
* 2 - if writing to the free space cache area
*/
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
struct end_io_wq *end_io_wq;
end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
if (!end_io_wq)
return -ENOMEM;
end_io_wq->private = bio->bi_private;
end_io_wq->end_io = bio->bi_end_io;
end_io_wq->info = info;
end_io_wq->error = 0;
end_io_wq->bio = bio;
end_io_wq->metadata = metadata;
bio->bi_private = end_io_wq;
bio->bi_end_io = end_workqueue_bio;
return 0;
}
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
unsigned long limit = min_t(unsigned long,
info->workers.max_workers,
info->fs_devices->open_devices);
return 256 * limit;
}
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
async = container_of(work, struct async_submit_bio, work);
async->submit_bio_start(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
}
static void run_one_async_done(struct btrfs_work *work)
{
struct btrfs_fs_info *fs_info;
struct async_submit_bio *async;
int limit;
async = container_of(work, struct async_submit_bio, work);
fs_info = BTRFS_I(async->inode)->root->fs_info;
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
atomic_dec(&fs_info->nr_async_submits);
if (atomic_read(&fs_info->nr_async_submits) < limit &&
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
async->submit_bio_done(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
}
static void run_one_async_free(struct btrfs_work *work)
{
struct async_submit_bio *async;
async = container_of(work, struct async_submit_bio, work);
kfree(async);
}
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done)
{
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
if (!async)
return -ENOMEM;
async->inode = inode;
async->rw = rw;
async->bio = bio;
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
async->submit_bio_done = submit_bio_done;
async->work.func = run_one_async_start;
async->work.ordered_func = run_one_async_done;
async->work.ordered_free = run_one_async_free;
async->work.flags = 0;
async->bio_offset = bio_offset;

Chris Mason
committed
atomic_inc(&fs_info->nr_async_submits);
if (rw & REQ_SYNC)
btrfs_set_work_high_prio(&async->work);
btrfs_queue_worker(&fs_info->workers, &async->work);
while (atomic_read(&fs_info->async_submit_draining) &&
atomic_read(&fs_info->nr_async_submits)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->nr_async_submits) == 0));
}
static int btree_csum_one_bio(struct bio *bio)
{
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
struct btrfs_root *root;
WARN_ON(bio->bi_vcnt <= 0);
root = BTRFS_I(bvec->bv_page->mapping->host)->root;
csum_dirty_buffer(root, bvec->bv_page);
bio_index++;
bvec++;
}
return 0;
}
static int __btree_submit_bio_start(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
unsigned long bio_flags,
u64 bio_offset)
/*
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
btree_csum_one_bio(bio);
return 0;
}
static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
*/
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
int ret;
ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
bio, 1);
BUG_ON(ret);
if (!(rw & REQ_WRITE)) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
*/
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
/*
* kthread helpers are used to submit writes so that checksumming
* can happen in parallel across all CPUs
*/
return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
__btree_submit_bio_start,
__btree_submit_bio_done);
static int btree_migratepage(struct address_space *mapping,
struct page *newpage, struct page *page)
{
/*
* we can't safely write a btree page from here,
* we haven't done the locking hook
*/
if (PageDirty(page))
return -EAGAIN;
/*
* Buffers may be managed in a filesystem specific way.
* We must have no buffers or drop them.
*/
if (page_has_private(page) &&
!try_to_release_page(page, GFP_KERNEL))
return -EAGAIN;
return migrate_page(mapping, newpage, page);
}
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
struct extent_io_tree *tree;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
struct extent_buffer *eb;
int was_dirty;
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (!(current->flags & PF_MEMALLOC)) {
return extent_write_full_page(tree, page,
btree_get_extent, wbc);
}
redirty_page_for_writepage(wbc, page);
eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
WARN_ON(!eb);
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
spin_unlock(&root->fs_info->delalloc_lock);
free_extent_buffer(eb);
unlock_page(page);
return 0;
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct extent_io_tree *tree;
tree = &BTRFS_I(mapping->host)->io_tree;

Chris Mason
committed
if (wbc->sync_mode == WB_SYNC_NONE) {
struct btrfs_root *root = BTRFS_I(mapping->host)->root;
unsigned long thresh = 32 * 1024 * 1024;
if (wbc->for_kupdate)
return 0;
/* this is a bit racy, but that's ok */
num_dirty = root->fs_info->dirty_metadata_bytes;
return extent_writepages(tree, mapping, btree_get_extent, wbc);
}
static int btree_readpage(struct file *file, struct page *page)
struct extent_io_tree *tree;
tree = &BTRFS_I(page->mapping->host)->io_tree;
return extent_read_full_page(tree, page, btree_get_extent);
}
static int btree_releasepage(struct page *page, gfp_t gfp_flags)
struct extent_io_tree *tree;
struct extent_map_tree *map;
if (PageWriteback(page) || PageDirty(page))
tree = &BTRFS_I(page->mapping->host)->io_tree;
map = &BTRFS_I(page->mapping->host)->extent_tree;
ret = try_release_extent_state(map, tree, page, gfp_flags);
return 0;
ret = try_release_extent_buffer(tree, page);
if (ret == 1) {
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
}
static void btree_invalidatepage(struct page *page, unsigned long offset)
struct extent_io_tree *tree;
tree = &BTRFS_I(page->mapping->host)->io_tree;
extent_invalidatepage(tree, page, offset);
btree_releasepage(page, GFP_NOFS);
if (PagePrivate(page)) {
printk(KERN_WARNING "btrfs warning page private not zero "
"on page %llu\n", (unsigned long long)page_offset(page));
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
}
static const struct address_space_operations btree_aops = {
.readpage = btree_readpage,
.writepage = btree_writepage,
.writepages = btree_writepages,
.releasepage = btree_releasepage,
.invalidatepage = btree_invalidatepage,
#ifdef CONFIG_MIGRATION
.migratepage = btree_migratepage,