Newer
Older
btrfs_release_path(path);
goto out;
}
if (IS_ERR(inode)) {
BUG_ON(retries);
retries++;
if (block_group->ro)
goto out_free;
ret = create_free_space_inode(root, trans, block_group, path);
if (ret)
goto out_free;
goto again;
}
/* We've already setup this transaction, go ahead and exit */
if (block_group->cache_generation == trans->transid &&
i_size_read(inode)) {
dcs = BTRFS_DC_SETUP;
goto out_put;
}
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
* time.
*/
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
WARN_ON(ret);
if (i_size_read(inode) > 0) {
ret = btrfs_truncate_free_space_cache(root, trans, path,
inode);
if (ret)
goto out_put;
}
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
!btrfs_test_opt(root, SPACE_CACHE)) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
* b) we're with nospace_cache mount option.
*/
spin_unlock(&block_group->lock);
goto out_put;
}
spin_unlock(&block_group->lock);
/*
* Try to preallocate enough space based on how big the block group is.
* Keep in mind this has to include any pinned space which could end up
* taking up quite a bit since it's not folded into the other space
* cache.
*/
num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
if (!num_pages)
num_pages = 1;
num_pages *= 16;
num_pages *= PAGE_CACHE_SIZE;
ret = btrfs_check_data_free_space(inode, num_pages);
if (ret)
goto out_put;
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
num_pages, num_pages,
&alloc_hint);
if (!ret)
dcs = BTRFS_DC_SETUP;
btrfs_free_reserved_data_space(inode, num_pages);
out_put:
iput(inode);
out_free:
btrfs_release_path(path);
out:
spin_lock(&block_group->lock);
if (!ret && dcs == BTRFS_DC_SETUP)
block_group->cache_generation = trans->transid;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
return ret;
}
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
struct btrfs_block_group_cache *cache;
int err = 0;
struct btrfs_path *path;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
again:
while (1) {
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
if (cache->disk_cache_state == BTRFS_DC_CLEAR)
break;
cache = next_block_group(root, cache);
}
if (!cache) {
if (last == 0)
break;
last = 0;
continue;
}
err = cache_save_setup(cache, trans, path);
last = cache->key.objectid + cache->key.offset;
btrfs_put_block_group(cache);
}
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
(unsigned long)-1);
if (err) /* File system offline */
goto out;
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
btrfs_put_block_group(cache);
goto again;
}
if (cache->dirty)
break;
cache = next_block_group(root, cache);
}
if (!cache) {
if (last == 0)
break;
last = 0;
continue;
}
if (cache->disk_cache_state == BTRFS_DC_SETUP)
cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
last = cache->key.objectid + cache->key.offset;
err = write_one_cache_group(trans, root, path, cache);
if (err) /* File system offline */
goto out;
btrfs_put_block_group(cache);
while (1) {
/*
* I don't think this is needed since we're just marking our
* preallocated extent as written, but just in case it can't
* hurt.
*/
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
(unsigned long)-1);
if (err) /* File system offline */
goto out;
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
}
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
/*
* Really this shouldn't happen, but it could if we
* couldn't write the entire preallocated extent and
* splitting the extent resulted in a new block.
*/
if (cache->dirty) {
btrfs_put_block_group(cache);
goto again;
}
if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
break;
cache = next_block_group(root, cache);
}
if (!cache) {
if (last == 0)
break;
last = 0;
continue;
}
err = btrfs_write_out_cache(root, trans, cache, path);
/*
* If we didn't have an error then the cache state is still
* NEED_WRITE, so we can set it to WRITTEN.
*/
if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
cache->disk_cache_state = BTRFS_DC_WRITTEN;
last = cache->key.objectid + cache->key.offset;
btrfs_put_block_group(cache);
}
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
{
struct btrfs_block_group_cache *block_group;
int readonly = 0;
block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
if (!block_group || block_group->ro)
readonly = 1;
if (block_group)
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
struct btrfs_space_info **space_info)
{
struct btrfs_space_info *found;
int i;
int factor;
if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
factor = 2;
else
factor = 1;
found = __find_space_info(info, flags);
if (found) {
found->total_bytes += total_bytes;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->full = 0;
*space_info = found;
return 0;
}
found = kzalloc(sizeof(*found), GFP_NOFS);
if (!found)
return -ENOMEM;
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem);
found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
found->total_bytes = total_bytes;
found->bytes_used = bytes_used;
found->disk_used = bytes_used * factor;
found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->bytes_may_use = 0;
found->full = 0;
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
found->flush = 0;
init_waitqueue_head(&found->wait);
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
if (flags & BTRFS_BLOCK_GROUP_DATA)
info->data_sinfo = found;
return 0;
}
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = chunk_to_extended(flags) &
BTRFS_EXTENDED_PROFILE_MASK;
write_seqlock(&fs_info->profiles_lock);
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits |= extra_flags;
write_sequnlock(&fs_info->profiles_lock);
/*
* returns target flags in extended format or 0 if restripe for this
* chunk_type is not in progress
*
* should be called with either volume_mutex or balance_lock held
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
*/
static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
u64 target = 0;
if (!bctl)
return 0;
if (flags & BTRFS_BLOCK_GROUP_DATA &&
bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
}
return target;
}
/*
* @flags: available profiles in extended format (see ctree.h)
*
* Returns reduced profile in chunk format. If profile changing is in
* progress (either running or paused) picks the target profile (if it's
* already available), otherwise falls back to plain reducing.
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
/*
* we add in the count of missing devices because we want
* to make sure that any RAID levels on a degraded FS
* continue to be honored.
*/
u64 num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;
/*
* see if restripe for this chunk_type is in progress, if so
* try to reduce to the target profile
*/
spin_lock(&root->fs_info->balance_lock);
target = get_restripe_target(root->fs_info, flags);
if (target) {
/* pick target profile only if it's already available */
if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
spin_unlock(&root->fs_info->balance_lock);
return extended_to_chunk(target);
}
}
spin_unlock(&root->fs_info->balance_lock);
/* First, mask out the RAID levels which aren't possible */
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID5);
if (num_devices < 3)
flags &= ~BTRFS_BLOCK_GROUP_RAID6;
if (num_devices < 4)
flags &= ~BTRFS_BLOCK_GROUP_RAID10;
tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
flags &= ~tmp;
if (tmp & BTRFS_BLOCK_GROUP_RAID6)
tmp = BTRFS_BLOCK_GROUP_RAID6;
else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
tmp = BTRFS_BLOCK_GROUP_RAID5;
else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
tmp = BTRFS_BLOCK_GROUP_RAID10;
else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
tmp = BTRFS_BLOCK_GROUP_RAID1;
else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
tmp = BTRFS_BLOCK_GROUP_RAID0;
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
unsigned seq;
do {
seq = read_seqbegin(&root->fs_info->profiles_lock);
if (flags & BTRFS_BLOCK_GROUP_DATA)
flags |= root->fs_info->avail_data_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
flags |= root->fs_info->avail_system_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
flags |= root->fs_info->avail_metadata_alloc_bits;
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
return btrfs_reduce_alloc_profile(root, flags);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
if (data)
flags = BTRFS_BLOCK_GROUP_DATA;
else if (root == root->fs_info->chunk_root)
flags = BTRFS_BLOCK_GROUP_SYSTEM;
flags = BTRFS_BLOCK_GROUP_METADATA;
ret = get_alloc_profile(root, flags);
return ret;
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
*/
int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0, committed = 0, alloc_chunk = 1;
/* make sure bytes are sectorsize aligned */
bytes = ALIGN(bytes, root->sectorsize);
if (root == root->fs_info->tree_root ||
BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
alloc_chunk = 0;
committed = 1;
}
data_sinfo = fs_info->data_sinfo;
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
data_sinfo->bytes_may_use;
if (used + bytes > data_sinfo->total_bytes) {
struct btrfs_trans_handle *trans;
/*
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
*/
if (!data_sinfo->full && alloc_chunk) {
data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
alloc_target,
CHUNK_ALLOC_NO_FORCE);
if (ret < 0) {
if (ret != -ENOSPC)
return ret;
else
goto commit_trans;
}
if (!data_sinfo)
data_sinfo = fs_info->data_sinfo;
/*
* If we have less pinned bytes than we want to allocate then
* don't bother committing the transaction, it won't help us.
*/
if (data_sinfo->bytes_pinned < bytes)
committed = 1;
/* commit the current transaction and try again */
if (!committed &&
!atomic_read(&root->fs_info->open_ioctl_trans)) {
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
goto again;
}
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
data_sinfo->flags, bytes, 1);
* Called if we need to clear a data reservation for this inode.
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
struct btrfs_root *root = BTRFS_I(inode)->root;
/* make sure bytes are sectorsize aligned */
bytes = ALIGN(bytes, root->sectorsize);
data_sinfo = root->fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
data_sinfo->flags, bytes, 0);
static void force_metadata_allocation(struct btrfs_fs_info *info)
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
found->force_alloc = CHUNK_ALLOC_FORCE;
rcu_read_unlock();
static int should_alloc_chunk(struct btrfs_root *root,
struct btrfs_space_info *sinfo, int force)
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
if (force == CHUNK_ALLOC_FORCE)
return 1;
/*
* We need to take into account the global rsv because for all intents
* and purposes it's used space. Don't worry about locking the
* global_rsv, it doesn't change except when the transaction commits.
*/
if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
num_allocated += global_rsv->size;
/*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
*/
if (force == CHUNK_ALLOC_LIMITED) {
thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
thresh = max_t(u64, 64 * 1024 * 1024,
div_factor_fine(thresh, 1));
if (num_bytes - num_allocated < thresh)
return 1;
}
if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
{
u64 num_dev;
if (type & (BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6))
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
num_dev = root->fs_info->fs_devices->rw_devices;
else if (type & BTRFS_BLOCK_GROUP_RAID1)
num_dev = 2;
else
num_dev = 1; /* DUP or single */
/* metadata for updaing devices and chunk tree */
return btrfs_calc_trans_metadata_size(root, num_dev + 1);
}
static void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 type)
{
struct btrfs_space_info *info;
u64 left;
u64 thresh;
info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
left = info->total_bytes - info->bytes_used - info->bytes_pinned -
info->bytes_reserved - info->bytes_readonly;
spin_unlock(&info->lock);
thresh = get_system_chunk_thresh(root, type);
if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n",
left, thresh, type);
dump_space_info(info, 0, 0);
}
if (left < thresh) {
u64 flags;
flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
btrfs_alloc_chunk(trans, root, flags);
}
}
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 flags, int force)
struct btrfs_space_info *space_info;
struct btrfs_fs_info *fs_info = extent_root->fs_info;
int wait_for_alloc = 0;
/* Don't re-enter if we're already allocating a chunk */
if (trans->allocating_chunk)
return -ENOSPC;
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
ret = update_space_info(extent_root->fs_info, flags,
0, 0, &space_info);
BUG_ON(ret); /* -ENOMEM */
BUG_ON(!space_info); /* Logic error */
if (force < space_info->force_alloc)
force = space_info->force_alloc;
if (space_info->full) {
spin_unlock(&space_info->lock);
if (!should_alloc_chunk(extent_root, space_info, force)) {
return 0;
} else if (space_info->chunk_alloc) {
wait_for_alloc = 1;
} else {
space_info->chunk_alloc = 1;
mutex_lock(&fs_info->chunk_mutex);
/*
* The chunk_mutex is held throughout the entirety of a chunk
* allocation, so once we've acquired the chunk_mutex we know that the
* other guy is done and we need to recheck and see if we should
* allocate.
*/
if (wait_for_alloc) {
mutex_unlock(&fs_info->chunk_mutex);
wait_for_alloc = 0;
goto again;
}
trans->allocating_chunk = true;
/*
* If we have mixed data/metadata chunks we want to make sure we keep
* allocating mixed chunks instead of individual chunks.
*/
if (btrfs_mixed_space_info(space_info))
flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
/*
* if we're doing a data chunk, go ahead and make sure that
* we keep a reasonable number of metadata chunks allocated in the
* FS as well.
*/
if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
fs_info->data_chunk_allocations++;
if (!(fs_info->data_chunk_allocations %
fs_info->metadata_ratio))
force_metadata_allocation(fs_info);
/*
* Check if we have enough space in SYSTEM chunk because we may need
* to update devices.
*/
check_system_chunk(trans, extent_root, flags);
ret = btrfs_alloc_chunk(trans, extent_root, flags);
trans->allocating_chunk = false;
if (ret < 0 && ret != -ENOSPC)
goto out;
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
space_info->chunk_alloc = 0;
mutex_unlock(&fs_info->chunk_mutex);
static int can_overcommit(struct btrfs_root *root,
struct btrfs_space_info *space_info, u64 bytes,
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 profile = btrfs_get_alloc_profile(root, 0);
u64 rsv_size = 0;
u64 to_add;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
spin_lock(&global_rsv->lock);
rsv_size = global_rsv->size;
spin_unlock(&global_rsv->lock);
/*
* We only want to allow over committing if we have lots of actual space
* free, but if we don't have enough space to handle the global reserve
* space then we could end up having a real enospc problem when trying
* to allocate a chunk or some other such important allocation.
*/
rsv_size <<= 1;
if (used + rsv_size >= space_info->total_bytes)
return 0;
used += space_info->bytes_may_use;
spin_lock(&root->fs_info->free_chunk_lock);
avail = root->fs_info->free_chunk_space;
spin_unlock(&root->fs_info->free_chunk_lock);
/*
* If we have dup, raid1 or raid10 then only half of the free
* space is actually useable. For raid56, the space info used
* doesn't include the parity drive, so we don't have to
* change the math
*/
if (profile & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
avail >>= 1;
to_add = space_info->total_bytes;
* If we aren't flushing all things, let us overcommit up to
* 1/2th of the space. If we can flush, don't let us overcommit
* too much, let it overcommit up to 1/8 of the space.
if (flush == BTRFS_RESERVE_FLUSH_ALL)
to_add >>= 3;
to_add >>= 1;
/*
* Limit the overcommit to the amount of free space we could possibly
* allocate for chunks.
*/
to_add = min(avail, to_add);
if (used + bytes < space_info->total_bytes + to_add)
static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
unsigned long nr_pages,
enum wb_reason reason)
/* the flusher is dealing with the dirty inodes now. */
if (writeback_in_progress(sb->s_bdi))
return 1;
if (down_read_trylock(&sb->s_umount)) {
writeback_inodes_sb_nr(sb, nr_pages, reason);
up_read(&sb->s_umount);
return 1;
}
return 0;
}
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
unsigned long nr_pages)
{
struct super_block *sb = root->fs_info->sb;
int started;
/* If we can not start writeback, just sync all the delalloc file. */
started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
WB_REASON_FS_FREE_SPACE);
if (!started) {
/*
* We needn't worry the filesystem going from r/w to r/o though
* we don't acquire ->s_umount mutex, because the filesystem
* should guarantee the delalloc inodes list be empty after
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0);
}
}
* shrink metadata reservation for delalloc
static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
bool wait_ordered)
struct btrfs_block_rsv *block_rsv;
struct btrfs_trans_handle *trans;
unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
btrfs_wait_ordered_extents(root, 0);
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
btrfs_writeback_inodes_sb_nr(root, nr_pages);
/*
* We need to wait for the async pages to actually start before
* we do anything.
*/
wait_event(root->fs_info->async_submit_wait,
!atomic_read(&root->fs_info->async_delalloc_pages));
if (!trans)
flush = BTRFS_RESERVE_FLUSH_ALL;
else
flush = BTRFS_RESERVE_NO_FLUSH;
if (can_overcommit(root, space_info, orig, flush)) {
spin_unlock(&space_info->lock);
break;
}
if (wait_ordered && !trans) {
btrfs_wait_ordered_extents(root, 0);
} else {
if (time_left)
break;
}
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
/**
* maybe_commit_transaction - possibly commit the transaction if its ok to
* @root - the root we're allocating for
* @bytes - the number of bytes we want to reserve
* @force - force the commit
* This will check to make sure that committing the transaction will actually
* get us somewhere and then commit the transaction if it does. Otherwise it
* will return -ENOSPC.
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
static int may_commit_transaction(struct btrfs_root *root,
struct btrfs_space_info *space_info,
u64 bytes, int force)
{
struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
struct btrfs_trans_handle *trans;
trans = (struct btrfs_trans_handle *)current->journal_info;
if (trans)
return -EAGAIN;
if (force)
goto commit;
/* See if there is enough pinned space to make this reservation */
spin_lock(&space_info->lock);
if (space_info->bytes_pinned >= bytes) {
spin_unlock(&space_info->lock);
goto commit;
}
spin_unlock(&space_info->lock);
/*
* See if there is some space in the delayed insertion reservation for
* this reservation.
*/
if (space_info != delayed_rsv->space_info)
return -ENOSPC;
spin_lock(&delayed_rsv->lock);
if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
spin_unlock(&delayed_rsv->lock);
return -ENOSPC;
}
spin_unlock(&delayed_rsv->lock);
commit:
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return -ENOSPC;
return btrfs_commit_transaction(trans, root);
}
FLUSH_DELAYED_ITEMS_NR = 1,
FLUSH_DELAYED_ITEMS = 2,
FLUSH_DELALLOC = 3,
FLUSH_DELALLOC_WAIT = 4,
ALLOC_CHUNK = 5,
COMMIT_TRANS = 6,
};
static int flush_space(struct btrfs_root *root,
struct btrfs_space_info *space_info, u64 num_bytes,
u64 orig_bytes, int state)
{
struct btrfs_trans_handle *trans;
int nr;
switch (state) {
case FLUSH_DELAYED_ITEMS_NR:
case FLUSH_DELAYED_ITEMS:
if (state == FLUSH_DELAYED_ITEMS_NR) {
u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
nr = (int)div64_u64(num_bytes, bytes);
if (!nr)
nr = 1;
nr *= 2;
} else {
nr = -1;
}
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
}
ret = btrfs_run_delayed_items_nr(trans, root, nr);