Newer
Older
* ll_rw_block: low-level access to block devices (DEPRECATED)
* @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
* @nr: number of &struct buffer_heads in the array
* @bhs: array of pointers to &struct buffer_head
*
* ll_rw_block() takes an array of pointers to &struct buffer_heads, and
* requests an I/O operation on them, either a %READ or a %WRITE. The third
* %SWRITE is like %WRITE only we make sure that the *current* data in buffers
* are sent to disk. The fourth %READA option is described in the documentation
* for generic_make_request() which ll_rw_block() calls.
*
* This function drops any buffer that it cannot get a lock on (with the
* BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
* clean when doing a write request, and any buffer that appears to be
* up-to-date when doing read request. Further it marks as clean buffers that
* are processed for writing (the buffer cache won't assume that they are
* actually clean until the buffer gets unlocked).
*
* ll_rw_block sets b_end_io to simple completion handler that marks
* the buffer up-to-date (if approriate), unlocks the buffer and wakes
* any waiters.
*
* All of the buffers must be for the same device, and must also be a
* multiple of the current approved size for the device.
*/
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
{
int i;
for (i = 0; i < nr; i++) {
struct buffer_head *bh = bhs[i];
if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
rw == SWRITE_SYNC_PLUG) {
bh->b_end_io = end_buffer_write_sync;
if (rw == SWRITE_SYNC)
submit_bh(WRITE_SYNC, bh);
else
submit_bh(WRITE, bh);
continue;
}
} else {
if (!buffer_uptodate(bh)) {
bh->b_end_io = end_buffer_read_sync;
submit_bh(rw, bh);
continue;
}
}
unlock_buffer(bh);
}
}
/*
* For a data-integrity writeout, we need to wait upon any in-progress I/O
* and then start new I/O and then wait upon it. The caller must have a ref on
* the buffer_head.
*/
int sync_dirty_buffer(struct buffer_head *bh)
{
int ret = 0;
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(WRITE_SYNC, bh);
wait_on_buffer(bh);
if (buffer_eopnotsupp(bh)) {
clear_buffer_eopnotsupp(bh);
ret = -EOPNOTSUPP;
}
if (!ret && !buffer_uptodate(bh))
ret = -EIO;
} else {
unlock_buffer(bh);
}
return ret;
}
EXPORT_SYMBOL(sync_dirty_buffer);
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
/*
* try_to_free_buffers() checks if all the buffers on this particular page
* are unused, and releases them if so.
*
* Exclusion against try_to_free_buffers may be obtained by either
* locking the page or by holding its mapping's private_lock.
*
* If the page is dirty but all the buffers are clean then we need to
* be sure to mark the page clean as well. This is because the page
* may be against a block device, and a later reattachment of buffers
* to a dirty page will set *all* buffers dirty. Which would corrupt
* filesystem data on the same device.
*
* The same applies to regular filesystem pages: if all the buffers are
* clean then we set the page clean and proceed. To do that, we require
* total exclusion from __set_page_dirty_buffers(). That is obtained with
* private_lock.
*
* try_to_free_buffers() is non-blocking.
*/
static inline int buffer_busy(struct buffer_head *bh)
{
return atomic_read(&bh->b_count) |
(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}
static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh;
bh = head;
do {
if (buffer_write_io_error(bh) && page->mapping)
set_bit(AS_EIO, &page->mapping->flags);
if (buffer_busy(bh))
goto failed;
bh = bh->b_this_page;
} while (bh != head);
do {
struct buffer_head *next = bh->b_this_page;
__remove_assoc_queue(bh);
bh = next;
} while (bh != head);
*buffers_to_free = head;
__clear_page_buffers(page);
return 1;
failed:
return 0;
}
int try_to_free_buffers(struct page *page)
{
struct address_space * const mapping = page->mapping;
struct buffer_head *buffers_to_free = NULL;
int ret = 0;
BUG_ON(!PageLocked(page));
return 0;
if (mapping == NULL) { /* can this still happen? */
ret = drop_buffers(page, &buffers_to_free);
goto out;
}
spin_lock(&mapping->private_lock);
ret = drop_buffers(page, &buffers_to_free);
/*
* If the filesystem writes its buffers by hand (eg ext3)
* then we can have clean buffers against a dirty page. We
* clean the page here; otherwise the VM will never notice
* that the filesystem did any IO at all.
*
* Also, during truncate, discard_buffer will have marked all
* the page's buffers clean. We discover that here and clean
* the page also.
*
* private_lock must be held over this entire operation in order
* to synchronise against __set_page_dirty_buffers and prevent the
* dirty bit from being lost.
*/
if (ret)
cancel_dirty_page(page, PAGE_CACHE_SIZE);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
do {
struct buffer_head *next = bh->b_this_page;
free_buffer_head(bh);
bh = next;
} while (bh != buffers_to_free);
}
return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
void block_sync_page(struct page *page)
{
struct address_space *mapping;
smp_mb();
mapping = page_mapping(page);
if (mapping)
blk_run_backing_dev(mapping->backing_dev_info, page);
}
EXPORT_SYMBOL(block_sync_page);
/*
* There are no bdflush tunables left. But distributions are
* still running obsolete flush daemons, so we terminate them here.
*
* Use of bdflush() is deprecated and will be removed in a future kernel.
* The `flush-X' kernel threads fully replace bdflush daemons and this call.
SYSCALL_DEFINE2(bdflush, int, func, long, data)
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
{
static int msg_count;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (msg_count < 5) {
msg_count++;
printk(KERN_INFO
"warning: process `%s' used the obsolete bdflush"
" system call\n", current->comm);
printk(KERN_INFO "Fix your initscripts?\n");
}
if (func == 1)
do_exit(0);
return 0;
}
/*
* Buffer-head allocation
*/
static struct kmem_cache *bh_cachep;
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
/*
* Once the number of bh's in the machine exceeds this level, we start
* stripping them in writeback.
*/
static int max_buffer_heads;
int buffer_heads_over_limit;
struct bh_accounting {
int nr; /* Number of live bh's */
int ratelimit; /* Limit cacheline bouncing */
};
static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
static void recalc_bh_state(void)
{
int i;
int tot = 0;
if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
return;
__get_cpu_var(bh_accounting).ratelimit = 0;
for_each_online_cpu(i)
tot += per_cpu(bh_accounting, i).nr;
buffer_heads_over_limit = (tot > max_buffer_heads);
}
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)

Richard Kennedy
committed
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
INIT_LIST_HEAD(&ret->b_assoc_buffers);
get_cpu_var(bh_accounting).nr++;
put_cpu_var(bh_accounting);
}
return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);
void free_buffer_head(struct buffer_head *bh)
{
BUG_ON(!list_empty(&bh->b_assoc_buffers));
kmem_cache_free(bh_cachep, bh);
get_cpu_var(bh_accounting).nr--;
put_cpu_var(bh_accounting);
}
EXPORT_SYMBOL(free_buffer_head);
static void buffer_exit_cpu(int cpu)
{
int i;
struct bh_lru *b = &per_cpu(bh_lrus, cpu);
for (i = 0; i < BH_LRU_SIZE; i++) {
brelse(b->bhs[i]);
b->bhs[i] = NULL;
}
get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
per_cpu(bh_accounting, cpu).nr = 0;
put_cpu_var(bh_accounting);
}
static int buffer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
buffer_exit_cpu((unsigned long)hcpu);
return NOTIFY_OK;
}
* bh_uptodate_or_lock - Test whether the buffer is uptodate
* @bh: struct buffer_head
*
* Return true if the buffer is up-to-date and false,
* with the buffer locked, if not.
*/
int bh_uptodate_or_lock(struct buffer_head *bh)
{
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
if (!buffer_uptodate(bh))
return 0;
unlock_buffer(bh);
}
return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
/**
* bh_submit_read - Submit a locked buffer for reading
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
* @bh: struct buffer_head
*
* Returns zero on success and -EIO on error.
*/
int bh_submit_read(struct buffer_head *bh)
{
BUG_ON(!buffer_locked(bh));
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
return 0;
}
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return 0;
return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
void __init buffer_init(void)
{
int nrpages;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),

Richard Kennedy
committed
NULL);
/*
* Limit the bh occupancy to 10% of ZONE_NORMAL
*/
nrpages = (nr_free_buffer_pages() * 10) / 100;
max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
hotcpu_notifier(buffer_cpu_notify, 0);
}