static struct page *
grow_dev_page(struct block_device *bdev, sector_t block,
pgoff_t index, int size)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
struct buffer_head *bh;
page = find_or_create_page(inode->i_mapping, index,
(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
if (!page)
return NULL;
BUG_ON(!PageLocked(page));
if (page_has_buffers(page)) {
bh = page_buffers(page);
if (bh->b_size == size) {
init_page_buffers(page, bdev, block, size);
return page;
}
if (!try_to_free_buffers(page))
goto failed;
}
/*
* Allocate some buffers for this page
*/
bh = alloc_page_buffers(page, size, 0);
if (!bh)
goto failed;
/*
* Link the page to the buffers and initialise them. Take the
* lock to be atomic wrt __find_get_block(), which does not
* run under the page lock.
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
init_page_buffers(page, bdev, block, size);
spin_unlock(&inode->i_mapping->private_lock);
return page;
failed:
BUG();
unlock_page(page);
page_cache_release(page);
return NULL;
}
/*
* Create buffers for the specified block device block's page. If
* that page was dirty, the buffers are set dirty also.
*/
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
{
struct page *page;
pgoff_t index;
int sizebits;
sizebits = -1;
do {
sizebits++;
} while ((size << sizebits) < PAGE_SIZE);
index = block >> sizebits;
/*
* Check for a block which wants to lie outside our maximum possible
* pagecache index. (this comparison is done using sector_t types).
*/
if (unlikely(index != block >> sizebits)) {
char b[BDEVNAME_SIZE];
printk(KERN_ERR "%s: requested out-of-range block %llu for "
"device %s\n",
__func__, (unsigned long long)block,
bdevname(bdev, b));
return -EIO;
}
block = index << sizebits;
/* Create a page with the proper size buffers.. */
page = grow_dev_page(bdev, block, index, size);
if (!page)
return 0;
unlock_page(page);
page_cache_release(page);
return 1;
}
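/*
 * Illustrative sketch, not part of the original file: the sizebits/index
 * arithmetic above in closed form.  With 1k blocks on 4k pages sizebits is 2,
 * so block 4097 lands in page index 1024, and "block = index << sizebits"
 * rounds it down to 4096, the first block of that page.  The helper name is
 * hypothetical.
 */
static inline pgoff_t example_block_to_page_index(sector_t block, int size)
{
	int sizebits = PAGE_SHIFT - blksize_bits(size);

	return (pgoff_t)(block >> sizebits);
}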
static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
(size < 512 || size > PAGE_SIZE))) {
printk(KERN_ERR "getblk(): invalid block size %d requested\n",
size);
printk(KERN_ERR "logical block size: %d\n",
bdev_logical_block_size(bdev));
dump_stack();
return NULL;
}
for (;;) {
struct buffer_head * bh;
int ret;
bh = __find_get_block(bdev, block, size);
if (bh)
return bh;
ret = grow_buffers(bdev, block, size);
if (ret < 0)
return NULL;
if (ret == 0)
free_more_memory();
}
}
/*
* The relationship between dirty buffers and dirty pages:
*
* Whenever a page has any dirty buffers, the page's dirty bit is set, and
* the page is tagged dirty in its radix tree.
*
* At all times, the dirtiness of the buffers represents the dirtiness of
* subsections of the page. If the page has buffers, the page dirty bit is
* merely a hint about the true dirty state.
*
* When a page is set dirty in its entirety, all its buffers are marked dirty
* (if the page has buffers).
*
* When a buffer is marked dirty, its page is dirtied, but the page's other
* buffers are not.
*
* Also. When blockdev buffers are explicitly read with bread(), they
* individually become uptodate. But their backing page remains not
* uptodate - even if all of its buffers are uptodate. A subsequent
* block_read_full_page() against that page will discover all the uptodate
* buffers, will set the page uptodate and will perform no I/O.
*/
/**
* mark_buffer_dirty - mark a buffer_head as needing writeout
* @bh: the buffer_head to mark dirty
*
* mark_buffer_dirty() will set the dirty bit against the buffer, then set its
* backing page dirty, then tag the page as dirty in its address_space's radix
* tree and then attach the address_space's inode to its superblock's dirty
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
* mapping->tree_lock and the global inode_lock.
*/
void mark_buffer_dirty(struct buffer_head *bh)
{
WARN_ON_ONCE(!buffer_uptodate(bh));
/*
* Very *carefully* optimize the it-is-already-dirty case.
*
* Don't let the final "is it dirty" escape to before we
* perhaps modified the buffer.
*/
if (buffer_dirty(bh)) {
smp_mb();
if (buffer_dirty(bh))
return;
}
if (!test_set_buffer_dirty(bh)) {
struct page *page = bh->b_page;
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page_mapping(page);
if (mapping)
__set_page_dirty(page, mapping, 0);
}
}
}
EXPORT_SYMBOL(mark_buffer_dirty);
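/*
 * Illustrative sketch, not part of the original file: the usual caller
 * pattern for the interface documented above, and for the buffer/page dirty
 * relationship described before it.  A filesystem reads a metadata block,
 * modifies it in memory and marks just that buffer dirty; the backing page
 * is dirtied as a side effect, but the page's other buffers stay clean.
 * The function name and the zero fill are hypothetical.
 */
static int example_update_metadata_block(struct super_block *sb,
					 sector_t block)
{
	struct buffer_head *bh = sb_bread(sb, block);

	if (!bh)
		return -EIO;
	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);	/* stand-in for a real update */
	unlock_buffer(bh);
	mark_buffer_dirty(bh);			/* dirties buffer and page */
	brelse(bh);				/* drop sb_bread()'s reference */
	return 0;
}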
/*
* Decrement a buffer_head's reference count. If all buffers against a page
* have zero reference count, are clean and unlocked, and if the page is clean
* and unlocked then try_to_free_buffers() may strip the buffers from the page
* in preparation for freeing it (sometimes, rarely, buffers are removed from
* a page but it ends up not being freed, and buffers may later be reattached).
*/
void __brelse(struct buffer_head * buf)
{
if (atomic_read(&buf->b_count)) {
put_bh(buf);
return;
}
WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);
/*
* bforget() is like brelse(), except it discards any
* potentially dirty data.
*/
void __bforget(struct buffer_head *bh)
{
clear_buffer_dirty(bh);
if (bh->b_assoc_map) {
struct address_space *buffer_mapping = bh->b_page->mapping;
spin_lock(&buffer_mapping->private_lock);
list_del_init(&bh->b_assoc_buffers);
bh->b_assoc_map = NULL;
spin_unlock(&buffer_mapping->private_lock);
}
__brelse(bh);
}
static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
lock_buffer(bh);
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
return bh;
} else {
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
}
brelse(bh);
return NULL;
}
/*
* Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block().
* The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their
* refcount elevated by one when they're in an LRU. A buffer can only appear
* once in a particular CPU's LRU. A single buffer can be present in multiple
* CPU's LRUs at the same time.
*
* This is a transparent caching front-end to sb_bread(), sb_getblk() and
* sb_find_get_block().
*
* The LRUs themselves only need locking against invalidate_bh_lrus. We use
* a local interrupt disable for that.
*/
#define BH_LRU_SIZE 8
struct bh_lru {
struct buffer_head *bhs[BH_LRU_SIZE];
};
static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
#ifdef CONFIG_SMP
#define bh_lru_lock() local_irq_disable()
#define bh_lru_unlock() local_irq_enable()
#else
#define bh_lru_lock() preempt_disable()
#define bh_lru_unlock() preempt_enable()
#endif
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
BUG_ON(irqs_disabled());
#endif
}
/*
* The LRU management algorithm is dopey-but-simple. Sorry.
*/
static void bh_lru_install(struct buffer_head *bh)
{
struct buffer_head *evictee = NULL;
struct bh_lru *lru;
check_irqs_on();
bh_lru_lock();
lru = &__get_cpu_var(bh_lrus);
if (lru->bhs[0] != bh) {
struct buffer_head *bhs[BH_LRU_SIZE];
int in;
int out = 0;
get_bh(bh);
bhs[out++] = bh;
for (in = 0; in < BH_LRU_SIZE; in++) {
struct buffer_head *bh2 = lru->bhs[in];
if (bh2 == bh) {
__brelse(bh2);
} else {
if (out >= BH_LRU_SIZE) {
BUG_ON(evictee != NULL);
evictee = bh2;
} else {
bhs[out++] = bh2;
}
}
}
while (out < BH_LRU_SIZE)
bhs[out++] = NULL;
memcpy(lru->bhs, bhs, sizeof(bhs));
}
bh_lru_unlock();
if (evictee)
__brelse(evictee);
}
/*
* Look up the bh in this cpu's LRU. If it's there, move it to the head.
*/
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *ret = NULL;
struct bh_lru *lru;
unsigned int i;
check_irqs_on();
bh_lru_lock();
lru = &__get_cpu_var(bh_lrus);
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = lru->bhs[i];
if (bh && bh->b_bdev == bdev &&
bh->b_blocknr == block && bh->b_size == size) {
if (i) {
while (i) {
lru->bhs[i] = lru->bhs[i - 1];
i--;
}
lru->bhs[0] = bh;
}
get_bh(bh);
ret = bh;
break;
}
}
bh_lru_unlock();
return ret;
}
/*
* Perform a pagecache lookup for the matching buffer. If it's there, refresh
* it in the LRU and mark it as accessed. If it is not present then return
* NULL
*/
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
if (bh == NULL) {
bh = __find_get_block_slow(bdev, block);
if (bh)
bh_lru_install(bh);
}
if (bh)
touch_buffer(bh);
return bh;
}
EXPORT_SYMBOL(__find_get_block);
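/*
 * Illustrative sketch, not part of the original file: sb_find_get_block() is
 * the non-allocating wrapper around the lookup above.  It returns NULL when
 * the block has no buffer in the page cache, and repeated lookups from the
 * same CPU are normally satisfied from the bh_lru without touching the
 * mapping's radix tree.  The helper name is hypothetical.
 */
static int example_block_is_cached(struct super_block *sb, sector_t block)
{
	struct buffer_head *bh = sb_find_get_block(sb, block);

	if (!bh)
		return 0;
	brelse(bh);		/* drop the reference taken by the lookup */
	return 1;
}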
/*
* __getblk will locate (and, if necessary, create) the buffer_head
* which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented.
*
* __getblk() cannot fail - it just keeps trying. If you pass it an
* illegal block number, __getblk() will happily return a buffer_head
* which represents the non-existent block. Very weird.
*
* __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
* attempt is failing. FIXME, perhaps?
*/
struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
might_sleep();
if (bh == NULL)
bh = __getblk_slow(bdev, block, size);
return bh;
}
EXPORT_SYMBOL(__getblk);
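/*
 * Illustrative sketch, not part of the original file: __getblk() is usually
 * reached through sb_getblk().  It is the right call when the whole block is
 * about to be overwritten, so no read from disk is wanted; the caller fills
 * the data and sets the buffer uptodate itself.  The function name and the
 * zero fill are hypothetical.
 */
static void example_overwrite_block(struct super_block *sb, sector_t block)
{
	struct buffer_head *bh = sb_getblk(sb, block);

	if (!bh)	/* per the comment above it keeps trying, but be safe */
		return;
	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);	/* the new block contents */
	set_buffer_uptodate(bh);
	unlock_buffer(bh);
	mark_buffer_dirty(bh);
	brelse(bh);
}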
/*
* Do async read-ahead on a buffer..
*/
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh)) {
ll_rw_block(READA, 1, &bh);
brelse(bh);
}
}
EXPORT_SYMBOL(__breadahead);
/**
* __bread() - reads a specified block and returns the bh
* @bdev: the block_device to read from
* @block: number of block
* @size: size (in bytes) to read
*
* Reads a specified block, and returns buffer head that contains it.
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh) && !buffer_uptodate(bh))
bh = __bread_slow(bh);
return bh;
}
EXPORT_SYMBOL(__bread);
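/*
 * Illustrative sketch, not part of the original file: typical use of the
 * interface documented above through the sb_bread() wrapper.  Unlike getblk,
 * bread issues a read if the buffer is not already uptodate and returns NULL
 * on I/O error, so the result must be checked.  The function name is
 * hypothetical.
 */
static int example_read_block(struct super_block *sb, sector_t block,
			      void *dst)
{
	struct buffer_head *bh = sb_bread(sb, block);

	if (!bh)
		return -EIO;
	memcpy(dst, bh->b_data, sb->s_blocksize);
	brelse(bh);
	return 0;
}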
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
* This doesn't race because it runs in each cpu either in irq
* or with preempt disabled.
*/
static void invalidate_bh_lru(void *arg)
{
struct bh_lru *b = &get_cpu_var(bh_lrus);
int i;
for (i = 0; i < BH_LRU_SIZE; i++) {
brelse(b->bhs[i]);
b->bhs[i] = NULL;
}
put_cpu_var(bh_lrus);
}
void invalidate_bh_lrus(void)
{
on_each_cpu(invalidate_bh_lru, NULL, 1);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
bh->b_page = page;
if (PageHighMem(page))
/*
* This catches illegal uses and preserves the offset:
*/
bh->b_data = (char *)(0 + offset);
else
bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);
/*
* Called when truncating a buffer on a page completely.
*/
static void discard_buffer(struct buffer_head * bh)
{
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
unlock_buffer(bh);
}
/**
* block_invalidatepage - invalidate part or all of a buffer-backed page
*
* @page: the page which is affected
* @offset: the index of the truncation point
*
* block_invalidatepage() is called when all or part of the page has become
* invalidated by a truncate operation.
*
* block_invalidatepage() does not have to release all buffers, but it must
* ensure that no dirty buffer is left outside @offset and that no I/O
* is underway against any of the blocks which are outside the truncation
* point. Because the caller is about to free (and possibly reuse) those
* blocks on-disk.
*/
void block_invalidatepage(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0;
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
goto out;
head = page_buffers(page);
bh = head;
do {
unsigned int next_off = curr_off + bh->b_size;
next = bh->b_this_page;
/*
* is this block fully invalidated?
*/
if (offset <= curr_off)
discard_buffer(bh);
curr_off = next_off;
bh = next;
} while (bh != head);
/*
* We release buffers only if the entire page is being invalidated.
* The get_block cached value has been unconditionally invalidated,
* so real IO is not possible anymore.
*/
if (offset == 0)
try_to_release_page(page, 0);
out:
return;
}
EXPORT_SYMBOL(block_invalidatepage);
/*
* We attach and possibly dirty the buffers atomically wrt
* __set_page_dirty_buffers() via private_lock. try_to_free_buffers
* is already excluded via the page lock.
*/
void create_empty_buffers(struct page *page,
unsigned long blocksize, unsigned long b_state)
{
struct buffer_head *bh, *head, *tail;
head = alloc_page_buffers(page, blocksize, 1);
bh = head;
do {
bh->b_state |= b_state;
tail = bh;
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
spin_lock(&page->mapping->private_lock);
if (PageUptodate(page) || PageDirty(page)) {
bh = head;
do {
if (PageDirty(page))
set_buffer_dirty(bh);
if (PageUptodate(page))
set_buffer_uptodate(bh);
bh = bh->b_this_page;
} while (bh != head);
}
attach_page_buffers(page, head);
spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);
/*
* We are taking a block for data and we don't want any output from any
* buffer-cache aliases starting from return from that function and
* until the moment when something will explicitly mark the buffer
* dirty (hopefully that will not happen until we will free that block ;-)
* We don't even need to mark it not-uptodate - nobody can expect
* anything from a newly allocated buffer anyway. We used to use
* unmap_buffer() for such invalidation, but that was wrong. We definitely
* don't want to mark the alias unmapped, for example - it would confuse
* anyone who might pick it with bread() afterwards...
*
* Also.. Note that bforget() doesn't lock the buffer. So there can
* be writeout I/O going on against recently-freed buffers. We don't
* wait on that I/O in bforget() - it's more efficient to wait on the I/O
* only if we really need to. That happens here.
*/
void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
{
struct buffer_head *old_bh;
might_sleep();
old_bh = __find_get_block_slow(bdev, block);
if (old_bh) {
clear_buffer_dirty(old_bh);
wait_on_buffer(old_bh);
clear_buffer_req(old_bh);
__brelse(old_bh);
}
}
EXPORT_SYMBOL(unmap_underlying_metadata);
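/*
 * Illustrative sketch, not part of the original file: the calling pattern the
 * write paths in this file use.  When a filesystem's get_block() maps a
 * freshly allocated block (buffer_new), any stale alias of that block in the
 * blockdev's buffer cache is invalidated before new data goes out.  The
 * helper name is hypothetical; get_block is whatever the filesystem supplies.
 */
static int example_map_new_block(struct inode *inode, sector_t iblock,
				 struct buffer_head *bh,
				 get_block_t *get_block)
{
	int err = get_block(inode, iblock, bh, 1);	/* create = 1 */

	if (err)
		return err;
	if (buffer_new(bh)) {
		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
		clear_buffer_new(bh);
	}
	return 0;
}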
/*
* NOTE! All mapped/uptodate combinations are valid:
*
* Mapped  Uptodate  Meaning
*
* No      No        "unknown" - must do get_block()
* No      Yes       "hole" - zero-filled
* Yes     No        "allocated" - allocated on disk, not read in
* Yes     Yes       "valid" - allocated and up-to-date in memory.
*
* "Dirty" is valid only with the last case (mapped+uptodate).
*/
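/*
 * Illustrative sketch, not part of the original file: the four
 * mapped/uptodate combinations from the table above, spelled out as a
 * classifier.  The enum and function names are hypothetical.
 */
enum example_bh_state {
	EXAMPLE_BH_UNKNOWN,	/* !mapped, !uptodate: must call get_block() */
	EXAMPLE_BH_HOLE,	/* !mapped,  uptodate: zero-filled hole */
	EXAMPLE_BH_ALLOCATED,	/*  mapped, !uptodate: on disk, not read in */
	EXAMPLE_BH_VALID,	/*  mapped,  uptodate: may also be dirty */
};

static enum example_bh_state example_classify_bh(struct buffer_head *bh)
{
	if (buffer_mapped(bh))
		return buffer_uptodate(bh) ? EXAMPLE_BH_VALID
					   : EXAMPLE_BH_ALLOCATED;
	return buffer_uptodate(bh) ? EXAMPLE_BH_HOLE : EXAMPLE_BH_UNKNOWN;
}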
/*
* While block_write_full_page is writing back the dirty buffers under
* the page lock, whoever dirtied the buffers may decide to clean them
* again at any time. We handle that by only looking at the buffer
* state inside lock_buffer().
*
* If block_write_full_page() is called for regular writeback
* (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
* locked buffer. This only can happen if someone has written the buffer
* directly, with submit_bh(). At the address_space level PageWriteback
* prevents this contention from occurring.
*
* If block_write_full_page() is called with wbc->sync_mode ==
* WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
* causes the writes to be flagged as synchronous writes, but the
* block device queue will NOT be unplugged, since usually many pages
* will be pushed to the queue before the higher-level caller actually
* waits for the writes to be completed. The various wait functions,
* such as wait_on_writeback_range() will ultimately call sync_page()
* which will ultimately call blk_run_backing_dev(), which will end up
* unplugging the device queue.
*/
static int __block_write_full_page(struct inode *inode, struct page *page,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler)
{
int err;
sector_t block;
sector_t last_block;
struct buffer_head *bh, *head;
const unsigned blocksize = 1 << inode->i_blkbits;
int nr_underway = 0;
int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE);
BUG_ON(!PageLocked(page));
last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
if (!page_has_buffers(page)) {
create_empty_buffers(page, blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
}
/*
* Be very careful. We have no exclusion from __set_page_dirty_buffers
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
* then we just miss that fact, and the page stays dirty.
*
* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
* handle that here by just cleaning them.
*/
block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
head = page_buffers(page);
bh = head;
/*
* Get all the dirty buffers mapped to disk addresses and
* handle any aliases from the underlying blockdev's mapping.
*/
do {
if (block > last_block) {
/*
* mapped buffers outside i_size will occur, because
* this page can be outside i_size when there is a
* truncate in progress.
*/
/*
* The buffer was zeroed by block_write_full_page()
*/
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
buffer_dirty(bh)) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, block, bh, 1);
if (err)
goto recover;
clear_buffer_delay(bh);
if (buffer_new(bh)) {
/* blockdev mappings never come here */
clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev,
bh->b_blocknr);
}
}
bh = bh->b_this_page;
block++;
} while (bh != head);
do {
if (!buffer_mapped(bh))
continue;
/*
* If it's a fully non-blocking write attempt and we cannot
* lock the buffer then redirty the page. Note that this can
* potentially cause a busy-wait loop from writeback threads
* and kswapd activity, but those code paths have their own
* higher-level throttling.
*/
if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
lock_buffer(bh);
} else if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
continue;
}
if (test_clear_buffer_dirty(bh)) {
mark_buffer_async_write_endio(bh, handler);
} else {
unlock_buffer(bh);
}
} while ((bh = bh->b_this_page) != head);
/*
* The page and its buffers are protected by PageWriteback(), so we can
* drop the bh refcounts early.
*/
BUG_ON(PageWriteback(page));
set_page_writeback(page);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
submit_bh(write_op, bh);
nr_underway++;
}
bh = next;
} while (bh != head);
unlock_page(page);
err = 0;
done:
if (nr_underway == 0) {
/*
* The page was marked dirty, but the buffers were
* clean. Someone wrote them back by hand with
* ll_rw_block/submit_bh. A rare case.
*/
end_page_writeback(page);
/*
* The page and buffer_heads can be released at any time from
* here on.
*/
}
return err;
recover:
/*
* ENOSPC, or some other error. We may already have added some
* blocks to the file, so we need to write these out to avoid
* exposing stale data.
* The page is currently locked and not marked for writeback
*/
bh = head;
/* Recovery: lock and submit the mapped buffers */
do {
if (buffer_mapped(bh) && buffer_dirty(bh) &&
!buffer_delay(bh)) {
lock_buffer(bh);
mark_buffer_async_write_endio(bh, handler);
} else {
/*
* The buffer may have been set dirty during
* attachment to a dirty page.
*/
clear_buffer_dirty(bh);
}
} while ((bh = bh->b_this_page) != head);
SetPageError(page);
BUG_ON(PageWriteback(page));
mapping_set_error(page->mapping, err);
set_page_writeback(page);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
submit_bh(write_op, bh);
nr_underway++;
}
bh = next;
} while (bh != head);
unlock_page(page);
goto done;
}
/*
* If a page has any new buffers, zero them out here, and mark them uptodate
* and dirty so they'll be written out (in order to prevent uninitialised
* block data from leaking). And clear the new bit.
*/
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
unsigned int block_start, block_end;
struct buffer_head *head, *bh;
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
return;
bh = head = page_buffers(page);
block_start = 0;
do {
block_end = block_start + bh->b_size;
if (buffer_new(bh)) {
if (block_end > from && block_start < to) {
if (!PageUptodate(page)) {
unsigned start, size;
start = max(from, block_start);
size = min(to, block_end) - start;
zero_user(page, start, size);
set_buffer_uptodate(bh);
}
clear_buffer_new(bh);
mark_buffer_dirty(bh);
}
}
block_start = block_end;
bh = bh->b_this_page;
} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);
static int __block_prepare_write(struct inode *inode, struct page *page,
unsigned from, unsigned to, get_block_t *get_block)
{
unsigned block_start, block_end;
sector_t block;
int err = 0;
unsigned blocksize, bbits;
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
BUG_ON(!PageLocked(page));
BUG_ON(from > PAGE_CACHE_SIZE);
BUG_ON(to > PAGE_CACHE_SIZE);
BUG_ON(from > to);
blocksize = 1 << inode->i_blkbits;
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
head = page_buffers(page);
bbits = inode->i_blkbits;
block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (PageUptodate(page)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
}
continue;
}
if (buffer_new(bh))
clear_buffer_new(bh);
if (!buffer_mapped(bh)) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, block, bh, 1);
if (err)
break;
if (buffer_new(bh)) {
unmap_underlying_metadata(bh->b_bdev,
bh->b_blocknr);
if (PageUptodate(page)) {
clear_buffer_new(bh);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
continue;
}
if (block_end > to || block_start < from)
zero_user_segments(page,
to, block_end,
block_start, from);
continue;
}
}
if (PageUptodate(page)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
continue;
}
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
ll_rw_block(READ, 1, &bh);
*wait_bh++=bh;
}
}
/*
* If we issued read requests - let them complete.
*/
while(wait_bh > wait) {
wait_on_buffer(*--wait_bh);
if (!buffer_uptodate(*wait_bh))
err = -EIO;
}
if (unlikely(err))
page_zero_new_buffers(page, from, to);
return err;
}
static int __block_commit_write(struct inode *inode, struct page *page,
unsigned from, unsigned to)
{
unsigned block_start, block_end;
int partial = 0;
unsigned blocksize;
struct buffer_head *bh, *head;
blocksize = 1 << inode->i_blkbits;
for(bh = head = page_buffers(page), block_start = 0;
bh != head || !block_start;
block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
} else {
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
}
clear_buffer_new(bh);
}
/*
* If this is a partial write which happened to make all buffers
* uptodate then we can optimize away a bogus readpage() for
* the next read(). Here we 'discover' whether the page went
* uptodate as a result of this (potentially partial) write.
*/
if (!partial)
SetPageUptodate(page);
return 0;
}
/*
* block_write_begin takes care of the basic task of block allocation and
* bringing partial write blocks uptodate first.
*
* If *pagep is not NULL, then block_write_begin uses the locked page
* at *pagep rather than allocating its own. In this case, the page will
* not be unlocked or deallocated on failure.
*/
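/*
 * Illustrative sketch, not part of the original file: the usual way a
 * filesystem wires block_write_begin() into its write_begin address_space
 * operation, passing its own get_block().  "examplefs_get_block" and
 * "examplefs_write_begin" are hypothetical names; the stub get_block exists
 * only to keep the sketch self-contained.
 */
static int examplefs_get_block(struct inode *inode, sector_t iblock,
			       struct buffer_head *bh_result, int create)
{
	return -EIO;	/* a real filesystem maps iblock to a disk block here */
}

static int examplefs_write_begin(struct file *file,
				 struct address_space *mapping,
				 loff_t pos, unsigned len, unsigned flags,
				 struct page **pagep, void **fsdata)
{
	*pagep = NULL;	/* let block_write_begin() grab and lock the page */
	return block_write_begin(file, mapping, pos, len, flags,
				 pagep, fsdata, examplefs_get_block);
}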
int block_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
{
struct inode *inode = mapping->host;
int status = 0;
struct page *page;
pgoff_t index;
unsigned start, end;
int ownpage = 0;
index = pos >> PAGE_CACHE_SHIFT;
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + len;
page = *pagep;
if (page == NULL) {
ownpage = 1;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
status = -ENOMEM;
goto out;
}
*pagep = page;
} else
BUG_ON(!PageLocked(page));
status = __block_prepare_write(inode, page, start, end, get_block);
if (unlikely(status)) {
ClearPageUptodate(page);
if (ownpage) {
unlock_page(page);