Newer
Older
/*
* linux/mm/vmscan.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Swap reorganised 29.12.95, Stephen Tweedie.
* kswapd added: 7.1.96 sct
* Removed kswapd_ctl limits, and swap out as many pages as needed
* to bring the system back to freepages.high: 2.4.97, Rik van Riel.
* Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
* Multiqueue VM started 5.8.00, Rik van Riel.
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h> /* for try_to_release_page(),
buffer_heads_over_limit */
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
#include <linux/swapops.h>
struct scan_control {
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
/* Number of pages freed so far during a call to shrink_zones() */
unsigned long nr_reclaimed;
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
unsigned long hibernation_mode;
/* Can mapped pages be reclaimed? */
int may_unmap;
/* Can pages be swapped as part of reclaim? */
int may_swap;
/*
* Intend to reclaim enough contenious memory rather than to reclaim
* enough amount memory. I.e, it's the mode for high order allocation.
*/
bool lumpy_reclaim_mode;
/* Which cgroup do we reclaim from */
struct mem_cgroup *mem_cgroup;
/*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
* are scanned.
*/
nodemask_t *nodemask;
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
};
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field) \
do { \
if ((_page)->lru.prev != _base) { \
struct page *prev; \
\
prev = lru_to_page(&(_page->lru)); \
prefetch(&prev->_field); \
} \
} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field) \
do { \
if ((_page)->lru.prev != _base) { \
struct page *prev; \
\
prev = lru_to_page(&(_page->lru)); \
prefetchw(&prev->_field); \
} \
} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
/*
* From 0 .. 100. Higher means more swappy.
*/
int vm_swappiness = 60;
long vm_total_pages; /* The total number of pages which the VM controls */
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#define scanning_global_lru(sc) (!(sc)->mem_cgroup)

KAMEZAWA Hiroyuki
committed
#else

KAMEZAWA Hiroyuki
committed
#endif
static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
struct scan_control *sc)
{
return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
return &zone->reclaim_stat;
}
static unsigned long zone_nr_lru_pages(struct zone *zone,
struct scan_control *sc, enum lru_list lru)
return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
return zone_page_state(zone, NR_LRU_BASE + lru);
}
/*
* Add a shrinker callback to be called from the vm
*/
void register_shrinker(struct shrinker *shrinker)
shrinker->nr = 0;
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
up_write(&shrinker_rwsem);
EXPORT_SYMBOL(register_shrinker);
void unregister_shrinker(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_del(&shrinker->list);
up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(unregister_shrinker);
#define SHRINK_BATCH 128
/*
* Call the shrink functions to age shrinkable caches
*
* Here we assume it costs one seek to replace a lru page and that it also
* takes a seek to recreate a cache object. With this in mind we age equal
* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
* If the vm encountered mapped pages on the LRU it increase the pressure on
* slab to avoid swapping.
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
*
* `lru_pages' represents the number of on-LRU pages in all the zones which
* are eligible for the caller's allocation attempt. It is used for balancing
* slab reclaim versus page reclaim.
*
* Returns the number of slab objects which we shrunk.
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages)
if (scanned == 0)
scanned = SWAP_CLUSTER_MAX;
if (!down_read_trylock(&shrinker_rwsem))
return 1; /* Assume we'll be able to shrink next time */
list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;
unsigned long total_scan;
max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
delta *= max_pass;
do_div(delta, lru_pages + 1);
shrinker->nr += delta;
if (shrinker->nr < 0) {
printk(KERN_ERR "shrink_slab: %pF negative objects to "
"delete nr=%ld\n",
shrinker->shrink, shrinker->nr);
shrinker->nr = max_pass;
}
/*
* Avoid risking looping forever due to too large nr value:
* never try to free more than twice the estimate number of
* freeable entries.
*/
if (shrinker->nr > max_pass * 2)
shrinker->nr = max_pass * 2;
total_scan = shrinker->nr;
shrinker->nr = 0;
while (total_scan >= SHRINK_BATCH) {
long this_scan = SHRINK_BATCH;
int shrink_ret;
nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
gfp_mask);
if (shrink_ret < nr_before)
ret += nr_before - shrink_ret;
count_vm_events(SLABS_SCANNED, this_scan);
total_scan -= this_scan;
cond_resched();
}
shrinker->nr += total_scan;
}
up_read(&shrinker_rwsem);
}
static inline int is_page_cache_freeable(struct page *page)
{
/*
* A freeable page cache page is referenced only by the caller
* that isolated the page, the page cache radix tree and
* optional buffer heads at page->private.
*/
return page_count(page) - page_has_private(page) == 2;
}
static int may_write_to_queue(struct backing_dev_info *bdi)
{
if (current->flags & PF_SWAPWRITE)
return 1;
if (!bdi_write_congested(bdi))
return 1;
if (bdi == current->backing_dev_info)
return 1;
return 0;
}
/*
* We detected a synchronous write error writing a page out. Probably
* -ENOSPC. We need to propagate that into the address_space for a subsequent
* fsync(), msync() or close().
*
* The tricky part is that after writepage we cannot touch the mapping: nothing
* prevents it from being freed up. But we have a ref on the page and once
* that page is locked, the mapping is pinned.
*
* We're allowed to run sleeping lock_page() here because we know the caller has
* __GFP_FS.
*/
static void handle_write_error(struct address_space *mapping,
struct page *page, int error)
{
if (page_mapping(page) == mapping)
mapping_set_error(mapping, error);

Andy Whitcroft
committed
/* Request for sync pageout. */
enum pageout_io {
PAGEOUT_IO_ASYNC,
PAGEOUT_IO_SYNC,
};
/* possible outcome of pageout() */
typedef enum {
/* failed to write page out, page is locked */
PAGE_KEEP,
/* move page to the active list, page is locked */
PAGE_ACTIVATE,
/* page has been sent to the disk successfully, page is unlocked */
PAGE_SUCCESS,
/* page is clean and locked */
PAGE_CLEAN,
} pageout_t;
* pageout is called by shrink_page_list() for each dirty page.
* Calls ->writepage().

Andy Whitcroft
committed
static pageout_t pageout(struct page *page, struct address_space *mapping,
enum pageout_io sync_writeback)
{
/*
* If the page is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in __generic_file_aio_write() against
* this page's queue, we can perform writeback even if that
* will block.
*
* If the page is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
*/
if (!is_page_cache_freeable(page))
return PAGE_KEEP;
if (!mapping) {
/*
* Some data journaling orphaned pages can have
* page->mapping == NULL while being dirty with clean buffers.
*/
if (page_has_private(page)) {
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
printk("%s: orphaned page\n", __func__);
return PAGE_CLEAN;
}
}
return PAGE_KEEP;
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
if (!may_write_to_queue(mapping->backing_dev_info))
return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
int res;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
.nonblocking = 1,
.for_reclaim = 1,
};
SetPageReclaim(page);
res = mapping->a_ops->writepage(page, &wbc);
if (res < 0)
handle_write_error(mapping, page, res);
if (res == AOP_WRITEPAGE_ACTIVATE) {
ClearPageReclaim(page);
return PAGE_ACTIVATE;
}

Andy Whitcroft
committed
/*
* Wait on writeback if requested to. This happens when
* direct reclaiming a large contiguous area and the
* first attempt to free a range of pages fails.
*/
if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
wait_on_page_writeback(page);
if (!PageWriteback(page)) {
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
inc_zone_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
return PAGE_CLEAN;
}
* Same as remove_mapping, but if the page is removed from the mapping, it
* gets returned with a refcount of 0.
static int __remove_mapping(struct address_space *mapping, struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(mapping != page_mapping(page));
* The non racy check for a busy page.
*
* Must be careful with the order of the tests. When someone has
* a ref to the page, it may be possible that they dirty it then
* drop the reference. So if PageDirty is tested before page_count
* here, then the following race may occur:
*
* get_user_pages(&page);
* [user mapping goes away]
* write_to(page);
* !PageDirty(page) [good]
* SetPageDirty(page);
* put_page(page);
* !page_count(page) [good, discard it]
*
* [oops, our write_to data is lost]
*
* Reversing the order of the tests ensures such a situation cannot
* escape unnoticed. The smp_rmb is needed to ensure the page->flags
* load is not satisfied before that of page->_count.
*
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under tree_lock, then this ordering is not required.
goto cannot_free;
/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
if (unlikely(PageDirty(page))) {
page_unfreeze_refs(page, 2);
goto cannot_free;
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
__delete_from_swap_cache(page);
swapcache_free(swap, page);
} else {
__remove_from_page_cache(page);
mem_cgroup_uncharge_cache_page(page);
}
return 1;
cannot_free:
return 0;
}
/*
* Attempt to detach a locked page from its ->mapping. If it is dirty or if
* someone else has a ref on the page, abort and return 0. If it was
* successfully detached, return 1. Assumes the caller has a single ref on
* this page.
*/
int remove_mapping(struct address_space *mapping, struct page *page)
{
if (__remove_mapping(mapping, page)) {
/*
* Unfreezing the refcount with 1 rather than 2 effectively
* drops the pagecache ref for us without requiring another
* atomic operation.
*/
page_unfreeze_refs(page, 1);
return 1;
}
return 0;
}
/**
* putback_lru_page - put previously isolated page onto appropriate LRU list
* @page: page to be put back to appropriate lru list
*
* Add previously isolated @page to appropriate LRU list.
* Page may still be unevictable for other reasons.
*
* lru_lock must not be held, interrupts must be enabled.
*/
void putback_lru_page(struct page *page)
{
int lru;
int active = !!TestClearPageActive(page);
int was_unevictable = PageUnevictable(page);
VM_BUG_ON(PageLRU(page));
redo:
ClearPageUnevictable(page);
if (page_evictable(page, NULL)) {
/*
* For evictable pages, we can use the cache.
* In event of a race, worst case is we end up with an
* unevictable page on [in]active list.
* We know how to handle that.
*/
lru = active + page_lru_base_type(page);
lru_cache_add_lru(page, lru);
} else {
/*
* Put unevictable pages directly on zone's unevictable
* list.
*/
lru = LRU_UNEVICTABLE;
add_page_to_unevictable_list(page);
/*
* When racing with an mlock clearing (page is
* unlocked), make sure that if the other thread does
* not observe our setting of PG_lru and fails
* isolation, we see PG_mlocked cleared below and move
* the page back to the evictable list.
*
* The other side is TestClearPageMlocked().
*/
smp_mb();
}
/*
* page's status can change while we move it among lru. If an evictable
* page is on unevictable list, it never be freed. To avoid that,
* check after we added it to the list, again.
*/
if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
if (!isolate_lru_page(page)) {
put_page(page);
goto redo;
}
/* This means someone else dropped this page from LRU
* So, it will be freed or putback to LRU again. There is
* nothing to do here.
*/
}
if (was_unevictable && lru != LRU_UNEVICTABLE)
count_vm_event(UNEVICTABLE_PGRESCUED);
else if (!was_unevictable && lru == LRU_UNEVICTABLE)
count_vm_event(UNEVICTABLE_PGCULLED);
put_page(page); /* drop ref from isolate */
}
enum page_references {
PAGEREF_RECLAIM,
PAGEREF_RECLAIM_CLEAN,
PAGEREF_ACTIVATE,
};
static enum page_references page_check_references(struct page *page,
struct scan_control *sc)
{
int referenced_ptes, referenced_page;
referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
referenced_page = TestClearPageReferenced(page);
/* Lumpy reclaim - ignore references */
if (sc->lumpy_reclaim_mode)
return PAGEREF_RECLAIM;
/*
* Mlock lost the isolation race with us. Let try_to_unmap()
* move the page to the unevictable list.
*/
if (vm_flags & VM_LOCKED)
return PAGEREF_RECLAIM;
if (referenced_ptes) {
if (PageAnon(page))
return PAGEREF_ACTIVATE;
/*
* All mapped pages start out with page table
* references from the instantiating fault, so we need
* to look twice if a mapped file page is used more
* than once.
*
* Mark it and spare it for another trip around the
* inactive list. Another page table reference will
* lead to its activation.
*
* Note: the mark is set for activated pages as well
* so that recently deactivated but used pages are
* quickly recovered.
*/
SetPageReferenced(page);
if (referenced_page)
return PAGEREF_ACTIVATE;
return PAGEREF_KEEP;
}
/* Reclaim if clean, defer dirty pages to writeback */
if (referenced_page)
return PAGEREF_RECLAIM_CLEAN;
return PAGEREF_RECLAIM;
* shrink_page_list() returns the number of reclaimed pages
static unsigned long shrink_page_list(struct list_head *page_list,

Andy Whitcroft
committed
struct scan_control *sc,
enum pageout_io sync_writeback)
{
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
cond_resched();
pagevec_init(&freed_pvec, 1);
while (!list_empty(page_list)) {
enum page_references references;
struct address_space *mapping;
struct page *page;
int may_enter_fs;
cond_resched();
page = lru_to_page(page_list);
list_del(&page->lru);

Christoph Lameter
committed
if (unlikely(!page_evictable(page, NULL)))
goto cull_mlocked;
if (!sc->may_unmap && page_mapped(page))

Christoph Lameter
committed
goto keep_locked;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
sc->nr_scanned++;

Andy Whitcroft
committed
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
if (PageWriteback(page)) {
/*
* Synchronous reclaim is performed in two passes,
* first an asynchronous pass over the list to
* start parallel writeback, and a second synchronous
* pass to wait for the IO to complete. Wait here
* for any page for which writeback has already
* started.
*/
if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
wait_on_page_writeback(page);

Andy Whitcroft
committed
goto keep_locked;
}
references = page_check_references(page, sc);
switch (references) {
case PAGEREF_ACTIVATE:
case PAGEREF_KEEP:
goto keep_locked;
case PAGEREF_RECLAIM:
case PAGEREF_RECLAIM_CLEAN:
; /* try to reclaim the page below */
}
/*
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
if (!(sc->gfp_mask & __GFP_IO))
goto keep_locked;
mapping = page_mapping(page);
/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
switch (try_to_unmap(page, TTU_UNMAP)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
case SWAP_MLOCK:
goto cull_mlocked;
case SWAP_SUCCESS:
; /* try to free the page below */
}
}
if (PageDirty(page)) {
if (references == PAGEREF_RECLAIM_CLEAN)
if (!may_enter_fs)
if (!sc->may_writepage)
goto keep_locked;
/* Page is dirty, try to write it out here */

Andy Whitcroft
committed
switch (pageout(page, mapping, sync_writeback)) {
case PAGE_KEEP:
goto keep_locked;
case PAGE_ACTIVATE:
goto activate_locked;
case PAGE_SUCCESS:
if (PageWriteback(page) || PageDirty(page))
goto keep;
/*
* A synchronous write - probably a ramdisk. Go
* ahead and try to reclaim the page.
*/
goto keep;
if (PageDirty(page) || PageWriteback(page))
goto keep_locked;
mapping = page_mapping(page);
case PAGE_CLEAN:
; /* try to free the page below */
}
}
/*
* If the page has buffers, try to free the buffer mappings
* associated with this page. If we succeed we try to free
* the page as well.
*
* We do this even if the page is PageDirty().
* try_to_release_page() does not perform I/O, but it is
* possible for a page to have PageDirty set, but it is actually
* clean (all its buffers are clean). This happens if the
* buffers were written out directly, with submit_bh(). ext3
* will do this, as well as the blockdev mapping.
* try_to_release_page() will discover that cleanness and will
* drop the buffers and mark the page clean - it can be freed.
*
* Rarely, pages can have buffers and no ->mapping. These are
* the pages which were not successfully invalidated in
* truncate_complete_page(). We try to drop those buffers here
* and if that worked, and the page is no longer mapped into
* process address space (page_count == 1) it can be freed.
* Otherwise, leave the page on the LRU so it is swappable.
*/
if (page_has_private(page)) {
if (!try_to_release_page(page, sc->gfp_mask))
goto activate_locked;
if (!mapping && page_count(page) == 1) {
unlock_page(page);
if (put_page_testzero(page))
goto free_it;
else {
/*
* rare race with speculative reference.
* the speculative reference will free
* this page shortly, so we may
* increment nr_reclaimed here (and
* leave it off the LRU).
*/
nr_reclaimed++;
continue;
}
}
if (!mapping || !__remove_mapping(mapping, page))
goto keep_locked;
/*
* At this point, we have no other references and there is
* no way to pick any more up (removed from LRU, removed
* from pagecache). Can use non-atomic bitops now (and
* we obviously don't have to worry about waking up a process
* waiting on the page lock, because there are no references.
*/
__clear_page_locked(page);
if (!pagevec_add(&freed_pvec, page)) {
__pagevec_free(&freed_pvec);
pagevec_reinit(&freed_pvec);
}
if (PageSwapCache(page))
try_to_free_swap(page);
unlock_page(page);
putback_lru_page(page);
continue;
/* Not a candidate for swapping, so reclaim swap space. */
if (PageSwapCache(page) && vm_swap_full())
try_to_free_swap(page);
SetPageActive(page);
pgactivate++;
keep_locked:
unlock_page(page);
keep:
list_add(&page->lru, &ret_pages);
VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
}
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
count_vm_events(PGACTIVATE, pgactivate);
/*
* Attempt to remove the specified page from its LRU. Only take this page
* if it is of the appropriate PageActive status. Pages which are being
* freed elsewhere are also ignored.
*
* page: page to consider
* mode: one of the LRU isolation modes defined above
*
* returns 0 on success, -ve errno on failure.
*/
int __isolate_lru_page(struct page *page, int mode, int file)
{
int ret = -EINVAL;
/* Only take pages on the LRU. */
if (!PageLRU(page))
return ret;
/*
* When checking the active state, we need to be sure we are
* dealing with comparible boolean values. Take the logical not
* of each.
*/
if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
return ret;
if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
/*
* When this function is being called for lumpy reclaim, we
* initially look into all LRU pages, active, inactive and
* unevictable; only give shrink_page_list evictable pages.
*/
if (PageUnevictable(page))
return ret;
if (likely(get_page_unless_zero(page))) {
/*
* Be careful not to clear PageLRU until after we're
* sure the page is not being freed elsewhere -- the
* page release code relies on it.
*/
ClearPageLRU(page);
ret = 0;
}
return ret;
}
/*
* zone->lru_lock is heavily contended. Some of the functions that
* shrink the lists perform better by taking out a batch of pages
* and working on them outside the LRU lock.
*
* For pagecache intensive workloads, this function is the hottest
* spot in the kernel (apart from copy_*_user functions).
*
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
* @src: The LRU list to pull pages off.
* @dst: The temp list to put pages on to.
* @scanned: The number of pages that were scanned.
* @order: The caller's attempted allocation order
* @mode: One of the LRU isolation modes
* @file: True [1] if isolating file [!anon] pages
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
struct list_head *src, struct list_head *dst,
unsigned long *scanned, int order, int mode, int file)
for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
struct page *page;
unsigned long pfn;
unsigned long end_pfn;
unsigned long page_pfn;
int zone_id;
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
switch (__isolate_lru_page(page, mode, file)) {
break;
case -EBUSY:
/* else it is being freed elsewhere */
list_move(&page->lru, src);
mem_cgroup_rotate_lru_list(page, page_lru(page));
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
default:
BUG();
}
if (!order)
continue;
/*
* Attempt to take all pages in the order aligned region
* surrounding the tag page. Only take those pages of
* the same active state as that tag page. We may safely
* round the target page pfn down to the requested order
* as the mem_map is guarenteed valid out to MAX_ORDER,
* where that page is in a different zone we will detect
* it from its zone id and abort this block scan.
*/
zone_id = page_zone_id(page);
page_pfn = page_to_pfn(page);
pfn = page_pfn & ~((1 << order) - 1);
end_pfn = pfn + (1 << order);
for (; pfn < end_pfn; pfn++) {
struct page *cursor_page;
/* The target page is in the block, ignore it. */
if (unlikely(pfn == page_pfn))
continue;
/* Avoid holes within the zone. */
if (unlikely(!pfn_valid_within(pfn)))
break;
cursor_page = pfn_to_page(pfn);
/* Check that we have not crossed a zone boundary. */
if (unlikely(page_zone_id(cursor_page) != zone_id))
continue;

Minchan Kim
committed
/*
* If we don't have enough swap space, reclaiming of
* anon page which don't already have a swap slot is
* pointless.
*/
if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
!PageSwapCache(cursor_page))
continue;
if (__isolate_lru_page(cursor_page, mode, file) == 0) {
mem_cgroup_del_lru(cursor_page);