/*
 *  linux/mm/vmscan.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95, Stephen Tweedie.
 *  kswapd added: 7.1.96  sct
 *  Removed kswapd_ctl limits, and swap out as many pages as needed
 *  to bring the system back to freepages.high: 2.4.97, Rik van Riel.
 *  Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
 *  Multiqueue VM started 5.8.00, Rik van Riel.
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h> /* for try_to_release_page(),
buffer_heads_over_limit */
#include <linux/mm_inline.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/oom.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
#include <linux/swapops.h>

/*
 * NOTE(review): CREATE_TRACE_POINTS is presumably defined ahead of the trace
 * header so that the vmscan tracepoints are instantiated in this translation
 * unit (kernel tracepoint convention) - confirm. The define must stay
 * directly in front of the include.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/vmscan.h>
struct scan_control {
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
/* Number of pages freed so far during a call to shrink_zones() */
unsigned long nr_reclaimed;
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
unsigned long hibernation_mode;
/* Can mapped pages be reclaimed? */
int may_unmap;
/* Can pages be swapped as part of reclaim? */
int may_swap;

Johannes Weiner
committed
/*
* The memory cgroup that hit its limit and as a result is the
* primary target of this reclaim invocation.
*/
struct mem_cgroup *target_mem_cgroup;
/*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
* are scanned.
*/
nodemask_t *nodemask;

Johannes Weiner
committed
/*
 * A (memory cgroup, zone) pair naming the LRU lists being operated on.
 * A NULL @mem_cgroup is what scanning_global_lru() tests for.
 */
struct mem_cgroup_zone {
	struct mem_cgroup *mem_cgroup;
	struct zone *zone;
};
/* Yield the struct page whose ->lru node is the last entry (->prev) of list @_head. */
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
/*
 * prefetch_prev_lru_page - prefetch @_field of the page preceding @_page.
 * @_page:  page currently being processed
 * @_base:  list head; nothing is prefetched when @_page->lru.prev is @_base
 * @_field: member of struct page to prefetch
 *
 * Expands to an empty statement when the architecture does not define
 * ARCH_HAS_PREFETCH. Macro arguments are fully parenthesised so that
 * arbitrary expressions may be passed.
 */
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetch(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
/*
 * prefetchw_prev_lru_page - like prefetch_prev_lru_page(), but issues
 * prefetchw() on @_field of the page preceding @_page.
 * @_page:  page currently being processed
 * @_base:  list head; nothing is prefetched when @_page->lru.prev is @_base
 * @_field: member of struct page to prefetch
 *
 * Expands to an empty statement when the architecture does not define
 * ARCH_HAS_PREFETCHW. Macro arguments are fully parenthesised so that
 * arbitrary expressions may be passed.
 */
#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetchw(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
/*
* From 0 .. 100. Higher means more swappy.
*/
int vm_swappiness = 60;
long vm_total_pages; /* The total number of pages which the VM controls */

/* Every registered shrinker; register_shrinker() appends to the tail. */
static LIST_HEAD(shrinker_list);
/*
 * Guards shrinker_list: taken for write when a shrinker is added or removed,
 * and for read (trylock only) by shrink_slab() while it walks the list.
 */
static DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/* True when this reclaim pass has no target memory cgroup. */
static bool global_reclaim(struct scan_control *sc)
{
	return !sc->target_mem_cgroup;
}

/* True when @mz carries no memory cgroup, i.e. the zone's own LRU is used. */
static bool scanning_global_lru(struct mem_cgroup_zone *mz)
{
	return !mz->mem_cgroup;
}
#else
/* Without the memory controller every reclaim pass is global. */
static bool global_reclaim(struct scan_control *sc)
{
	return true;
}

/* Without the memory controller only the zone's own LRU exists. */
static bool scanning_global_lru(struct mem_cgroup_zone *mz)
{
	return true;
}
#endif
static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)

Johannes Weiner
committed
if (!scanning_global_lru(mz))
return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);

Johannes Weiner
committed
return &mz->zone->reclaim_stat;

Johannes Weiner
committed
static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
enum lru_list lru)

Johannes Weiner
committed
if (!scanning_global_lru(mz))
return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
zone_to_nid(mz->zone),
zone_idx(mz->zone),
BIT(lru));

Johannes Weiner
committed
return zone_page_state(mz->zone, NR_LRU_BASE + lru);
/*
* Add a shrinker callback to be called from the vm
*/
void register_shrinker(struct shrinker *shrinker)
atomic_long_set(&shrinker->nr_in_batch, 0);
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
up_write(&shrinker_rwsem);
EXPORT_SYMBOL(register_shrinker);
/*
 * Remove a shrinker callback.
 *
 * Unlinks @shrinker from shrinker_list with shrinker_rwsem held for write.
 */
void unregister_shrinker(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_del(&shrinker->list);
up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(unregister_shrinker);
/*
 * Store @nr_to_scan in @sc and invoke @shrinker's ->shrink callback,
 * handing back whatever the callback returns.
 */
static inline int do_shrinker_shrink(struct shrinker *shrinker,
				     struct shrink_control *sc,
				     unsigned long nr_to_scan)
{
	sc->nr_to_scan = nr_to_scan;

	return shrinker->shrink(shrinker, sc);
}
/* Batch size shrink_slab() falls back to when a shrinker leaves ->batch at zero. */
#define SHRINK_BATCH 128
/*
* Call the shrink functions to age shrinkable caches
*
* Here we assume it costs one seek to replace a lru page and that it also
* takes a seek to recreate a cache object. With this in mind we age equal
* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
* If the vm encountered mapped pages on the LRU it increase the pressure on
* slab to avoid swapping.
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
*
* `lru_pages' represents the number of on-LRU pages in all the zones which
* are eligible for the caller's allocation attempt. It is used for balancing
* slab reclaim versus page reclaim.
*
* Returns the number of slab objects which we shrunk.
unsigned long shrink_slab(struct shrink_control *shrink,
unsigned long nr_pages_scanned,
unsigned long lru_pages)
if (nr_pages_scanned == 0)
nr_pages_scanned = SWAP_CLUSTER_MAX;
if (!down_read_trylock(&shrinker_rwsem)) {
/* Assume we'll be able to shrink next time */
ret = 1;
goto out;
}
list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;
long total_scan;
long max_pass;
long batch_size = shrinker->batch ? shrinker->batch
: SHRINK_BATCH;
max_pass = do_shrinker_shrink(shrinker, shrink, 0);
if (max_pass <= 0)
continue;
/*
* copy the current shrinker scan count into a local variable
* and zero it so that other concurrent shrinker invocations
* don't also do this scanning work.
*/
nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
delta = (4 * nr_pages_scanned) / shrinker->seeks;
delta *= max_pass;
total_scan += delta;
if (total_scan < 0) {
printk(KERN_ERR "shrink_slab: %pF negative objects to "
"delete nr=%ld\n",
shrinker->shrink, total_scan);
total_scan = max_pass;
/*
* We need to avoid excessive windup on filesystem shrinkers
* due to large numbers of GFP_NOFS allocations causing the
* shrinkers to return -1 all the time. This results in a large
* nr being built up so when a shrink that can do some work
* comes along it empties the entire cache due to nr >>>
* max_pass. This is bad for sustaining a working set in
* memory.
*
* Hence only allow the shrinker to scan the entire cache when
* a large delta change is calculated directly.
*/
if (delta < max_pass / 4)
total_scan = min(total_scan, max_pass / 2);
/*
* Avoid risking looping forever due to too large nr value:
* never try to free more than twice the estimate number of
* freeable entries.
*/
if (total_scan > max_pass * 2)
total_scan = max_pass * 2;
trace_mm_shrink_slab_start(shrinker, shrink, nr,
nr_pages_scanned, lru_pages,
max_pass, delta, total_scan);
while (total_scan >= batch_size) {
nr_before = do_shrinker_shrink(shrinker, shrink, 0);
shrink_ret = do_shrinker_shrink(shrinker, shrink,
if (shrink_ret < nr_before)
ret += nr_before - shrink_ret;
count_vm_events(SLABS_SCANNED, batch_size);
total_scan -= batch_size;
/*
* move the unused scan count back into the shrinker in a
* manner that handles concurrent updates. If we exhausted the
* scan, there is no need to do an update.
*/
if (total_scan > 0)
new_nr = atomic_long_add_return(total_scan,
&shrinker->nr_in_batch);
else
new_nr = atomic_long_read(&shrinker->nr_in_batch);
trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
out:
cond_resched();
}
/*
 * Return non-zero when @page holds no references beyond the ones a freeable
 * page cache page is expected to have.
 */
static inline int is_page_cache_freeable(struct page *page)
{
	/*
	 * A freeable page cache page is referenced only by the caller
	 * that isolated the page, the page cache radix tree and
	 * optional buffer heads at page->private.
	 */
	return page_count(page) - page_has_private(page) == 2;
}
static int may_write_to_queue(struct backing_dev_info *bdi,
struct scan_control *sc)
if (current->flags & PF_SWAPWRITE)
return 1;
if (!bdi_write_congested(bdi))
return 1;
if (bdi == current->backing_dev_info)
return 1;
return 0;
}
/*
 * We detected a synchronous write error writing a page out.  Probably
 * -ENOSPC.  We need to propagate that into the address_space for a subsequent
 * fsync(), msync() or close().
 *
 * The tricky part is that after writepage we cannot touch the mapping: nothing
 * prevents it from being freed up.  But we have a ref on the page and once
 * that page is locked, the mapping is pinned.
 *
 * We're allowed to run sleeping lock_page() here because we know the caller has
 * __GFP_FS.
 *
 * NOTE(review): the lock_page()/unlock_page() pair was restored from the
 * description above; the listing had lost both calls - confirm.
 */
static void handle_write_error(struct address_space *mapping,
				struct page *page, int error)
{
	lock_page(page);
	/* only record the error if the page still belongs to @mapping */
	if (page_mapping(page) == mapping)
		mapping_set_error(mapping, error);
	unlock_page(page);
}
/*
 * Possible outcomes of pageout(); the caller switches on the value to decide
 * what to do with the page next.
 */
typedef enum {
/* failed to write page out, page is locked */
PAGE_KEEP,
/* move page to the active list, page is locked */
PAGE_ACTIVATE,
/* page has been sent to the disk successfully, page is unlocked */
PAGE_SUCCESS,
/* page is clean and locked */
PAGE_CLEAN,
} pageout_t;
* pageout is called by shrink_page_list() for each dirty page.
* Calls ->writepage().

Andy Whitcroft
committed
static pageout_t pageout(struct page *page, struct address_space *mapping,

KOSAKI Motohiro
committed
struct scan_control *sc)
{
/*
* If the page is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in __generic_file_aio_write() against
* this page's queue, we can perform writeback even if that
* will block.
*
* If the page is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
*/
if (!is_page_cache_freeable(page))
return PAGE_KEEP;
if (!mapping) {
/*
* Some data journaling orphaned pages can have
* page->mapping == NULL while being dirty with clean buffers.
*/
if (page_has_private(page)) {
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
printk("%s: orphaned page\n", __func__);
return PAGE_CLEAN;
}
}
return PAGE_KEEP;
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;

Mel Gorman
committed
if (!may_write_to_queue(mapping->backing_dev_info, sc))
return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
int res;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
.for_reclaim = 1,
};
SetPageReclaim(page);
res = mapping->a_ops->writepage(page, &wbc);
if (res < 0)
handle_write_error(mapping, page, res);
if (res == AOP_WRITEPAGE_ACTIVATE) {
ClearPageReclaim(page);
return PAGE_ACTIVATE;
}

Andy Whitcroft
committed
if (!PageWriteback(page)) {
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
inc_zone_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
return PAGE_CLEAN;
}
* Same as remove_mapping, but if the page is removed from the mapping, it
* gets returned with a refcount of 0.
static int __remove_mapping(struct address_space *mapping, struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(mapping != page_mapping(page));
* The non racy check for a busy page.
*
* Must be careful with the order of the tests. When someone has
* a ref to the page, it may be possible that they dirty it then
* drop the reference. So if PageDirty is tested before page_count
* here, then the following race may occur:
*
* get_user_pages(&page);
* [user mapping goes away]
* write_to(page);
* !PageDirty(page) [good]
* SetPageDirty(page);
* put_page(page);
* !page_count(page) [good, discard it]
*
* [oops, our write_to data is lost]
*
* Reversing the order of the tests ensures such a situation cannot
* escape unnoticed. The smp_rmb is needed to ensure the page->flags
* load is not satisfied before that of page->_count.
*
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under tree_lock, then this ordering is not required.
goto cannot_free;
/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
if (unlikely(PageDirty(page))) {
page_unfreeze_refs(page, 2);
goto cannot_free;
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
__delete_from_swap_cache(page);
swapcache_free(swap, page);
void (*freepage)(struct page *);
freepage = mapping->a_ops->freepage;
mem_cgroup_uncharge_cache_page(page);
if (freepage != NULL)
freepage(page);
}
return 1;
cannot_free:
return 0;
}
/*
 * Try to detach a locked page from its ->mapping.
 *
 * Gives up and returns 0 when the page is dirty or somebody else holds a
 * reference to it; returns 1 once the page has been detached. The caller
 * is assumed to hold exactly one reference on the page.
 */
int remove_mapping(struct address_space *mapping, struct page *page)
{
	if (!__remove_mapping(mapping, page))
		return 0;

	/*
	 * Unfreezing the refcount with 1 rather than 2 effectively
	 * drops the pagecache ref for us without requiring another
	 * atomic operation.
	 */
	page_unfreeze_refs(page, 1);
	return 1;
}
/**
 * putback_lru_page - put previously isolated page onto appropriate LRU list
 * @page: page to be put back to appropriate lru list
 *
 * Add previously isolated @page to appropriate LRU list.
 * Page may still be unevictable for other reasons.
 *
 * lru_lock must not be held, interrupts must be enabled.
 *
 * NOTE(review): the smp_mb() after add_page_to_unevictable_list() was
 * restored; the listing kept only the comment that describes the ordering
 * requirement - confirm against the authoritative tree.
 */
void putback_lru_page(struct page *page)
{
	int lru;
	int active = !!TestClearPageActive(page);
	int was_unevictable = PageUnevictable(page);

	VM_BUG_ON(PageLRU(page));

redo:
	ClearPageUnevictable(page);

	if (page_evictable(page, NULL)) {
		/*
		 * For evictable pages, we can use the cache.
		 * In event of a race, worst case is we end up with an
		 * unevictable page on [in]active list.
		 * We know how to handle that.
		 */
		lru = active + page_lru_base_type(page);
		lru_cache_add_lru(page, lru);
	} else {
		/*
		 * Put unevictable pages directly on zone's unevictable
		 * list.
		 */
		lru = LRU_UNEVICTABLE;
		add_page_to_unevictable_list(page);
		/*
		 * When racing with an mlock or AS_UNEVICTABLE clearing
		 * (page is unlocked) make sure that if the other thread
		 * does not observe our setting of PG_lru and fails
		 * isolation/check_move_unevictable_pages,
		 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
		 * the page back to the evictable list.
		 *
		 * The other side is TestClearPageMlocked() or shmem_lock().
		 */
		smp_mb();
	}

	/*
	 * page's status can change while we move it among lru. If an evictable
	 * page is on unevictable list, it never be freed. To avoid that,
	 * check after we added it to the list, again.
	 */
	if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
		if (!isolate_lru_page(page)) {
			put_page(page);
			goto redo;
		}
		/* This means someone else dropped this page from LRU
		 * So, it will be freed or putback to LRU again. There is
		 * nothing to do here.
		 */
	}

	if (was_unevictable && lru != LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGRESCUED);
	else if (!was_unevictable && lru == LRU_UNEVICTABLE)
		count_vm_event(UNEVICTABLE_PGCULLED);

	put_page(page);		/* drop ref from isolate */
}
/*
 * Verdicts produced by page_check_references() and consumed by
 * shrink_page_list().
 *
 * PAGEREF_KEEP is returned and switched on by those functions, so it has to
 * be declared here.
 */
enum page_references {
	/* go ahead and try to reclaim the page */
	PAGEREF_RECLAIM,
	/* reclaim the page only if it is clean */
	PAGEREF_RECLAIM_CLEAN,
	/* leave the page on the list it came from for another round */
	PAGEREF_KEEP,
	/* the page is in use: activate it */
	PAGEREF_ACTIVATE,
};
static enum page_references page_check_references(struct page *page,

Johannes Weiner
committed
struct mem_cgroup_zone *mz,
struct scan_control *sc)
{
int referenced_ptes, referenced_page;

Johannes Weiner
committed
referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
&vm_flags);
referenced_page = TestClearPageReferenced(page);
/*
* Mlock lost the isolation race with us. Let try_to_unmap()
* move the page to the unevictable list.
*/
if (vm_flags & VM_LOCKED)
return PAGEREF_RECLAIM;
if (referenced_ptes) {
if (PageAnon(page))
return PAGEREF_ACTIVATE;
/*
* All mapped pages start out with page table
* references from the instantiating fault, so we need
* to look twice if a mapped file page is used more
* than once.
*
* Mark it and spare it for another trip around the
* inactive list. Another page table reference will
* lead to its activation.
*
* Note: the mark is set for activated pages as well
* so that recently deactivated but used pages are
* quickly recovered.
*/
SetPageReferenced(page);
if (referenced_page || referenced_ptes > 1)
return PAGEREF_ACTIVATE;
/*
* Activate file-backed executable pages after first usage.
*/
if (vm_flags & VM_EXEC)
return PAGEREF_ACTIVATE;
return PAGEREF_KEEP;
}
/* Reclaim if clean, defer dirty pages to writeback */
if (referenced_page && !PageSwapBacked(page))
return PAGEREF_RECLAIM_CLEAN;
return PAGEREF_RECLAIM;
* shrink_page_list() returns the number of reclaimed pages
static unsigned long shrink_page_list(struct list_head *page_list,

Johannes Weiner
committed
struct mem_cgroup_zone *mz,

Mel Gorman
committed
struct scan_control *sc,

Mel Gorman
committed
int priority,
unsigned long *ret_nr_dirty,
unsigned long *ret_nr_writeback)
LIST_HEAD(free_pages);

Mel Gorman
committed
unsigned long nr_dirty = 0;
unsigned long nr_congested = 0;

Mel Gorman
committed
unsigned long nr_writeback = 0;
cond_resched();
while (!list_empty(page_list)) {
enum page_references references;
struct address_space *mapping;
struct page *page;
int may_enter_fs;
cond_resched();
page = lru_to_page(page_list);
list_del(&page->lru);

Johannes Weiner
committed
VM_BUG_ON(page_zone(page) != mz->zone);

Christoph Lameter
committed
if (unlikely(!page_evictable(page, NULL)))
goto cull_mlocked;
if (!sc->may_unmap && page_mapped(page))

Christoph Lameter
committed
goto keep_locked;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
sc->nr_scanned++;

Andy Whitcroft
committed
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
if (PageWriteback(page)) {

Mel Gorman
committed
nr_writeback++;
unlock_page(page);
goto keep;

Andy Whitcroft
committed
}

Johannes Weiner
committed
references = page_check_references(page, mz, sc);
switch (references) {
case PAGEREF_ACTIVATE:
case PAGEREF_KEEP:
goto keep_locked;
case PAGEREF_RECLAIM:
case PAGEREF_RECLAIM_CLEAN:
; /* try to reclaim the page below */
}
/*
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
if (!(sc->gfp_mask & __GFP_IO))
goto keep_locked;
mapping = page_mapping(page);
/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
switch (try_to_unmap(page, TTU_UNMAP)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
case SWAP_MLOCK:
goto cull_mlocked;
case SWAP_SUCCESS:
; /* try to free the page below */
}
}
if (PageDirty(page)) {

Mel Gorman
committed
nr_dirty++;
/*
* Only kswapd can writeback filesystem pages to

Mel Gorman
committed
* avoid risk of stack overflow but do not writeback
* unless under significant pressure.

Mel Gorman
committed
if (page_is_file_cache(page) &&
(!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) {

Mel Gorman
committed
/*
* Immediately reclaim when written back.
* Similar in principal to deactivate_page()
* except we already have the page isolated
* and know it's dirty
*/
inc_zone_page_state(page, NR_VMSCAN_IMMEDIATE);
SetPageReclaim(page);
goto keep_locked;
}
if (references == PAGEREF_RECLAIM_CLEAN)
if (!may_enter_fs)
if (!sc->may_writepage)
goto keep_locked;
/* Page is dirty, try to write it out here */

KOSAKI Motohiro
committed
switch (pageout(page, mapping, sc)) {

Mel Gorman
committed
nr_congested++;
goto keep_locked;
case PAGE_ACTIVATE:
goto activate_locked;
case PAGE_SUCCESS:

KOSAKI Motohiro
committed
if (PageWriteback(page))

KOSAKI Motohiro
committed
if (PageDirty(page))

KOSAKI Motohiro
committed
/*
* A synchronous write - probably a ramdisk. Go
* ahead and try to reclaim the page.
*/
goto keep;
if (PageDirty(page) || PageWriteback(page))
goto keep_locked;
mapping = page_mapping(page);
case PAGE_CLEAN:
; /* try to free the page below */
}
}
/*
* If the page has buffers, try to free the buffer mappings
* associated with this page. If we succeed we try to free
* the page as well.
*
* We do this even if the page is PageDirty().
* try_to_release_page() does not perform I/O, but it is
* possible for a page to have PageDirty set, but it is actually
* clean (all its buffers are clean). This happens if the
* buffers were written out directly, with submit_bh(). ext3
* will do this, as well as the blockdev mapping.
* try_to_release_page() will discover that cleanness and will
* drop the buffers and mark the page clean - it can be freed.
*
* Rarely, pages can have buffers and no ->mapping. These are
* the pages which were not successfully invalidated in
* truncate_complete_page(). We try to drop those buffers here
* and if that worked, and the page is no longer mapped into
* process address space (page_count == 1) it can be freed.
* Otherwise, leave the page on the LRU so it is swappable.
*/
if (page_has_private(page)) {
if (!try_to_release_page(page, sc->gfp_mask))
goto activate_locked;
if (!mapping && page_count(page) == 1) {
unlock_page(page);
if (put_page_testzero(page))
goto free_it;
else {
/*
* rare race with speculative reference.
* the speculative reference will free
* this page shortly, so we may
* increment nr_reclaimed here (and
* leave it off the LRU).
*/
nr_reclaimed++;
continue;
}
}
if (!mapping || !__remove_mapping(mapping, page))
goto keep_locked;
/*
* At this point, we have no other references and there is
* no way to pick any more up (removed from LRU, removed
* from pagecache). Can use non-atomic bitops now (and
* we obviously don't have to worry about waking up a process
* waiting on the page lock, because there are no references.
*/
__clear_page_locked(page);
/*
* Is there need to periodically free_page_list? It would
* appear not as the counts should be low
*/
list_add(&page->lru, &free_pages);
if (PageSwapCache(page))
try_to_free_swap(page);
unlock_page(page);
putback_lru_page(page);
continue;
/* Not a candidate for swapping, so reclaim swap space. */
if (PageSwapCache(page) && vm_swap_full())
try_to_free_swap(page);
SetPageActive(page);
pgactivate++;
keep_locked:
unlock_page(page);
keep:
list_add(&page->lru, &ret_pages);
VM_BUG_ON(PageLRU(page) || PageUnevictable(page));

Mel Gorman
committed
/*
* Tag a zone as congested if all the dirty pages encountered were
* backed by a congested BDI. In this case, reclaimers should just
* back off and wait for congestion to clear because further reclaim
* will encounter the same problem
*/
if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))

Johannes Weiner
committed
zone_set_flag(mz->zone, ZONE_CONGESTED);

Mel Gorman
committed
free_hot_cold_page_list(&free_pages, 1);
count_vm_events(PGACTIVATE, pgactivate);

Mel Gorman
committed
*ret_nr_dirty += nr_dirty;
*ret_nr_writeback += nr_writeback;
/*
* Attempt to remove the specified page from its LRU. Only take this page
* if it is of the appropriate PageActive status. Pages which are being
* freed elsewhere are also ignored.
*
* page: page to consider
* mode: one of the LRU isolation modes defined above
*
* returns 0 on success, -ve errno on failure.
*/
int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
int ret = -EINVAL;
/* Only take pages on the LRU. */
if (!PageLRU(page))
return ret;
all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
(ISOLATE_ACTIVE|ISOLATE_INACTIVE);
/*
* When checking the active state, we need to be sure we are
* dealing with comparible boolean values. Take the logical not
* of each.
*/
if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
if (!all_lru_mode && !!page_is_file_cache(page) != file)
/* Do not give back unevictable pages for compaction */
if (PageUnevictable(page))
return ret;
/*
* To minimise LRU disruption, the caller can indicate that it only
* wants to isolate pages it will be able to operate on without
* blocking - clean pages for the most part.
*
* ISOLATE_CLEAN means that only clean pages should be isolated. This
* is used by reclaim when it is cannot write to backing storage