Newer
Older
/*
* linux/mm/vmscan.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*
* Swap reorganised 29.12.95, Stephen Tweedie.
* kswapd added: 7.1.96 sct
* Removed kswapd_ctl limits, and swap out as many pages as needed
* to bring the system back to freepages.high: 2.4.97, Rik van Riel.
* Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
* Multiqueue VM started 5.8.00, Rik van Riel.
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h> /* for try_to_release_page(),
buffer_heads_over_limit */
#include <linux/mm_inline.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/oom.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
#include <linux/swapops.h>

Mel Gorman
committed
#define CREATE_TRACE_POINTS
#include <trace/events/vmscan.h>
struct scan_control {
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
/* Number of pages freed so far during a call to shrink_zones() */
unsigned long nr_reclaimed;
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
unsigned long hibernation_mode;
/* Can mapped pages be reclaimed? */
int may_unmap;
/* Can pages be swapped as part of reclaim? */
int may_swap;
/* Scan (total_size >> priority) pages at once */
int priority;

Johannes Weiner
committed
/*
* The memory cgroup that hit its limit and as a result is the
* primary target of this reclaim invocation.
*/
struct mem_cgroup *target_mem_cgroup;
/*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
* are scanned.
*/
nodemask_t *nodemask;
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
};
/*
 * lru_to_page - convert a list position into the page that precedes it.
 * @_head: pointer to a struct list_head
 *
 * Evaluates to the struct page whose ->lru member is (_head)->prev.
 */
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

/*
 * prefetch_prev_lru_page - prefetch a field of the previous page on a list.
 * @_page:  page currently being looked at
 * @_base:  list head the page is linked on
 * @_field: member of struct page to prefetch
 *
 * Does nothing when @_page has @_base as its ->lru.prev, and compiles to an
 * empty statement when ARCH_HAS_PREFETCH is not defined.  Macro arguments
 * are parenthesized so that arbitrary expressions can be passed safely.
 */
#ifdef ARCH_HAS_PREFETCH
#define prefetch_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetch(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
#endif

/*
 * prefetchw_prev_lru_page - same as prefetch_prev_lru_page() but uses
 * prefetchw(); compiled out unless ARCH_HAS_PREFETCHW is defined.
 */
#ifdef ARCH_HAS_PREFETCHW
#define prefetchw_prev_lru_page(_page, _base, _field)			\
	do {								\
		if ((_page)->lru.prev != (_base)) {			\
			struct page *prev;				\
									\
			prev = lru_to_page(&((_page)->lru));		\
			prefetchw(&prev->_field);			\
		}							\
	} while (0)
#else
#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
#endif
/*
 * From 0 .. 100. Higher means more swappy.
 */
int vm_swappiness = 60;
unsigned long vm_total_pages; /* The total number of pages which the VM controls */
/* Every registered shrinker, linked through its ->list member. */
static LIST_HEAD(shrinker_list);
/*
 * Protects shrinker_list: held for write while a shrinker is added or
 * removed, and try-locked for read by shrink_slab() while walking the list.
 */
static DECLARE_RWSEM(shrinker_rwsem);
/*
 * global_reclaim - is this reclaim pass a global one?
 * @sc: scan control describing the current reclaim invocation
 *
 * With memory cgroup support built in, reclaim is global when no target
 * memory cgroup is set in @sc.  Without it, reclaim is always global.
 *
 * NOTE(review): the opening conditional was missing from this copy of the
 * file; CONFIG_MEMCG is assumed to be the guarding symbol - confirm against
 * the tree this file belongs to.
 */
#ifdef CONFIG_MEMCG
static bool global_reclaim(struct scan_control *sc)
{
	return !sc->target_mem_cgroup;
}
#else
static bool global_reclaim(struct scan_control *sc)
{
	return true;
}
#endif
static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
if (!mem_cgroup_disabled())
return mem_cgroup_get_lru_size(lruvec, lru);
return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
/*
* Add a shrinker callback to be called from the vm
*/
void register_shrinker(struct shrinker *shrinker)
atomic_long_set(&shrinker->nr_in_batch, 0);
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
up_write(&shrinker_rwsem);
EXPORT_SYMBOL(register_shrinker);
/*
 * Remove a shrinker callback: unlink @shrinker from shrinker_list while
 * holding shrinker_rwsem for write.
 */
void unregister_shrinker(struct shrinker *shrinker)
{
	struct list_head *node = &shrinker->list;

	down_write(&shrinker_rwsem);
	list_del(node);
	up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(unregister_shrinker);
/*
 * do_shrinker_shrink - invoke one shrinker with a given scan count.
 * @shrinker:   shrinker whose ->shrink callback is called
 * @sc:         shrink control handed to the callback
 * @nr_to_scan: value stored into @sc->nr_to_scan before the call
 *
 * Returns whatever the ->shrink callback returns.
 */
static inline int do_shrinker_shrink(struct shrinker *shrinker,
				     struct shrink_control *sc,
				     unsigned long nr_to_scan)
{
	int result;

	sc->nr_to_scan = nr_to_scan;
	result = shrinker->shrink(shrinker, sc);

	return result;
}
/* Batch size shrink_slab() falls back to when shrinker->batch is zero. */
#define SHRINK_BATCH 128
/*
* Call the shrink functions to age shrinkable caches
*
* Here we assume it costs one seek to replace a lru page and that it also
* takes a seek to recreate a cache object. With this in mind we age equal
* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
* If the vm encountered mapped pages on the LRU it increases the pressure on
* slab to avoid swapping.
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
*
* `lru_pages' represents the number of on-LRU pages in all the zones which
* are eligible for the caller's allocation attempt. It is used for balancing
* slab reclaim versus page reclaim.
*
* Returns the number of slab objects which we shrunk.
unsigned long shrink_slab(struct shrink_control *shrink,
unsigned long nr_pages_scanned,
unsigned long lru_pages)
if (nr_pages_scanned == 0)
nr_pages_scanned = SWAP_CLUSTER_MAX;
if (!down_read_trylock(&shrinker_rwsem)) {
/* Assume we'll be able to shrink next time */
ret = 1;
goto out;
}
list_for_each_entry(shrinker, &shrinker_list, list) {
unsigned long long delta;
long total_scan;
long max_pass;
long batch_size = shrinker->batch ? shrinker->batch
: SHRINK_BATCH;
max_pass = do_shrinker_shrink(shrinker, shrink, 0);
if (max_pass <= 0)
continue;
/*
* copy the current shrinker scan count into a local variable
* and zero it so that other concurrent shrinker invocations
* don't also do this scanning work.
*/
nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
delta = (4 * nr_pages_scanned) / shrinker->seeks;
delta *= max_pass;
total_scan += delta;
if (total_scan < 0) {
printk(KERN_ERR "shrink_slab: %pF negative objects to "
"delete nr=%ld\n",
shrinker->shrink, total_scan);
total_scan = max_pass;
/*
* We need to avoid excessive windup on filesystem shrinkers
* due to large numbers of GFP_NOFS allocations causing the
* shrinkers to return -1 all the time. This results in a large
* nr being built up so when a shrink that can do some work
* comes along it empties the entire cache due to nr >>>
* max_pass. This is bad for sustaining a working set in
* memory.
*
* Hence only allow the shrinker to scan the entire cache when
* a large delta change is calculated directly.
*/
if (delta < max_pass / 4)
total_scan = min(total_scan, max_pass / 2);
/*
* Avoid risking looping forever due to too large nr value:
* never try to free more than twice the estimate number of
* freeable entries.
*/
if (total_scan > max_pass * 2)
total_scan = max_pass * 2;
trace_mm_shrink_slab_start(shrinker, shrink, nr,
nr_pages_scanned, lru_pages,
max_pass, delta, total_scan);
while (total_scan >= batch_size) {
nr_before = do_shrinker_shrink(shrinker, shrink, 0);
shrink_ret = do_shrinker_shrink(shrinker, shrink,
if (shrink_ret < nr_before)
ret += nr_before - shrink_ret;
count_vm_events(SLABS_SCANNED, batch_size);
total_scan -= batch_size;
/*
* move the unused scan count back into the shrinker in a
* manner that handles concurrent updates. If we exhausted the
* scan, there is no need to do an update.
*/
if (total_scan > 0)
new_nr = atomic_long_add_return(total_scan,
&shrinker->nr_in_batch);
else
new_nr = atomic_long_read(&shrinker->nr_in_batch);
trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
out:
cond_resched();
}
/*
 * is_page_cache_freeable - does only the expected set of users hold @page?
 *
 * Returns non-zero when the page count, less one if the page has private
 * data, is exactly 2.
 */
static inline int is_page_cache_freeable(struct page *page)
{
	/*
	 * A freeable page cache page is referenced only by the caller
	 * that isolated the page, the page cache radix tree and
	 * optional buffer heads at page->private.
	 */
	return page_count(page) - page_has_private(page) == 2;
}
static int may_write_to_queue(struct backing_dev_info *bdi,
struct scan_control *sc)
if (current->flags & PF_SWAPWRITE)
return 1;
if (!bdi_write_congested(bdi))
return 1;
if (bdi == current->backing_dev_info)
return 1;
return 0;
}
/*
 * We detected a synchronous write error writing a page out. Probably
 * -ENOSPC. We need to propagate that into the address_space for a subsequent
 * fsync(), msync() or close().
 *
 * The tricky part is that after writepage we cannot touch the mapping: nothing
 * prevents it from being freed up. But we have a ref on the page and once
 * that page is locked, the mapping is pinned.
 *
 * We're allowed to run sleeping lock_page() here because we know the caller has
 * __GFP_FS.
 */
static void handle_write_error(struct address_space *mapping,
			       struct page *page, int error)
{
	/*
	 * Take the page lock before looking at the mapping: as explained
	 * above, only a locked page pins its mapping.
	 */
	lock_page(page);
	/* Record the error only if the page still belongs to @mapping. */
	if (page_mapping(page) == mapping)
		mapping_set_error(mapping, error);
	unlock_page(page);
}
/*
 * possible outcome of pageout()
 *
 * Each value also tells the caller whether the page is still locked when
 * pageout() returns.
 */
typedef enum {
/*
 * failed to write page out, page is locked; pageout() returns this when
 * the page is not freeable, has no mapping, or its queue may not be
 * written to
 */
PAGE_KEEP,
/*
 * move page to the active list, page is locked; pageout() returns this when
 * the mapping has no ->writepage or ->writepage asked for activation
 */
PAGE_ACTIVATE,
/* page has been sent to the disk successfully, page is unlocked */
PAGE_SUCCESS,
/* page is clean and locked */
PAGE_CLEAN,
} pageout_t;
* pageout is called by shrink_page_list() for each dirty page.
* Calls ->writepage().

Andy Whitcroft
committed
static pageout_t pageout(struct page *page, struct address_space *mapping,

KOSAKI Motohiro
committed
struct scan_control *sc)
{
/*
* If the page is dirty, only perform writeback if that write
* will be non-blocking. To prevent this allocation from being
* stalled by pagecache activity. But note that there may be
* stalls if we need to run get_block(). We could test
* PagePrivate for that.
*
* If this process is currently in __generic_file_aio_write() against
* this page's queue, we can perform writeback even if that
* will block.
*
* If the page is swapcache, write it back even if that would
* block, for some throttling. This happens by accident, because
* swap_backing_dev_info is bust: it doesn't reflect the
* congestion state of the swapdevs. Easy to fix, if needed.
*/
if (!is_page_cache_freeable(page))
return PAGE_KEEP;
if (!mapping) {
/*
* Some data journaling orphaned pages can have
* page->mapping == NULL while being dirty with clean buffers.
*/
if (page_has_private(page)) {
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
printk("%s: orphaned page\n", __func__);
return PAGE_CLEAN;
}
}
return PAGE_KEEP;
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;

Mel Gorman
committed
if (!may_write_to_queue(mapping->backing_dev_info, sc))
return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
int res;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
.for_reclaim = 1,
};
SetPageReclaim(page);
res = mapping->a_ops->writepage(page, &wbc);
if (res < 0)
handle_write_error(mapping, page, res);
if (res == AOP_WRITEPAGE_ACTIVATE) {
ClearPageReclaim(page);
return PAGE_ACTIVATE;
}

Andy Whitcroft
committed
if (!PageWriteback(page)) {
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
inc_zone_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
return PAGE_CLEAN;
}
* Same as remove_mapping, but if the page is removed from the mapping, it
* gets returned with a refcount of 0.
static int __remove_mapping(struct address_space *mapping, struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(mapping != page_mapping(page));
* The non racy check for a busy page.
*
* Must be careful with the order of the tests. When someone has
* a ref to the page, it may be possible that they dirty it then
* drop the reference. So if PageDirty is tested before page_count
* here, then the following race may occur:
*
* get_user_pages(&page);
* [user mapping goes away]
* write_to(page);
* !PageDirty(page) [good]
* SetPageDirty(page);
* put_page(page);
* !page_count(page) [good, discard it]
*
* [oops, our write_to data is lost]
*
* Reversing the order of the tests ensures such a situation cannot
* escape unnoticed. The smp_rmb is needed to ensure the page->flags
* load is not satisfied before that of page->_count.
*
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under tree_lock, then this ordering is not required.
goto cannot_free;
/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
if (unlikely(PageDirty(page))) {
page_unfreeze_refs(page, 2);
goto cannot_free;
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
__delete_from_swap_cache(page);
swapcache_free(swap, page);
void (*freepage)(struct page *);
freepage = mapping->a_ops->freepage;
mem_cgroup_uncharge_cache_page(page);
if (freepage != NULL)
freepage(page);
}
return 1;
cannot_free:
return 0;
}
/*
 * remove_mapping - try to detach a locked page from its ->mapping.
 * @mapping: address_space the page is expected to belong to
 * @page:    the locked page; the caller is assumed to hold a single ref
 *
 * Returns 1 if the page was detached, 0 if it could not be (it is dirty or
 * somebody else holds a reference).
 */
int remove_mapping(struct address_space *mapping, struct page *page)
{
	if (!__remove_mapping(mapping, page))
		return 0;

	/*
	 * The page comes back with its refcount frozen.  Unfreezing to 1
	 * instead of 2 drops the pagecache reference in the same step, so
	 * no additional atomic operation is needed.
	 */
	page_unfreeze_refs(page, 1);

	return 1;
}
/**
* putback_lru_page - put previously isolated page onto appropriate LRU list
* @page: page to be put back to appropriate lru list
*
* Add previously isolated @page to appropriate LRU list.
* Page may still be unevictable for other reasons.
*
* lru_lock must not be held, interrupts must be enabled.
*/
void putback_lru_page(struct page *page)
{
int lru;
int active = !!TestClearPageActive(page);
int was_unevictable = PageUnevictable(page);
VM_BUG_ON(PageLRU(page));
redo:
ClearPageUnevictable(page);
/*
* For evictable pages, we can use the cache.
* In event of a race, worst case is we end up with an
* unevictable page on [in]active list.
* We know how to handle that.
*/
lru = active + page_lru_base_type(page);
lru_cache_add_lru(page, lru);
} else {
/*
* Put unevictable pages directly on zone's unevictable
* list.
*/
lru = LRU_UNEVICTABLE;
add_page_to_unevictable_list(page);
* When racing with an mlock or AS_UNEVICTABLE clearing
* (page is unlocked) make sure that if the other thread
* does not observe our setting of PG_lru and fails
* isolation/check_move_unevictable_pages,
* we see PG_mlocked/AS_UNEVICTABLE cleared below and move
* the page back to the evictable list.
*
* The other side is TestClearPageMlocked() or shmem_lock().
}
/*
* The page's status can change while we move it between LRU lists. If
* an evictable page ends up on the unevictable list, it will never be
* freed. To avoid that, check again after adding it to the list.
*/
if (lru == LRU_UNEVICTABLE && page_evictable(page)) {
if (!isolate_lru_page(page)) {
put_page(page);
goto redo;
}
/* This means someone else dropped this page from LRU
* So, it will be freed or putback to LRU again. There is
* nothing to do here.
*/
}
if (was_unevictable && lru != LRU_UNEVICTABLE)
count_vm_event(UNEVICTABLE_PGRESCUED);
else if (!was_unevictable && lru == LRU_UNEVICTABLE)
count_vm_event(UNEVICTABLE_PGCULLED);
put_page(page); /* drop ref from isolate */
}
/*
 * Verdicts page_check_references() can give for a page on the inactive
 * list; shrink_page_list() switches on them.
 */
enum page_references {
	/* try to reclaim the page */
	PAGEREF_RECLAIM,
	/* try to reclaim the page, but only if it is clean */
	PAGEREF_RECLAIM_CLEAN,
	/* leave the page where it is for another trip around the list */
	PAGEREF_KEEP,
	/* move the page to the active list */
	PAGEREF_ACTIVATE,
};
static enum page_references page_check_references(struct page *page,
struct scan_control *sc)
{
int referenced_ptes, referenced_page;

Johannes Weiner
committed
referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
&vm_flags);
referenced_page = TestClearPageReferenced(page);
/*
* Mlock lost the isolation race with us. Let try_to_unmap()
* move the page to the unevictable list.
*/
if (vm_flags & VM_LOCKED)
return PAGEREF_RECLAIM;
if (referenced_ptes) {
if (PageSwapBacked(page))
return PAGEREF_ACTIVATE;
/*
* All mapped pages start out with page table
* references from the instantiating fault, so we need
* to look twice if a mapped file page is used more
* than once.
*
* Mark it and spare it for another trip around the
* inactive list. Another page table reference will
* lead to its activation.
*
* Note: the mark is set for activated pages as well
* so that recently deactivated but used pages are
* quickly recovered.
*/
SetPageReferenced(page);
if (referenced_page || referenced_ptes > 1)
return PAGEREF_ACTIVATE;
/*
* Activate file-backed executable pages after first usage.
*/
if (vm_flags & VM_EXEC)
return PAGEREF_ACTIVATE;
return PAGEREF_KEEP;
}
/* Reclaim if clean, defer dirty pages to writeback */
if (referenced_page && !PageSwapBacked(page))
return PAGEREF_RECLAIM_CLEAN;
return PAGEREF_RECLAIM;
* shrink_page_list() returns the number of reclaimed pages
static unsigned long shrink_page_list(struct list_head *page_list,
struct zone *zone,

Mel Gorman
committed
struct scan_control *sc,

Minchan Kim
committed
enum ttu_flags ttu_flags,

Mel Gorman
committed
unsigned long *ret_nr_unqueued_dirty,

Minchan Kim
committed
unsigned long *ret_nr_writeback,
bool force_reclaim)
LIST_HEAD(free_pages);

Mel Gorman
committed
unsigned long nr_unqueued_dirty = 0;

Mel Gorman
committed
unsigned long nr_dirty = 0;
unsigned long nr_congested = 0;

Mel Gorman
committed
unsigned long nr_writeback = 0;
mem_cgroup_uncharge_start();
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
int may_enter_fs;

Minchan Kim
committed
enum page_references references = PAGEREF_RECLAIM_CLEAN;
cond_resched();
page = lru_to_page(page_list);
list_del(&page->lru);
VM_BUG_ON(page_zone(page) != zone);

Christoph Lameter
committed
if (unlikely(!page_evictable(page)))
if (!sc->may_unmap && page_mapped(page))

Christoph Lameter
committed
goto keep_locked;
/* Double the slab pressure for mapped and swapcache pages */
if (page_mapped(page) || PageSwapCache(page))
sc->nr_scanned++;

Andy Whitcroft
committed
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
if (PageWriteback(page)) {
/*
* memcg doesn't have any dirty pages throttling so we
* could easily OOM just because too many pages are in
* writeback and there is nothing else to reclaim.
* Check __GFP_IO, certainly because a loop driver
* thread might enter reclaim, and deadlock if it waits
* on a page for which it is needed to do the write
* (loop masks off __GFP_IO|__GFP_FS for this reason);
* but more thought would probably show more reasons.
*
* Don't require __GFP_FS, since we're not going into
* the FS, just waiting on its writeback completion.
* Worryingly, ext4 gfs2 and xfs allocate pages with
* grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so
* testing may_enter_fs here is liable to OOM on them.
if (global_reclaim(sc) ||
!PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
/*
* This is slightly racy - end_page_writeback()
* might have just cleared PageReclaim, then
* setting PageReclaim here end up interpreted
* as PageReadahead - but that does not matter
* enough to care. What we do want is for this
* page to have PageReclaim set next time memcg
* reclaim reaches the tests above, so it will
* then wait_on_page_writeback() to avoid OOM;
* and it's also appropriate in global reclaim.
*/
SetPageReclaim(page);
wait_on_page_writeback(page);

Andy Whitcroft
committed
}

Minchan Kim
committed
if (!force_reclaim)
references = page_check_references(page, sc);
switch (references) {
case PAGEREF_ACTIVATE:
case PAGEREF_KEEP:
goto keep_locked;
case PAGEREF_RECLAIM:
case PAGEREF_RECLAIM_CLEAN:
; /* try to reclaim the page below */
}
/*
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
if (!(sc->gfp_mask & __GFP_IO))
goto keep_locked;
if (!add_to_swap(page, page_list))
mapping = page_mapping(page);
/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {

Minchan Kim
committed
switch (try_to_unmap(page, ttu_flags)) {
case SWAP_FAIL:
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
case SWAP_MLOCK:
goto cull_mlocked;
case SWAP_SUCCESS:
; /* try to free the page below */
}
}
if (PageDirty(page)) {

Mel Gorman
committed
nr_dirty++;

Mel Gorman
committed
if (!PageWriteback(page))
nr_unqueued_dirty++;
/*
* Only kswapd can writeback filesystem pages to

Mel Gorman
committed
* avoid risk of stack overflow but only writeback
* if many dirty pages have been encountered.

Mel Gorman
committed
if (page_is_file_cache(page) &&
(!current_is_kswapd() ||

Mel Gorman
committed
!zone_is_reclaim_dirty(zone))) {

Mel Gorman
committed
/*
* Immediately reclaim when written back.
* Similar in principal to deactivate_page()
* except we already have the page isolated
* and know it's dirty
*/
inc_zone_page_state(page, NR_VMSCAN_IMMEDIATE);
SetPageReclaim(page);
goto keep_locked;
}
if (references == PAGEREF_RECLAIM_CLEAN)
if (!may_enter_fs)
if (!sc->may_writepage)
goto keep_locked;
/* Page is dirty, try to write it out here */

KOSAKI Motohiro
committed
switch (pageout(page, mapping, sc)) {

Mel Gorman
committed
nr_congested++;
goto keep_locked;
case PAGE_ACTIVATE:
goto activate_locked;
case PAGE_SUCCESS:

KOSAKI Motohiro
committed
if (PageWriteback(page))

KOSAKI Motohiro
committed
if (PageDirty(page))

KOSAKI Motohiro
committed
/*
* A synchronous write - probably a ramdisk. Go
* ahead and try to reclaim the page.
*/
goto keep;
if (PageDirty(page) || PageWriteback(page))
goto keep_locked;
mapping = page_mapping(page);
case PAGE_CLEAN:
; /* try to free the page below */
}
}
/*
* If the page has buffers, try to free the buffer mappings
* associated with this page. If we succeed we try to free
* the page as well.
*
* We do this even if the page is PageDirty().
* try_to_release_page() does not perform I/O, but it is
* possible for a page to have PageDirty set, but it is actually
* clean (all its buffers are clean). This happens if the
* buffers were written out directly, with submit_bh(). ext3
* will do this, as well as the blockdev mapping.
* try_to_release_page() will discover that cleanness and will
* drop the buffers and mark the page clean - it can be freed.
*
* Rarely, pages can have buffers and no ->mapping. These are
* the pages which were not successfully invalidated in
* truncate_complete_page(). We try to drop those buffers here
* and if that worked, and the page is no longer mapped into
* process address space (page_count == 1) it can be freed.
* Otherwise, leave the page on the LRU so it is swappable.
*/
if (page_has_private(page)) {
if (!try_to_release_page(page, sc->gfp_mask))
goto activate_locked;
if (!mapping && page_count(page) == 1) {
unlock_page(page);
if (put_page_testzero(page))
goto free_it;
else {
/*
* rare race with speculative reference.
* the speculative reference will free
* this page shortly, so we may
* increment nr_reclaimed here (and
* leave it off the LRU).
*/
nr_reclaimed++;
continue;
}
}
if (!mapping || !__remove_mapping(mapping, page))
goto keep_locked;
/*
* At this point, we have no other references and there is
* no way to pick any more up (removed from LRU, removed
* from pagecache). Can use non-atomic bitops now (and
* we obviously don't have to worry about waking up a process
* waiting on the page lock, because there are no references.
*/
__clear_page_locked(page);
/*
* Is there need to periodically free_page_list? It would
* appear not as the counts should be low
*/
list_add(&page->lru, &free_pages);
if (PageSwapCache(page))
try_to_free_swap(page);
unlock_page(page);
putback_lru_page(page);
continue;
/* Not a candidate for swapping, so reclaim swap space. */
if (PageSwapCache(page) && vm_swap_full())
try_to_free_swap(page);
SetPageActive(page);
pgactivate++;
keep_locked:
unlock_page(page);
keep:
list_add(&page->lru, &ret_pages);
VM_BUG_ON(PageLRU(page) || PageUnevictable(page));

Mel Gorman
committed
/*
* Tag a zone as congested if all the dirty pages encountered were
* backed by a congested BDI. In this case, reclaimers should just
* back off and wait for congestion to clear because further reclaim
* will encounter the same problem
*/
if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
zone_set_flag(zone, ZONE_CONGESTED);

Mel Gorman
committed
free_hot_cold_page_list(&free_pages, 1);
count_vm_events(PGACTIVATE, pgactivate);
mem_cgroup_uncharge_end();

Mel Gorman
committed
*ret_nr_unqueued_dirty += nr_unqueued_dirty;

Mel Gorman
committed
*ret_nr_writeback += nr_writeback;

Minchan Kim
committed
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
/*
 * reclaim_clean_pages_from_list - reclaim the clean file pages on a list.
 * @zone:      zone the pages belong to
 * @page_list: list of pages linked through ->lru
 *
 * Moves every clean file-cache page from @page_list onto a private list
 * (clearing its active flag), runs that list through shrink_page_list()
 * with reclaim forced, and splices whatever was not reclaimed back onto
 * @page_list.  The NR_ISOLATED_FILE counter of @zone is reduced by the
 * number of pages reclaimed.
 *
 * Returns the number of pages reclaimed.
 */
unsigned long reclaim_clean_pages_from_list(struct zone *zone,
					    struct list_head *page_list)
{
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.priority = DEF_PRIORITY,
		.may_unmap = 1,
	};
	unsigned long ret;
	/*
	 * shrink_page_list() accumulates into these with "+=", so they must
	 * start out initialized even though the totals are discarded.
	 */
	unsigned long dummy1 = 0, dummy2 = 0;
	struct page *page, *next;
	LIST_HEAD(clean_pages);

	list_for_each_entry_safe(page, next, page_list, lru) {
		if (page_is_file_cache(page) && !PageDirty(page)) {
			ClearPageActive(page);
			list_move(&page->lru, &clean_pages);
		}
	}

	ret = shrink_page_list(&clean_pages, zone, &sc,
			       TTU_UNMAP|TTU_IGNORE_ACCESS,
			       &dummy1, &dummy2, true);
	list_splice(&clean_pages, page_list);
	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
	return ret;
}
/*
* Attempt to remove the specified page from its LRU. Only take this page