Newer
Older

KOSAKI Motohiro
committed
switch (pageout(page, mapping, sc)) {
case PAGE_KEEP:
goto keep_locked;
case PAGE_ACTIVATE:
goto activate_locked;
case PAGE_SUCCESS:

KOSAKI Motohiro
committed
if (PageWriteback(page))

KOSAKI Motohiro
committed
if (PageDirty(page))

KOSAKI Motohiro
committed
/*
* A synchronous write - probably a ramdisk. Go
* ahead and try to reclaim the page.
*/
goto keep;
if (PageDirty(page) || PageWriteback(page))
goto keep_locked;
mapping = page_mapping(page);
case PAGE_CLEAN:
; /* try to free the page below */
}
}
/*
* If the page has buffers, try to free the buffer mappings
* associated with this page. If we succeed we try to free
* the page as well.
*
* We do this even if the page is PageDirty().
* try_to_release_page() does not perform I/O, but it is
* possible for a page to have PageDirty set, but it is actually
* clean (all its buffers are clean). This happens if the
* buffers were written out directly, with submit_bh(). ext3
* will do this, as well as the blockdev mapping.
* try_to_release_page() will discover that cleanness and will
* drop the buffers and mark the page clean - it can be freed.
*
* Rarely, pages can have buffers and no ->mapping. These are
* the pages which were not successfully invalidated in
* truncate_complete_page(). We try to drop those buffers here
* and if that worked, and the page is no longer mapped into
* process address space (page_count == 1) it can be freed.
* Otherwise, leave the page on the LRU so it is swappable.
*/
if (page_has_private(page)) {
if (!try_to_release_page(page, sc->gfp_mask))
goto activate_locked;
if (!mapping && page_count(page) == 1) {
unlock_page(page);
if (put_page_testzero(page))
goto free_it;
else {
/*
* rare race with speculative reference.
* the speculative reference will free
* this page shortly, so we may
* increment nr_reclaimed here (and
* leave it off the LRU).
*/
nr_reclaimed++;
continue;
}
}
if (!mapping || !__remove_mapping(mapping, page))
goto keep_locked;
/*
* At this point, we have no other references and there is
* no way to pick any more up (removed from LRU, removed
* from pagecache). Can use non-atomic bitops now (and
* we obviously don't have to worry about waking up a process
* waiting on the page lock, because there are no references.
*/
__clear_page_locked(page);
/*
* Is there need to periodically free_page_list? It would
* appear not as the counts should be low
*/
list_add(&page->lru, &free_pages);
if (PageSwapCache(page))
try_to_free_swap(page);
unlock_page(page);
putback_lru_page(page);
continue;
/* Not a candidate for swapping, so reclaim swap space. */
if (PageSwapCache(page) && vm_swap_full())
try_to_free_swap(page);
VM_BUG_ON_PAGE(PageActive(page), page);
SetPageActive(page);
pgactivate++;
keep_locked:
unlock_page(page);
keep:
list_add(&page->lru, &ret_pages);
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
free_hot_cold_page_list(&free_pages, 1);
count_vm_events(PGACTIVATE, pgactivate);
mem_cgroup_uncharge_end();
*ret_nr_dirty += nr_dirty;
*ret_nr_congested += nr_congested;

Mel Gorman
committed
*ret_nr_unqueued_dirty += nr_unqueued_dirty;

Mel Gorman
committed
*ret_nr_writeback += nr_writeback;
*ret_nr_immediate += nr_immediate;

Minchan Kim
committed
unsigned long reclaim_clean_pages_from_list(struct zone *zone,
struct list_head *page_list)
{
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
.may_unmap = 1,
};
unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;

Minchan Kim
committed
struct page *page, *next;
LIST_HEAD(clean_pages);
list_for_each_entry_safe(page, next, page_list, lru) {
if (page_is_file_cache(page) && !PageDirty(page) &&
!isolated_balloon_page(page)) {

Minchan Kim
committed
ClearPageActive(page);
list_move(&page->lru, &clean_pages);
}
}
ret = shrink_page_list(&clean_pages, zone, &sc,
TTU_UNMAP|TTU_IGNORE_ACCESS,
&dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);

Minchan Kim
committed
list_splice(&clean_pages, page_list);
__mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
return ret;
}
/*
* Attempt to remove the specified page from its LRU. Only take this page
* if it is of the appropriate PageActive status. Pages which are being
* freed elsewhere are also ignored.
*
* page: page to consider
* mode: one of the LRU isolation modes defined above
*
* returns 0 on success, -ve errno on failure.
*/
int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
int ret = -EINVAL;
/* Only take pages on the LRU. */
if (!PageLRU(page))
return ret;
/* Compaction should not handle unevictable pages but CMA can do so */
if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
/*
* To minimise LRU disruption, the caller can indicate that it only
* wants to isolate pages it will be able to operate on without
* blocking - clean pages for the most part.
*
* ISOLATE_CLEAN means that only clean pages should be isolated. This
* is used by reclaim when it is cannot write to backing storage
*
* ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
* that it is possible to migrate without blocking
*/
if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
/* All the caller can do on PageWriteback is block */
if (PageWriteback(page))
return ret;
if (PageDirty(page)) {
struct address_space *mapping;
/* ISOLATE_CLEAN means only clean pages */
if (mode & ISOLATE_CLEAN)
return ret;
/*
* Only pages without mappings or that have a
* ->migratepage callback are possible to migrate
* without blocking
*/
mapping = page_mapping(page);
if (mapping && !mapping->a_ops->migratepage)
return ret;
}
}
if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
return ret;
if (likely(get_page_unless_zero(page))) {
/*
* Be careful not to clear PageLRU until after we're
* sure the page is not being freed elsewhere -- the
* page release code relies on it.
*/
ClearPageLRU(page);
ret = 0;
}
return ret;
}
/*
* zone->lru_lock is heavily contended. Some of the functions that
* shrink the lists perform better by taking out a batch of pages
* and working on them outside the LRU lock.
*
* For pagecache intensive workloads, this function is the hottest
* spot in the kernel (apart from copy_*_user functions).
*
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
* @lruvec: The LRU vector to pull pages from.
* @nr_scanned: The number of pages that were scanned.
* @sc: The scan_control struct for this reclaim session
* @lru: LRU list id for isolating
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
struct lruvec *lruvec, struct list_head *dst,
unsigned long *nr_scanned, struct scan_control *sc,
isolate_mode_t mode, enum lru_list lru)
struct list_head *src = &lruvec->lists[lru];
for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
VM_BUG_ON_PAGE(!PageLRU(page), page);
switch (__isolate_lru_page(page, mode)) {
nr_pages = hpage_nr_pages(page);
mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
break;
case -EBUSY:
/* else it is being freed elsewhere */
list_move(&page->lru, src);
continue;
trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
nr_taken, mode, is_file_lru(lru));
/**
* isolate_lru_page - tries to isolate a page from its LRU list
* @page: page to isolate from its LRU list
*
* Isolates a @page from an LRU list, clears PageLRU and adjusts the
* vmstat statistic corresponding to whatever LRU list the page was on.
*
* Returns 0 if the page was removed from an LRU list.
* Returns -EBUSY if the page was not on an LRU list.
*
* The returned page will have PageLRU() cleared. If it was found on
* the active list, it will have PageActive set. If it was found on
* the unevictable list, it will have the PageUnevictable bit set. That flag
* may need to be cleared by the caller before letting the page go.
*
* The vmstat statistic corresponding to the list on which the page was
* found will be decremented.
*
* Restrictions:
* (1) Must be called with an elevated refcount on the page. This is a
* fundamentnal difference from isolate_lru_pages (which is called
* without a stable reference).
* (2) the lru_lock must not be held.
* (3) interrupts must be enabled.
*/
int isolate_lru_page(struct page *page)
{
int ret = -EBUSY;
VM_BUG_ON_PAGE(!page_count(page), page);
if (PageLRU(page)) {
struct zone *zone = page_zone(page);
spin_lock_irq(&zone->lru_lock);
lruvec = mem_cgroup_page_lruvec(page, zone);
if (PageLRU(page)) {
get_page(page);
del_page_from_lru_list(page, lruvec, lru);
ret = 0;
}
spin_unlock_irq(&zone->lru_lock);
}
return ret;
}
/*
* A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
* then get resheduled. When there are massive number of tasks doing page
* allocation, such sleeping direct reclaimers may keep piling up on each CPU,
* the LRU list will go small and be scanned faster than necessary, leading to
* unnecessary swapping, thrashing and OOM.
*/
static int too_many_isolated(struct zone *zone, int file,
struct scan_control *sc)
{
unsigned long inactive, isolated;
if (current_is_kswapd())
return 0;
if (!global_reclaim(sc))
return 0;
if (file) {
inactive = zone_page_state(zone, NR_INACTIVE_FILE);
isolated = zone_page_state(zone, NR_ISOLATED_FILE);
} else {
inactive = zone_page_state(zone, NR_INACTIVE_ANON);
isolated = zone_page_state(zone, NR_ISOLATED_ANON);
}
/*
* GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
* won't get blocked by normal direct-reclaimers, forming a circular
* deadlock.
*/
if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
inactive >>= 3;
return isolated > inactive;
}
static noinline_for_stack void
putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
struct zone *zone = lruvec_zone(lruvec);
/*
* Put back any unfreeable pages.
*/
while (!list_empty(page_list)) {
struct page *page = lru_to_page(page_list);
int lru;
VM_BUG_ON_PAGE(PageLRU(page), page);
list_del(&page->lru);
if (unlikely(!page_evictable(page))) {
spin_unlock_irq(&zone->lru_lock);
putback_lru_page(page);
spin_lock_irq(&zone->lru_lock);
continue;
}
lruvec = mem_cgroup_page_lruvec(page, zone);
SetPageLRU(page);
lru = page_lru(page);
add_page_to_lru_list(page, lruvec, lru);
if (is_active_lru(lru)) {
int file = is_file_lru(lru);
int numpages = hpage_nr_pages(page);
reclaim_stat->recent_rotated[file] += numpages;
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
del_page_from_lru_list(page, lruvec, lru);
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
list_add(&page->lru, &pages_to_free);
}
}
/*
* To save our caller's stack, now use input list for pages to free.
*/
list_splice(&pages_to_free, page_list);
}
* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
* of reclaimed pages
static noinline_for_stack unsigned long
shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
struct scan_control *sc, enum lru_list lru)
unsigned long nr_dirty = 0;
unsigned long nr_congested = 0;

Mel Gorman
committed
unsigned long nr_unqueued_dirty = 0;

Mel Gorman
committed
unsigned long nr_writeback = 0;
unsigned long nr_immediate = 0;
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct zone *zone = lruvec_zone(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
while (unlikely(too_many_isolated(zone, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
/* We are about to die and free our memory. Return now. */
if (fatal_signal_pending(current))
return SWAP_CLUSTER_MAX;
}
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
if (!sc->may_writepage)
isolate_mode |= ISOLATE_CLEAN;
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
&nr_scanned, sc, isolate_mode, lru);
__mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
if (global_reclaim(sc)) {
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned);
spin_unlock_irq(&zone->lru_lock);
return 0;

Minchan Kim
committed
nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
&nr_dirty, &nr_unqueued_dirty, &nr_congested,
&nr_writeback, &nr_immediate,
false);

Andy Whitcroft
committed
spin_lock_irq(&zone->lru_lock);
reclaim_stat->recent_scanned[file] += nr_taken;
if (global_reclaim(sc)) {
if (current_is_kswapd())
__count_zone_vm_events(PGSTEAL_KSWAPD, zone,
nr_reclaimed);
else
__count_zone_vm_events(PGSTEAL_DIRECT, zone,
nr_reclaimed);
}
putback_inactive_pages(lruvec, &page_list);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
free_hot_cold_page_list(&page_list, 1);

Mel Gorman
committed
/*
* If reclaim is isolating dirty pages under writeback, it implies
* that the long-lived page allocation rate is exceeding the page
* laundering rate. Either the global limits are not being effective
* at throttling processes due to the page distribution throughout
* zones or there is heavy usage of a slow backing device. The
* only option is to throttle from reclaim context which is not ideal
* as there is no guarantee the dirtying process is throttled in the
* same way balance_dirty_pages() manages.
*
* Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number
* of pages under pages flagged for immediate reclaim and stall if any
* are encountered in the nr_immediate check below.

Mel Gorman
committed
*/

Mel Gorman
committed
if (nr_writeback && nr_writeback == nr_taken)
zone_set_flag(zone, ZONE_WRITEBACK);

Mel Gorman
committed

Mel Gorman
committed
/*
* memcg will stall in page writeback so only consider forcibly
* stalling for global reclaim

Mel Gorman
committed
*/
if (global_reclaim(sc)) {
/*
* Tag a zone as congested if all the dirty pages scanned were
* backed by a congested BDI and wait_iff_congested will stall.
*/
if (nr_dirty && nr_dirty == nr_congested)
zone_set_flag(zone, ZONE_CONGESTED);
/*
* If dirty pages are scanned that are not queued for IO, it
* implies that flushers are not keeping up. In this case, flag
* the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing
* pages from reclaim context. It will forcibly stall in the
* next check.
*/
if (nr_unqueued_dirty == nr_taken)
zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
/*
* In addition, if kswapd scans pages marked marked for
* immediate reclaim and under writeback (nr_immediate), it
* implies that pages are cycling through the LRU faster than
* they are written so also forcibly stall.
*/
if (nr_unqueued_dirty == nr_taken || nr_immediate)
congestion_wait(BLK_RW_ASYNC, HZ/10);

Mel Gorman
committed
}

Mel Gorman
committed
/*
* Stall direct reclaim for IO completions if underlying BDIs or zone
* is congested. Allow kswapd to continue until it starts encountering
* unqueued dirty pages or cycling through the LRU too quickly.
*/
if (!sc->hibernation_mode && !current_is_kswapd())
wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
zone_idx(zone),
nr_scanned, nr_reclaimed,
sc->priority,
}
/*
* This moves pages from the active list to the inactive list.
*
* We move them the other way if the page is referenced by one or more
* processes, from rmap.
*
* If the pages are mostly unmapped, the processing is fast and it is
* appropriate to hold zone->lru_lock across the whole operation. But if
* the pages are mapped, the processing is slow (page_referenced()) so we
* should drop zone->lru_lock around each page. It's impossible to balance
* this, so instead we remove the pages from the LRU while processing them.
* It is safe to rely on PG_active against the non-LRU pages in here because
* nobody will play with that bit on a non-LRU page.
*
* The downside is that we have to touch page->_count against each page.
* But we had to alter page->flags anyway.
*/

KAMEZAWA Hiroyuki
committed
static void move_active_pages_to_lru(struct lruvec *lruvec,
struct list_head *list,
enum lru_list lru)
{
struct zone *zone = lruvec_zone(lruvec);
unsigned long pgmoved = 0;
struct page *page;
while (!list_empty(list)) {
page = lru_to_page(list);
lruvec = mem_cgroup_page_lruvec(page, zone);
VM_BUG_ON_PAGE(PageLRU(page), page);
SetPageLRU(page);
nr_pages = hpage_nr_pages(page);
mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
if (put_page_testzero(page)) {
__ClearPageLRU(page);
__ClearPageActive(page);
del_page_from_lru_list(page, lruvec, lru);
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
list_add(&page->lru, pages_to_free);
}
}
__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
if (!is_active_lru(lru))
__count_vm_events(PGDEACTIVATE, pgmoved);
}

KAMEZAWA Hiroyuki
committed
static void shrink_active_list(unsigned long nr_to_scan,
struct lruvec *lruvec,

Johannes Weiner
committed
struct scan_control *sc,
enum lru_list lru)
unsigned long nr_taken;
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
unsigned long nr_rotated = 0;
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct zone *zone = lruvec_zone(lruvec);
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
if (!sc->may_writepage)
isolate_mode |= ISOLATE_CLEAN;
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
reclaim_stat->recent_scanned[file] += nr_taken;

KAMEZAWA Hiroyuki
committed
__count_zone_vm_events(PGREFILL, zone, nr_scanned);
__mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
spin_unlock_irq(&zone->lru_lock);
while (!list_empty(&l_hold)) {
cond_resched();
page = lru_to_page(&l_hold);
list_del(&page->lru);
if (unlikely(!page_evictable(page))) {
putback_lru_page(page);
continue;
}

Mel Gorman
committed
if (unlikely(buffer_heads_over_limit)) {
if (page_has_private(page) && trylock_page(page)) {
if (page_has_private(page))
try_to_release_page(page, 0);
unlock_page(page);
}
}

Johannes Weiner
committed
if (page_referenced(page, 0, sc->target_mem_cgroup,
&vm_flags)) {
nr_rotated += hpage_nr_pages(page);
/*
* Identify referenced, file-backed active pages and
* give them one more trip around the active list. So
* that executable code get better chances to stay in
* memory under moderate memory pressure. Anon pages
* are not likely to be evicted by use-once streaming
* IO, plus JVM can create lots of anon VM_EXEC pages,
* so we ignore them here.
*/
if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
list_add(&page->lru, &l_active);
continue;
}
}

KOSAKI Motohiro
committed
ClearPageActive(page); /* we are de-activating */
* Move pages back to the lru list.
spin_lock_irq(&zone->lru_lock);
* Count referenced pages from currently used mappings as rotated,
* even though only some of them are actually re-activated. This
* helps balance scan pressure between file and anonymous pages in
* get_scan_ratio.
reclaim_stat->recent_rotated[file] += nr_rotated;
move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
free_hot_cold_page_list(&l_hold, 1);
#ifdef CONFIG_SWAP
static int inactive_anon_is_low_global(struct zone *zone)
{
unsigned long active, inactive;
active = zone_page_state(zone, NR_ACTIVE_ANON);
inactive = zone_page_state(zone, NR_INACTIVE_ANON);
if (inactive * zone->inactive_ratio < active)
return 1;
return 0;
}
/**
* inactive_anon_is_low - check if anonymous pages need to be deactivated
* @lruvec: LRU vector to check
*
* Returns true if the zone does not have enough inactive anon pages,
* meaning some active anon pages need to be deactivated.
*/
static int inactive_anon_is_low(struct lruvec *lruvec)
/*
* If we don't have swap space, anonymous page deactivation
* is pointless.
*/
if (!total_swap_pages)
return 0;
if (!mem_cgroup_disabled())
return mem_cgroup_inactive_anon_is_low(lruvec);

Johannes Weiner
committed
return inactive_anon_is_low_global(lruvec_zone(lruvec));
static inline int inactive_anon_is_low(struct lruvec *lruvec)
{
return 0;
}
#endif
/**
* inactive_file_is_low - check if file pages need to be deactivated
* @lruvec: LRU vector to check
*
* When the system is doing streaming IO, memory pressure here
* ensures that active file pages get deactivated, until more
* than half of the file pages are on the inactive list.
*
* Once we get to that situation, protect the system's working
* set from being evicted by disabling active file page aging.
*
* This uses a different ratio than the anonymous pages, because
* the page cache uses a use-once replacement algorithm.
*/
static int inactive_file_is_low(struct lruvec *lruvec)
unsigned long inactive;
unsigned long active;
inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE);
active = get_lru_size(lruvec, LRU_ACTIVE_FILE);
return active > inactive;
static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
return inactive_file_is_low(lruvec);
else
return inactive_anon_is_low(lruvec);
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
struct lruvec *lruvec, struct scan_control *sc)
if (is_active_lru(lru)) {
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
static int vmscan_swappiness(struct scan_control *sc)
{
if (global_reclaim(sc))
return vm_swappiness;
return mem_cgroup_swappiness(sc->target_mem_cgroup);
}
enum scan_balance {
SCAN_EQUAL,
SCAN_FRACT,
SCAN_ANON,
SCAN_FILE,
};
/*
* Determine how aggressively the anon and file LRU lists should be
* scanned. The relative value of each set of LRU lists is determined
* by looking at the fraction of the pages scanned we did rotate back
* onto the active list instead of evict.
*
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
unsigned long *nr)
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
u64 denominator = 0; /* gcc */
struct zone *zone = lruvec_zone(lruvec);
unsigned long anon_prio, file_prio;
enum scan_balance scan_balance;
unsigned long anon, file, free;
bool force_scan = false;
/*
* If the zone or memcg is small, nr[l] can be 0. This
* results in no scanning on this priority and a potential
* priority drop. Global direct reclaim can go to the next
* zone and tends to have no problems. Global kswapd is for
* zone balancing and it needs to scan a minimum amount. When
* reclaiming for a memcg, a priority drop can cause high
* latencies, so it's better to scan a minimum amount there as
* well.
*/
if (current_is_kswapd() && !zone_reclaimable(zone))
force_scan = true;
if (!global_reclaim(sc))
force_scan = true;
/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
/*
* Global reclaim will swap to prevent OOM even with no
* swappiness, but memcg users want to use this knob to
* disable swapping for individual groups completely when
* using the memory controller's swap limit feature would be
* too expensive.
*/
if (!global_reclaim(sc) && !vmscan_swappiness(sc)) {
goto out;
}
/*
* Do not apply any pressure balancing cleverness when the
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
if (!sc->priority && vmscan_swappiness(sc)) {
goto out;
}
anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
get_lru_size(lruvec, LRU_INACTIVE_ANON);
file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
get_lru_size(lruvec, LRU_INACTIVE_FILE);
/*
* If it's foreseeable that reclaiming the file cache won't be
* enough to get the zone back into a desirable shape, we have
* to swap. Better start now and leave the - probably heavily
* thrashing - remaining file pages alone.
*/
if (global_reclaim(sc)) {
free = zone_page_state(zone, NR_FREE_PAGES);
if (unlikely(file + free <= high_wmark_pages(zone))) {
/*
* There is enough inactive page cache, do not reclaim
* anything from the anonymous working set right now.
*/
if (!inactive_file_is_low(lruvec)) {
goto out;
}
scan_balance = SCAN_FRACT;
/*
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
anon_prio = vmscan_swappiness(sc);
/*
* OK, so we have swap space and a fair amount of page cache
* pages. We use the recently rotated / recently scanned
* ratios to determine how valuable each cache is.
*
* Because workloads change over time (and to avoid overflow)
* we keep these statistics as a floating average, which ends
* up weighing recent references more than old ones.
*
* anon in [0], file in [1]
*/
spin_lock_irq(&zone->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
reclaim_stat->recent_rotated[0] /= 2;
if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
reclaim_stat->recent_scanned[1] /= 2;
reclaim_stat->recent_rotated[1] /= 2;
* The amount of pressure on anon vs file pages is inversely
* proportional to the fraction of recently scanned pages on
* each list that were recently referenced and in active use.
ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
ap /= reclaim_stat->recent_rotated[0] + 1;
fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
fp /= reclaim_stat->recent_rotated[1] + 1;
spin_unlock_irq(&zone->lru_lock);
fraction[0] = ap;
fraction[1] = fp;
denominator = ap + fp + 1;
out:
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
unsigned long size;
size = get_lru_size(lruvec, lru);
scan = size >> sc->priority;
if (!scan && force_scan)
scan = min(size, SWAP_CLUSTER_MAX);
switch (scan_balance) {
case SCAN_EQUAL:
/* Scan lists relative to size */
break;
case SCAN_FRACT:
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
*/
scan = div64_u64(scan * fraction[file], denominator);
break;
case SCAN_FILE:
case SCAN_ANON:
/* Scan one type exclusively */
if ((scan_balance == SCAN_FILE) != file)
scan = 0;