/*
     * Percentage of pages in a zone that must be unmapped for zone_reclaim to
     * occur.
     */
    int sysctl_min_unmapped_ratio = 1;
    
    
    /*
     * If the number of slab pages in a zone grows beyond this percentage then
     * slab reclaim needs to occur.
     */
    int sysctl_min_slab_ratio = 5;
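/*
 * Illustrative sketch (not part of this file): how the two ratios above are
 * conventionally turned into the per-zone page thresholds
 * (zone->min_unmapped_pages and zone->min_slab_pages) that the reclaim code
 * below compares against.  The helper name is hypothetical; in mainline the
 * conversion lives in the sysctl handlers in mm/page_alloc.c.
 */
#if 0	/* example only */
static void example_set_zone_reclaim_thresholds(struct zone *zone)
{
	/* e.g. at least 1% of the zone must be unmapped file pages */
	zone->min_unmapped_pages = (zone->present_pages *
				    sysctl_min_unmapped_ratio) / 100;
	/* e.g. slab may grow to 5% of the zone before slab reclaim kicks in */
	zone->min_slab_pages = (zone->present_pages *
				sysctl_min_slab_ratio) / 100;
}
#endif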
    
    
    static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
    {
    	unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
    	unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
    		zone_page_state(zone, NR_ACTIVE_FILE);
    
    	/*
    	 * It's possible for there to be more file mapped pages than
    	 * accounted for by the pages on the file LRU lists because
    	 * tmpfs pages accounted for as ANON can also be FILE_MAPPED
    	 */
    	return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
    }
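/*
 * Worked example of the clamp above (illustrative numbers): with
 * NR_ACTIVE_FILE + NR_INACTIVE_FILE = 1000 and NR_FILE_MAPPED = 1200
 * (tmpfs pages live on the anon LRU yet still count as FILE_MAPPED),
 * file_lru - file_mapped would wrap around as an unsigned value, so the
 * function reports 0 reclaimable unmapped file pages instead.
 */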
    
    /* Work out how many page cache pages we can reclaim in this reclaim_mode */
    static long zone_pagecache_reclaimable(struct zone *zone)
    {
    	long nr_pagecache_reclaimable;
    	long delta = 0;
    
    	/*
    	 * If RECLAIM_SWAP is set, then all file pages are considered
    	 * potentially reclaimable. Otherwise, we have to worry about
    	 * pages like swapcache and zone_unmapped_file_pages() provides
    	 * a better estimate
    	 */
    	if (zone_reclaim_mode & RECLAIM_SWAP)
    		nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
    	else
    		nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
    
    	/* If we can't clean pages, remove dirty pages from consideration */
    	if (!(zone_reclaim_mode & RECLAIM_WRITE))
    		delta += zone_page_state(zone, NR_FILE_DIRTY);
    
    	/* Watch for any possible underflows due to delta */
    	if (unlikely(delta > nr_pagecache_reclaimable))
    		delta = nr_pagecache_reclaimable;
    
    	return nr_pagecache_reclaimable - delta;
    }
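/*
 * For reference: zone_reclaim_mode, tested above, is the bit mask set via
 * /proc/sys/vm/zone_reclaim_mode.  The bits are defined earlier in this
 * file; conventionally they are
 *
 *	RECLAIM_OFF	0
 *	RECLAIM_ZONE	(1<<0)	run shrink_inactive_list on the zone
 *	RECLAIM_WRITE	(1<<1)	write out dirty pages during reclaim
 *	RECLAIM_SWAP	(1<<2)	unmap/swap pages during reclaim
 *
 * Treat the values shown here as a reminder, not a definition.
 */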
    
    
    /*
     * Try to free up some pages from this zone through reclaim.
     */
    
static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
    	/* Minimum pages needed in order to stay on node */
    
    	const unsigned long nr_pages = 1 << order;
    
    	struct task_struct *p = current;
    	struct reclaim_state reclaim_state;
    
	struct scan_control sc = {
		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
		.may_swap = 1,
		.nr_to_reclaim = max_t(unsigned long, nr_pages,
				       SWAP_CLUSTER_MAX),
		.gfp_mask = gfp_mask,
		.order = order,
		.priority = ZONE_RECLAIM_PRIORITY,
	};
	struct shrink_control shrink = {
		.gfp_mask = sc.gfp_mask,
	};
    
    	unsigned long nr_slab_pages0, nr_slab_pages1;
    
    	/*
    	 * We need to be able to allocate from the reserves for RECLAIM_SWAP
    	 * and we also need to be able to write out pages for RECLAIM_WRITE
    	 * and RECLAIM_SWAP.
    	 */
    	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
    
    	lockdep_set_current_reclaim_state(gfp_mask);
    
    	reclaim_state.reclaimed_slab = 0;
    	p->reclaim_state = &reclaim_state;
    
    	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
    
    		/*
    		 * Free memory by calling shrink zone with increasing
    		 * priorities until we have enough memory freed.
    		 */
    		do {
    
    			shrink_zone(zone, &sc);
		} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
	}

	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
    	if (nr_slab_pages0 > zone->min_slab_pages) {
    
		/*
		 * shrink_slab() does not currently allow us to determine how
		 * many pages were freed in this zone. So we take the current
		 * number of slab pages and shake the slab until it is reduced
		 * by the same nr_pages that we used for reclaiming unmapped
		 * pages.
		 *
		 * Note that shrink_slab will free memory on all zones and may
		 * take a long time.
		 */
    		for (;;) {
    			unsigned long lru_pages = zone_reclaimable_pages(zone);
    
    			/* No reclaimable slab or very low memory pressure */
    
    			if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages))
    
    				break;
    
    			/* Freed enough memory */
    			nr_slab_pages1 = zone_page_state(zone,
    							NR_SLAB_RECLAIMABLE);
    			if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
    				break;
    		}
    
    
    		/*
    		 * Update nr_reclaimed by the number of slab pages we
    		 * reclaimed from this zone.
    		 */
    
    		nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
    		if (nr_slab_pages1 < nr_slab_pages0)
			sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
	}

	p->reclaim_state = NULL;
    
    	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
    
    	lockdep_clear_current_reclaim_state();
    
	return sc.nr_reclaimed >= nr_pages;
}
    
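/*
 * For reference: zone_reclaim() below reports its outcome with the
 * ZONE_RECLAIM_* codes declared in the swap header (linux/swap.h in
 * mainline), conventionally
 *
 *	ZONE_RECLAIM_NOSCAN	(-2)	did not scan at all
 *	ZONE_RECLAIM_FULL	(-1)	scanned but found nothing reclaimable
 *	ZONE_RECLAIM_SOME	 (0)	reclaimed some pages, not enough
 *	ZONE_RECLAIM_SUCCESS	 (1)	reclaimed at least nr_pages
 *
 * __zone_reclaim() above only ever yields the last two (0 or 1).
 */
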
    int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
    {
	int node_id;
	int ret;

	/*
	 * Zone reclaim reclaims unmapped file backed pages and
	 * slab pages if we are over the defined limits.
	 *
	 * A small portion of unmapped file backed pages is needed for
	 * file I/O otherwise pages read by file I/O will be immediately
	 * thrown out if the zone is overallocated. So we do not reclaim
	 * if less than a specified percentage of the zone is used by
	 * unmapped file backed pages.
	 */
	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
		return ZONE_RECLAIM_FULL;

	if (zone->all_unreclaimable)
		return ZONE_RECLAIM_FULL;

	/*
	 * Do not scan if the allocation should not be delayed.
	 */
	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
		return ZONE_RECLAIM_NOSCAN;
    
    	/*
    	 * Only run zone reclaim on the local zone or on zones that do not
    	 * have associated processors. This will favor the local processor
    	 * over remote processors and spread off node memory allocations
    	 * as wide as possible.
    	 */
    
    	node_id = zone_to_nid(zone);
    
	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
		return ZONE_RECLAIM_NOSCAN;

	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
		return ZONE_RECLAIM_NOSCAN;

    	ret = __zone_reclaim(zone, gfp_mask, order);
    	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
    
    
    	if (!ret)
		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);

	return ret;
}
    
    
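/*
 * Illustrative sketch (example only, not part of this file): how a caller
 * such as get_page_from_freelist() in mm/page_alloc.c is expected to consume
 * zone_reclaim()'s return value.  The wrapper below is hypothetical;
 * zone_watermark_ok() and the parameter names come from the page allocator
 * and may differ between kernel versions.
 */
#if 0	/* example only */
static bool example_try_zone_reclaim(struct zone *zone, gfp_t gfp_mask,
				     unsigned int order, unsigned long mark,
				     int classzone_idx, int alloc_flags)
{
	switch (zone_reclaim(zone, gfp_mask, order)) {
	case ZONE_RECLAIM_NOSCAN:	/* did not scan */
	case ZONE_RECLAIM_FULL:		/* scanned but nothing reclaimable */
		return false;
	default:
		/* enough was reclaimed only if the watermark is now met */
		return zone_watermark_ok(zone, order, mark,
					 classzone_idx, alloc_flags);
	}
}
#endif
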
/*
 * page_evictable - test whether a page is evictable
 * @page: the page to test
 * @vma: the VMA in which the page is or will be mapped, may be NULL
 *
 * Test whether page is evictable--i.e., should be placed on active/inactive
 * lists vs unevictable list.  The vma argument is !NULL when called from the
 * fault path to determine how to instantiate a new page.
 *
 * Reasons page might not be evictable:
 * (1) page's mapping marked unevictable
 * (2) page is part of an mlocked VMA
 *
 */
    int page_evictable(struct page *page, struct vm_area_struct *vma)
    {
    
    
    	if (mapping_unevictable(page_mapping(page)))
    		return 0;
    
    
	if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page)))
		return 0;

	return 1;
}
    
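/*
 * Illustrative sketch (example only): the typical caller pattern -- pages
 * that fail page_evictable() are parked on the unevictable LRU instead of
 * the active/inactive lists, as putback_lru_page() does earlier in this
 * file.  The helper below is hypothetical and simplified.
 */
#if 0	/* example only */
static void example_choose_lru(struct page *page)
{
	if (page_evictable(page, NULL))
		lru_cache_add_lru(page, page_lru_base_type(page));
	else
		add_page_to_unevictable_list(page);
}
#endif
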
    #ifdef CONFIG_SHMEM
    
/**
 * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
 * @pages:	array of pages to check
 * @nr_pages:	number of pages to check
 *
 * Checks pages for evictability and moves them to the appropriate lru list.
 *
 * This function is only used for SysV IPC SHM_UNLOCK.
 */
void check_move_unevictable_pages(struct page **pages, int nr_pages)
{
    
    	struct lruvec *lruvec;
    
    	struct zone *zone = NULL;
    	int pgscanned = 0;
    	int pgrescued = 0;
    	int i;
    
    	for (i = 0; i < nr_pages; i++) {
    		struct page *page = pages[i];
    		struct zone *pagezone;
    
    		pgscanned++;
    		pagezone = page_zone(page);
    		if (pagezone != zone) {
    			if (zone)
    				spin_unlock_irq(&zone->lru_lock);
    			zone = pagezone;
    			spin_lock_irq(&zone->lru_lock);
    		}
    
    		lruvec = mem_cgroup_page_lruvec(page, zone);
    
    		if (!PageLRU(page) || !PageUnevictable(page))
    			continue;
    
    		if (page_evictable(page, NULL)) {
    			enum lru_list lru = page_lru_base_type(page);
    
    			VM_BUG_ON(PageActive(page));
    			ClearPageUnevictable(page);
    
    			del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
			add_page_to_lru_list(page, lruvec, lru);
			pgrescued++;
		}
	}

	if (zone) {
    		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
    		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
		spin_unlock_irq(&zone->lru_lock);
	}
}
    #endif /* CONFIG_SHMEM */
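
/*
 * Illustrative sketch (example only): the SysV IPC SHM_UNLOCK path
 * (shmem_unlock_mapping() in mm/shmem.c) feeds pages to
 * check_move_unevictable_pages() a pagevec at a time, roughly like this.
 * The lookup and termination details are simplified here.
 */
#if 0	/* example only */
static void example_unlock_mapping(struct address_space *mapping)
{
	struct pagevec pvec;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);
	while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
		check_move_unevictable_pages(pvec.pages, pagevec_count(&pvec));
		index = pvec.pages[pagevec_count(&pvec) - 1]->index + 1;
		pagevec_release(&pvec);
		cond_resched();
	}
}
#endif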
    
static void warn_scan_unevictable_pages(void)
{
	printk_once(KERN_WARNING
		    "%s: The scan_unevictable_pages sysctl/node-interface has been "
		    "disabled for lack of a legitimate use case.  If you have "
		    "one, please send an email to linux-mm@kvack.org.\n",
		    current->comm);
    
    }
    
    /*
     * scan_unevictable_pages [vm] sysctl handler.  On demand re-scan of
     * all nodes' unevictable lists for evictable pages
     */
    unsigned long scan_unevictable_pages;
    
int scan_unevictable_handler(struct ctl_table *table, int write,
			   void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	warn_scan_unevictable_pages();
	proc_doulongvec_minmax(table, write, buffer, length, ppos);
    
    	scan_unevictable_pages = 0;
    	return 0;
    }
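
/*
 * For reference (example only): the handler above is hooked into the vm
 * sysctl table in kernel/sysctl.c with an entry along these lines, which
 * is what backs /proc/sys/vm/scan_unevictable_pages.
 */
#if 0	/* example only */
	{
		.procname	= "scan_unevictable_pages",
		.data		= &scan_unevictable_pages,
		.maxlen		= sizeof(scan_unevictable_pages),
		.mode		= 0644,
		.proc_handler	= scan_unevictable_handler,
	},
#endif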
    
    
    /*
     * per node 'scan_unevictable_pages' attribute.  On demand re-scan of
     * a specified node's per zone unevictable lists for evictable pages.
     */
    
    
static ssize_t read_scan_unevictable_node(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	warn_scan_unevictable_pages();
	return sprintf(buf, "0\n");	/* always zero; should fit... */
    }
    
    
static ssize_t write_scan_unevictable_node(struct device *dev,
					   struct device_attribute *attr,
					   const char *buf, size_t count)
{
	warn_scan_unevictable_pages();
	return 1;
}

static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
			read_scan_unevictable_node,
			write_scan_unevictable_node);
    
    int scan_unevictable_register_node(struct node *node)
    {
    
    	return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages);
    
    }
    
    void scan_unevictable_unregister_node(struct node *node)
    {
    
	device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages);
}
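
/*
 * Usage note (assumption about the call site): scan_unevictable_register_node()
 * and scan_unevictable_unregister_node() are expected to be called when a node
 * device is created or removed (drivers/base/node.c in mainline), so each NUMA
 * node exposes a /sys/devices/system/node/nodeN/scan_unevictable_pages
 * attribute backed by the read/write handlers above.
 */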