    /*
     *  linux/mm/vmscan.c
     *
     *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
     *
     *  Swap reorganised 29.12.95, Stephen Tweedie.
     *  kswapd added: 7.1.96  sct
     *  Removed kswapd_ctl limits, and swap out as many pages as needed
     *  to bring the system back to freepages.high: 2.4.97, Rik van Riel.
     *  Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
     *  Multiqueue VM started 5.8.00, Rik van Riel.
     */
    
    #include <linux/mm.h>
    #include <linux/module.h>
    #include <linux/slab.h>
    #include <linux/kernel_stat.h>
    #include <linux/swap.h>
    #include <linux/pagemap.h>
    #include <linux/init.h>
    #include <linux/highmem.h>
    #include <linux/file.h>
    #include <linux/writeback.h>
    #include <linux/blkdev.h>
    #include <linux/buffer_head.h>	/* for try_to_release_page(),
    					buffer_heads_over_limit */
    #include <linux/mm_inline.h>
    #include <linux/pagevec.h>
    #include <linux/backing-dev.h>
    #include <linux/rmap.h>
    #include <linux/topology.h>
    #include <linux/cpu.h>
    #include <linux/cpuset.h>
    #include <linux/notifier.h>
    #include <linux/rwsem.h>
    
    #include <asm/tlbflush.h>
    #include <asm/div64.h>
    
    #include <linux/swapops.h>
    
    /* possible outcome of pageout() */
    typedef enum {
    	/* failed to write page out, page is locked */
    	PAGE_KEEP,
    	/* move page to the active list, page is locked */
    	PAGE_ACTIVATE,
    	/* page has been sent to the disk successfully, page is unlocked */
    	PAGE_SUCCESS,
    	/* page is clean and locked */
    	PAGE_CLEAN,
    } pageout_t;
    
    struct scan_control {
    	/* Incremented by the number of inactive pages that were scanned */
    	unsigned long nr_scanned;
    
    	unsigned long nr_mapped;	/* From page_state */
    
	/* This context's GFP mask */
	gfp_t gfp_mask;

	int may_writepage;

	/* Can pages be swapped as part of reclaim? */
	int may_swap;

    	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
    	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
    	 * In this context, it doesn't matter that we scan the
    	 * whole list at once. */
    	int swap_cluster_max;
    };
    
    /*
 * The list of shrinker callbacks used to apply pressure to
     * ageable caches.
     */
    struct shrinker {
    	shrinker_t		shrinker;
    	struct list_head	list;
    	int			seeks;	/* seeks to recreate an obj */
    	long			nr;	/* objs pending delete */
    };
    
    #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
    
    #ifdef ARCH_HAS_PREFETCH
    #define prefetch_prev_lru_page(_page, _base, _field)			\
    	do {								\
    		if ((_page)->lru.prev != _base) {			\
    			struct page *prev;				\
    									\
    			prev = lru_to_page(&(_page->lru));		\
    			prefetch(&prev->_field);			\
    		}							\
    	} while (0)
    #else
    #define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
    #endif
    
    #ifdef ARCH_HAS_PREFETCHW
    #define prefetchw_prev_lru_page(_page, _base, _field)			\
    	do {								\
    		if ((_page)->lru.prev != _base) {			\
    			struct page *prev;				\
    									\
    			prev = lru_to_page(&(_page->lru));		\
    			prefetchw(&prev->_field);			\
    		}							\
    	} while (0)
    #else
    #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
    #endif
    
    /*
     * From 0 .. 100.  Higher means more swappy.
     */
    int vm_swappiness = 60;
    static long total_memory;
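/*
 * Note: vm_swappiness is tunable at runtime through the vm.swappiness
 * sysctl (/proc/sys/vm/swappiness).  Higher values make reclaim more
 * willing to swap anonymous pages; lower values favour dropping page
 * cache instead.
 */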
    
    static LIST_HEAD(shrinker_list);
    static DECLARE_RWSEM(shrinker_rwsem);
    
    /*
     * Add a shrinker callback to be called from the vm
     */
    struct shrinker *set_shrinker(int seeks, shrinker_t theshrinker)
    {
            struct shrinker *shrinker;
    
            shrinker = kmalloc(sizeof(*shrinker), GFP_KERNEL);
            if (shrinker) {
    	        shrinker->shrinker = theshrinker;
    	        shrinker->seeks = seeks;
    	        shrinker->nr = 0;
    	        down_write(&shrinker_rwsem);
    	        list_add_tail(&shrinker->list, &shrinker_list);
    	        up_write(&shrinker_rwsem);
    	}
    	return shrinker;
    }
    EXPORT_SYMBOL(set_shrinker);
    
    /*
     * Remove one
     */
    void remove_shrinker(struct shrinker *shrinker)
    {
    	down_write(&shrinker_rwsem);
    	list_del(&shrinker->list);
    	up_write(&shrinker_rwsem);
    	kfree(shrinker);
    }
    EXPORT_SYMBOL(remove_shrinker);
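
/*
 * Illustrative sketch (not part of this file): a cache might register and
 * unregister a shrinker roughly as below.  The callback contract is
 * inferred from how shrink_slab() invokes it: called with nr_to_scan == 0
 * it reports the number of freeable objects; called with a non-zero
 * nr_to_scan it frees up to that many objects and returns the remaining
 * count, or -1 if it cannot make progress under this gfp_mask.  The names
 * my_cache_shrink(), my_cache_count(), my_cache_free() and my_shrinker
 * are hypothetical.
 *
 *	static int my_cache_shrink(int nr_to_scan, gfp_t gfp_mask)
 *	{
 *		if (!nr_to_scan)
 *			return my_cache_count();
 *		return my_cache_free(nr_to_scan, gfp_mask);
 *	}
 *
 *	my_shrinker = set_shrinker(DEFAULT_SEEKS, my_cache_shrink);
 *	...
 *	remove_shrinker(my_shrinker);
 */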
    
    #define SHRINK_BATCH 128
    /*
     * Call the shrink functions to age shrinkable caches
     *
     * Here we assume it costs one seek to replace a lru page and that it also
     * takes a seek to recreate a cache object.  With this in mind we age equal
     * percentages of the lru and ageable caches.  This should balance the seeks
     * generated by these structures.
     *
 * If the vm encountered mapped pages on the LRU it increases the pressure on
 * slab to avoid swapping.
 *
 * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
 *
 * `lru_pages' represents the number of on-LRU pages in all the zones which
 * are eligible for the caller's allocation attempt.  It is used for balancing
 * slab reclaim versus page reclaim.
 *
 * Returns the number of slab objects which we shrunk.
 */
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
			unsigned long lru_pages)
{
	struct shrinker *shrinker;
	unsigned long ret = 0;

	if (scanned == 0)
		scanned = SWAP_CLUSTER_MAX;

	if (!down_read_trylock(&shrinker_rwsem))
		return 1;	/* Assume we'll be able to shrink next time */

    	list_for_each_entry(shrinker, &shrinker_list, list) {
    		unsigned long long delta;
    		unsigned long total_scan;
    
		unsigned long max_pass = (*shrinker->shrinker)(0, gfp_mask);

		delta = (4 * scanned) / shrinker->seeks;
		delta *= max_pass;
		do_div(delta, lru_pages + 1);
		shrinker->nr += delta;
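		/*
		 * Worked example (illustrative numbers only): with
		 * scanned = 1024, seeks = 2, max_pass = 10000 freeable
		 * objects and lru_pages = 100000, delta works out to
		 * (4 * 1024 / 2) * 10000 / 100001 ~= 204 objects, which
		 * accumulate in shrinker->nr and are then scanned in
		 * SHRINK_BATCH sized chunks below.
		 */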
    
    		if (shrinker->nr < 0) {
    			printk(KERN_ERR "%s: nr=%ld\n",
    					__FUNCTION__, shrinker->nr);
    			shrinker->nr = max_pass;
    		}
    
    		/*
    		 * Avoid risking looping forever due to too large nr value:
		 * never try to free more than twice the estimated number of
    		 * freeable entries.
    		 */
    		if (shrinker->nr > max_pass * 2)
    			shrinker->nr = max_pass * 2;
    
    		total_scan = shrinker->nr;
    		shrinker->nr = 0;
    
    		while (total_scan >= SHRINK_BATCH) {
    			long this_scan = SHRINK_BATCH;
			int shrink_ret;
			int nr_before;

			nr_before = (*shrinker->shrinker)(0, gfp_mask);
    			shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask);
    			if (shrink_ret == -1)
    				break;
    
    			if (shrink_ret < nr_before)
    				ret += nr_before - shrink_ret;
    
    			mod_page_state(slabs_scanned, this_scan);
    			total_scan -= this_scan;
    
    			cond_resched();
    		}
    
    		shrinker->nr += total_scan;
    	}
    	up_read(&shrinker_rwsem);
    
	return ret;
    }
    
    /* Called without lock on whether page is mapped, so answer is unstable */
    static inline int page_mapping_inuse(struct page *page)
    {
    	struct address_space *mapping;
    
    	/* Page is in somebody's page tables. */
    	if (page_mapped(page))
    		return 1;
    
    	/* Be more reluctant to reclaim swapcache than pagecache */
    	if (PageSwapCache(page))
    		return 1;
    
    	mapping = page_mapping(page);
    	if (!mapping)
    		return 0;
    
    	/* File is mmap'd by somebody? */
    	return mapping_mapped(mapping);
    }
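
/*
 * The count of 2 below is the page cache's reference plus the reference
 * held by the caller that isolated the page; a page with buffers
 * (PagePrivate) is allowed one extra reference for those buffer heads.
 */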
    
    static inline int is_page_cache_freeable(struct page *page)
    {
    	return page_count(page) - !!PagePrivate(page) == 2;
    }
    
    static int may_write_to_queue(struct backing_dev_info *bdi)
    {
    
	if (current->flags & PF_SWAPWRITE)
		return 1;
    	if (!bdi_write_congested(bdi))
    		return 1;
    	if (bdi == current->backing_dev_info)
    		return 1;
    	return 0;
    }
    
    /*
     * We detected a synchronous write error writing a page out.  Probably
     * -ENOSPC.  We need to propagate that into the address_space for a subsequent
     * fsync(), msync() or close().
     *
     * The tricky part is that after writepage we cannot touch the mapping: nothing
     * prevents it from being freed up.  But we have a ref on the page and once
     * that page is locked, the mapping is pinned.
     *
     * We're allowed to run sleeping lock_page() here because we know the caller has
     * __GFP_FS.
     */
    static void handle_write_error(struct address_space *mapping,
    				struct page *page, int error)
    {
    	lock_page(page);
    	if (page_mapping(page) == mapping) {
    		if (error == -ENOSPC)
    			set_bit(AS_ENOSPC, &mapping->flags);
    		else
    			set_bit(AS_EIO, &mapping->flags);
    	}
    	unlock_page(page);
    }
    
/*
 * pageout is called by shrink_page_list() for each dirty page.
 * Calls ->writepage().
 */
    static pageout_t pageout(struct page *page, struct address_space *mapping)
    {
    	/*
	 * If the page is dirty, only perform writeback if that write
	 * will be non-blocking, to prevent this allocation from being
	 * stalled by pagecache activity.  But note that there may be
    	 * stalls if we need to run get_block().  We could test
    	 * PagePrivate for that.
    	 *
    	 * If this process is currently in generic_file_write() against
    	 * this page's queue, we can perform writeback even if that
    	 * will block.
    	 *
    	 * If the page is swapcache, write it back even if that would
    	 * block, for some throttling. This happens by accident, because
    	 * swap_backing_dev_info is bust: it doesn't reflect the
    	 * congestion state of the swapdevs.  Easy to fix, if needed.
    	 * See swapfile.c:page_queue_congested().
    	 */
    	if (!is_page_cache_freeable(page))
    		return PAGE_KEEP;
    	if (!mapping) {
    		/*
    		 * Some data journaling orphaned pages can have
    		 * page->mapping == NULL while being dirty with clean buffers.
    		 */
    
		if (PagePrivate(page)) {
    			if (try_to_free_buffers(page)) {
    				ClearPageDirty(page);
    				printk("%s: orphaned page\n", __FUNCTION__);
    				return PAGE_CLEAN;
    			}
    		}
    		return PAGE_KEEP;
    	}
    	if (mapping->a_ops->writepage == NULL)
    		return PAGE_ACTIVATE;
    	if (!may_write_to_queue(mapping->backing_dev_info))
    		return PAGE_KEEP;
    
    	if (clear_page_dirty_for_io(page)) {
    		int res;
    		struct writeback_control wbc = {
    			.sync_mode = WB_SYNC_NONE,
    			.nr_to_write = SWAP_CLUSTER_MAX,
    			.nonblocking = 1,
    			.for_reclaim = 1,
    		};
    
    		SetPageReclaim(page);
    		res = mapping->a_ops->writepage(page, &wbc);
    		if (res < 0)
    			handle_write_error(mapping, page, res);
    
		if (res == AOP_WRITEPAGE_ACTIVATE) {
    			ClearPageReclaim(page);
    			return PAGE_ACTIVATE;
    		}
    		if (!PageWriteback(page)) {
    			/* synchronous write or broken a_ops? */
    			ClearPageReclaim(page);
    		}
    
    		return PAGE_SUCCESS;
    	}
    
    	return PAGE_CLEAN;
    }
    
    
    static int remove_mapping(struct address_space *mapping, struct page *page)
    {
    	if (!mapping)
    		return 0;		/* truncate got there first */
    
    	write_lock_irq(&mapping->tree_lock);
    
    	/*
    	 * The non-racy check for busy page.  It is critical to check
    	 * PageDirty _after_ making sure that the page is freeable and
    	 * not in use by anybody. 	(pagecache + us == 2)
    	 */
    	if (unlikely(page_count(page) != 2))
    		goto cannot_free;
    	smp_rmb();
    	if (unlikely(PageDirty(page)))
    		goto cannot_free;
    
    	if (PageSwapCache(page)) {
    		swp_entry_t swap = { .val = page_private(page) };
    		__delete_from_swap_cache(page);
    		write_unlock_irq(&mapping->tree_lock);
    		swap_free(swap);
    		__put_page(page);	/* The pagecache ref */
    		return 1;
    	}
    
    	__remove_from_page_cache(page);
    	write_unlock_irq(&mapping->tree_lock);
    	__put_page(page);
    	return 1;
    
    cannot_free:
    	write_unlock_irq(&mapping->tree_lock);
    	return 0;
    }
    
    
/*
 * shrink_page_list() returns the number of reclaimed pages
 */
static unsigned long shrink_page_list(struct list_head *page_list,
					struct scan_control *sc)
{
    	LIST_HEAD(ret_pages);
    	struct pagevec freed_pvec;
    	int pgactivate = 0;
    
	unsigned long nr_reclaimed = 0;

    	cond_resched();
    
    	pagevec_init(&freed_pvec, 1);
    	while (!list_empty(page_list)) {
    		struct address_space *mapping;
    		struct page *page;
    		int may_enter_fs;
    		int referenced;
    
    		cond_resched();
    
    		page = lru_to_page(page_list);
    		list_del(&page->lru);
    
    		if (TestSetPageLocked(page))
    			goto keep;
    
    		BUG_ON(PageActive(page));
    
		sc->nr_scanned++;

		if (!sc->may_swap && page_mapped(page))
			goto keep_locked;

    		/* Double the slab pressure for mapped and swapcache pages */
    		if (page_mapped(page) || PageSwapCache(page))
    			sc->nr_scanned++;
    
    		if (PageWriteback(page))
			goto keep_locked;

		referenced = page_referenced(page, 1);
    		/* In active use or really unfreeable?  Activate it. */
    		if (referenced && page_mapping_inuse(page))
    			goto activate_locked;
    
    #ifdef CONFIG_SWAP
    		/*
    		 * Anonymous process memory has backing store?
    		 * Try to allocate it some swap space here.
    		 */
    
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!sc->may_swap)
				goto keep_locked;
			if (!add_to_swap(page, GFP_ATOMIC))
				goto activate_locked;
    		}
    #endif /* CONFIG_SWAP */
    
    		mapping = page_mapping(page);
    		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
    			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
    
    		/*
    		 * The page is mapped into the page tables of one or more
    		 * processes. Try to unmap it here.
    		 */
		if (page_mapped(page) && mapping) {
			/*
			 * No unmapping if we do not swap
			 */
			if (!sc->may_swap)
				goto keep_locked;

			switch (try_to_unmap(page, 0)) {
    			case SWAP_FAIL:
    				goto activate_locked;
    			case SWAP_AGAIN:
    				goto keep_locked;
    			case SWAP_SUCCESS:
    				; /* try to free the page below */
    			}
    		}
    
    		if (PageDirty(page)) {
    			if (referenced)
    				goto keep_locked;
    			if (!may_enter_fs)
				goto keep_locked;
			if (!sc->may_writepage)
    				goto keep_locked;
    
    			/* Page is dirty, try to write it out here */
    			switch(pageout(page, mapping)) {
    			case PAGE_KEEP:
    				goto keep_locked;
    			case PAGE_ACTIVATE:
    				goto activate_locked;
    			case PAGE_SUCCESS:
    				if (PageWriteback(page) || PageDirty(page))
    					goto keep;
    				/*
    				 * A synchronous write - probably a ramdisk.  Go
    				 * ahead and try to reclaim the page.
    				 */
    				if (TestSetPageLocked(page))
    					goto keep;
    				if (PageDirty(page) || PageWriteback(page))
    					goto keep_locked;
    				mapping = page_mapping(page);
    			case PAGE_CLEAN:
    				; /* try to free the page below */
    			}
    		}
    
    		/*
    		 * If the page has buffers, try to free the buffer mappings
    		 * associated with this page. If we succeed we try to free
    		 * the page as well.
    		 *
    		 * We do this even if the page is PageDirty().
    		 * try_to_release_page() does not perform I/O, but it is
    		 * possible for a page to have PageDirty set, but it is actually
    		 * clean (all its buffers are clean).  This happens if the
    		 * buffers were written out directly, with submit_bh(). ext3
    		 * will do this, as well as the blockdev mapping. 
    		 * try_to_release_page() will discover that cleanness and will
    		 * drop the buffers and mark the page clean - it can be freed.
    		 *
    		 * Rarely, pages can have buffers and no ->mapping.  These are
    		 * the pages which were not successfully invalidated in
    		 * truncate_complete_page().  We try to drop those buffers here
    		 * and if that worked, and the page is no longer mapped into
    		 * process address space (page_count == 1) it can be freed.
    		 * Otherwise, leave the page on the LRU so it is swappable.
    		 */
    		if (PagePrivate(page)) {
    			if (!try_to_release_page(page, sc->gfp_mask))
    				goto activate_locked;
    			if (!mapping && page_count(page) == 1)
    				goto free_it;
		}

		if (!remove_mapping(mapping, page))
			goto keep_locked;

    free_it:
		unlock_page(page);
		nr_reclaimed++;
    		if (!pagevec_add(&freed_pvec, page))
    			__pagevec_release_nonlru(&freed_pvec);
    		continue;
    
    activate_locked:
    		SetPageActive(page);
    		pgactivate++;
    keep_locked:
    		unlock_page(page);
    keep:
    		list_add(&page->lru, &ret_pages);
    		BUG_ON(PageLRU(page));
    	}
    	list_splice(&ret_pages, page_list);
    	if (pagevec_count(&freed_pvec))
    		__pagevec_release_nonlru(&freed_pvec);
    	mod_page_state(pgactivate, pgactivate);
    
	return nr_reclaimed;
}

    static inline void move_to_lru(struct page *page)
    {
    	list_del(&page->lru);
    	if (PageActive(page)) {
    		/*
    		 * lru_cache_add_active checks that
    		 * the PG_active bit is off.
    		 */
    		ClearPageActive(page);
    		lru_cache_add_active(page);
    	} else {
    		lru_cache_add(page);
    	}
    	put_page(page);
    }
    
/*
 * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
unsigned long putback_lru_pages(struct list_head *l)
    {
    	struct page *page;
    	struct page *page2;
    
	unsigned long count = 0;

    	list_for_each_entry_safe(page, page2, l, lru) {
    		move_to_lru(page);
    		count++;
    	}
    	return count;
    }
    
    
    /*
 * Non-migratable page
     */
    int fail_migrate_page(struct page *newpage, struct page *page)
    {
    	return -EIO;
    }
    EXPORT_SYMBOL(fail_migrate_page);
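
/*
 * Presumably intended as the ->migratepage address_space operation for
 * mappings whose pages cannot be migrated: a filesystem that plugs this
 * in will have page migration see an -EIO result instead of a successful
 * move.  (This note is an inference from the name and from how
 * a_ops->migratepage is called in migrate_pages() below.)
 */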
    
    
    /*
     * swapout a single page
     * page is locked upon entry, unlocked on exit
     */
    static int swap_page(struct page *page)
    {
    	struct address_space *mapping = page_mapping(page);
    
	if (page_mapped(page) && mapping)
		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
			goto unlock_retry;
    
    	if (PageDirty(page)) {
    		/* Page is dirty, try to write it out here */
    		switch(pageout(page, mapping)) {
    		case PAGE_KEEP:
    		case PAGE_ACTIVATE:
    			goto unlock_retry;
    
    		case PAGE_SUCCESS:
    			goto retry;
    
    		case PAGE_CLEAN:
    			; /* try to free the page below */
    		}
    	}
    
    	if (PagePrivate(page)) {
    		if (!try_to_release_page(page, GFP_KERNEL) ||
    		    (!mapping && page_count(page) == 1))
    			goto unlock_retry;
    	}
    
    	if (remove_mapping(mapping, page)) {
    		/* Success */
    		unlock_page(page);
    		return 0;
    	}
    
    unlock_retry:
    	unlock_page(page);
    
retry:
	return -EAGAIN;
}

    /*
     * Page migration was first developed in the context of the memory hotplug
     * project. The main authors of the migration code are:
     *
     * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
     * Hirokazu Takahashi <taka@valinux.co.jp>
     * Dave Hansen <haveblue@us.ibm.com>
     * Christoph Lameter <clameter@sgi.com>
     */
    
    /*
     * Remove references for a page and establish the new page with the correct
     * basic settings to be able to stop accesses to the page.
     */
    
int migrate_page_remove_references(struct page *newpage,
				struct page *page, int nr_refs)
    {
    	struct address_space *mapping = page_mapping(page);
    	struct page **radix_pointer;
    
    	/*
    	 * Avoid doing any of the following work if the page count
    	 * indicates that the page is in use or truncate has removed
    	 * the page.
    	 */
	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
		return -EAGAIN;

    	/*
    	 * Establish swap ptes for anonymous pages or destroy pte
    	 * maps for files.
    	 *
    	 * In order to reestablish file backed mappings the fault handlers
    	 * will take the radix tree_lock which may then be used to stop
	 * processes from accessing this page until the new page is ready.
    	 *
    	 * A process accessing via a swap pte (an anonymous page) will take a
    	 * page_lock on the old page which will block the process until the
    	 * migration attempt is complete. At that time the PageSwapCache bit
    	 * will be examined. If the page was migrated then the PageSwapCache
    	 * bit will be clear and the operation to retrieve the page will be
    	 * retried which will find the new page in the radix tree. Then a new
    	 * direct mapping may be generated based on the radix tree contents.
    	 *
    	 * If the page was not migrated then the PageSwapCache bit
    	 * is still set and the operation may continue.
    	 */
    
    	if (try_to_unmap(page, 1) == SWAP_FAIL)
    		/* A vma has VM_LOCKED set -> Permanent failure */
    		return -EPERM;
    
    
    	/*
    	 * Give up if we were unable to remove all mappings.
    	 */
	if (page_mapcount(page))
		return -EAGAIN;

    	write_lock_irq(&mapping->tree_lock);
    
    	radix_pointer = (struct page **)radix_tree_lookup_slot(
    						&mapping->page_tree,
    						page_index(page));
    
    	if (!page_mapping(page) || page_count(page) != nr_refs ||
    			*radix_pointer != page) {
		write_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}
    
    	/*
    	 * Now we know that no one else is looking at the page.
    	 *
    	 * Certain minimal information about a page must be available
    	 * in order for other subsystems to properly handle the page if they
    	 * find it through the radix tree update before we are finished
    	 * copying the page.
    	 */
    	get_page(newpage);
    	newpage->index = page->index;
    	newpage->mapping = page->mapping;
    	if (PageSwapCache(page)) {
    		SetPageSwapCache(newpage);
    		set_page_private(newpage, page_private(page));
    	}
    
    	*radix_pointer = newpage;
    	__put_page(page);
    	write_unlock_irq(&mapping->tree_lock);
    
    	return 0;
    }
    
    EXPORT_SYMBOL(migrate_page_remove_references);
    
    
    /*
     * Copy the page to its new location
     */
    void migrate_page_copy(struct page *newpage, struct page *page)
    {
    	copy_highpage(newpage, page);
    
    	if (PageError(page))
    		SetPageError(newpage);
    	if (PageReferenced(page))
    		SetPageReferenced(newpage);
    	if (PageUptodate(page))
    		SetPageUptodate(newpage);
    	if (PageActive(page))
    		SetPageActive(newpage);
    	if (PageChecked(page))
    		SetPageChecked(newpage);
    	if (PageMappedToDisk(page))
    		SetPageMappedToDisk(newpage);
    
    	if (PageDirty(page)) {
    		clear_page_dirty_for_io(page);
    		set_page_dirty(newpage);
     	}
    
    	ClearPageSwapCache(page);
    	ClearPageActive(page);
    	ClearPagePrivate(page);
    	set_page_private(page, 0);
    	page->mapping = NULL;
    
    	/*
    	 * If any waiters have accumulated on the new page then
    	 * wake them up.
    	 */
    	if (PageWriteback(newpage))
    		end_page_writeback(newpage);
    }
    
    
    /*
     * Common logic to directly migrate a single page suitable for
     * pages that do not use PagePrivate.
     *
     * Pages are locked upon entry and exit.
     */
    int migrate_page(struct page *newpage, struct page *page)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_remove_references(newpage, page, 2);

    	if (rc)
		return rc;

	migrate_page_copy(newpage, page);

	/*
	 * Remove auxiliary swap entries and replace
	 * them with real ptes.
	 *
	 * Note that a real pte entry will allow processes that are not
	 * waiting on the page lock to use the new page via the page tables
	 * before the new page is unlocked.
	 */
	remove_from_swap(newpage);
	return 0;
}

    /*
     * migrate_pages
     *
 * Two lists are passed to this function. The first list
 * contains the pages isolated from the LRU to be migrated.
 * The second list contains new pages that the isolated pages
 * can be moved to. If the second list is NULL then all
 * pages are swapped out.
 *
 * The function returns after 10 attempts or if no pages
 * are movable anymore because "to" has become empty
 * or no retryable pages exist anymore.
 *
 * Return: Number of pages not migrated when "to" ran empty.
 */
unsigned long migrate_pages(struct list_head *from, struct list_head *to,
		  struct list_head *moved, struct list_head *failed)
{
    	unsigned long retry;
    	unsigned long nr_failed = 0;
    
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

    	if (!swapwrite)
    		current->flags |= PF_SWAPWRITE;
    
    redo:
    	retry = 0;
    
    
    	list_for_each_entry_safe(page, page2, from, lru) {
    
    		struct page *newpage = NULL;
		struct address_space *mapping;

		rc = 0;

		if (page_count(page) == 1)
			/* page was freed from under us. So we are done. */
			goto next;

    		/*
		 * Skip locked pages during the first two passes to give the
		 * functions holding the lock time to release the page. Later we
		 * use lock_page() to have a higher chance of acquiring the
		 * lock.
		 */
		rc = -EAGAIN;
		if (pass > 2)
			lock_page(page);
		else
			if (TestSetPageLocked(page))
				goto next;

    		/*
    		 * Only wait on writeback if we have already done a pass where
		 * we may have triggered writeouts for lots of pages.
		 */
		if (pass > 0) {
			wait_on_page_writeback(page);
		} else {
			if (PageWriteback(page))
				goto unlock_page;
		}

    		/*
    		 * Anonymous pages must have swap cache references otherwise
    		 * the information contained in the page maps cannot be
    		 * preserved.
    		 */
    
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!add_to_swap(page, GFP_KERNEL)) {
				rc = -ENOMEM;
				goto unlock_page;
			}
		}

    		if (!to) {
    			rc = swap_page(page);
    			goto next;
    		}
    
    		newpage = lru_to_page(to);
		lock_page(newpage);

		/*
		 * Pages are properly locked and writeback is complete.
		 * Try to migrate the page.
		 */
    		mapping = page_mapping(page);
    		if (!mapping)
			goto unlock_both;

		if (mapping->a_ops->migratepage) {
			/*
			 * Most pages have a mapping and most filesystems
			 * should provide a migration function. Anonymous
			 * pages are part of swap space which also has its
			 * own migration function. This is the most common
			 * path for page migration.
			 */
			rc = mapping->a_ops->migratepage(newpage, page);
			goto unlock_both;
		}

		/*
		 * Default handling if a filesystem does not provide
		 * a migration function. We can only migrate clean
		 * pages so try to write out any dirty pages first.
		 */
    		if (PageDirty(page)) {
    			switch (pageout(page, mapping)) {
    			case PAGE_KEEP:
    			case PAGE_ACTIVATE:
    				goto unlock_both;
    
    			case PAGE_SUCCESS:
    				unlock_page(newpage);
    				goto next;
    
    			case PAGE_CLEAN:
    				; /* try to migrate the page below */
    			}
		}

		/*
		 * Buffers are managed in a filesystem specific way.
		 * We must have no buffers or drop them.
		 */
    		if (!page_has_buffers(page) ||
    		    try_to_release_page(page, GFP_KERNEL)) {
    			rc = migrate_page(newpage, page);
    			goto unlock_both;
    		}
    
    		/*
    		 * On early passes with mapped pages simply
    		 * retry. There may be a lock held for some
    		 * buffers that may go away. Later
    		 * swap them out.
    		 */
		if (pass > 4) {
			/*
			 * Persistently unable to drop buffers.  As a
			 * measure of last resort we fall back to
			 * swap_page().
			 */
    			unlock_page(newpage);
    			newpage = NULL;
    			rc = swap_page(page);
    			goto next;
    		}
    
    unlock_both:
    		unlock_page(newpage);
    
    
    unlock_page:
    		unlock_page(page);
    
    next:
    		if (rc == -EAGAIN) {