memory-failure.c
	/*
	 * shake_page could have turned it free.
	 */
    		if (is_free_buddy_page(p)) {
    			action_result(pfn, "free buddy, 2nd try", DELAYED);
    			return 0;
    		}
    
    		action_result(pfn, "non LRU", IGNORED);
    		put_page(p);
    		return -EBUSY;
    	}
    
    
	/*
	 * Lock the page and wait for writeback to finish.
	 * It's very difficult to mess with pages currently under IO
	 * and in many cases impossible, so we just avoid it here.
	 */
	lock_page_nosync(hpage);

	/*
	 * unpoison always clears PG_hwpoison inside the page lock
	 */
	if (!PageHWPoison(p)) {
		printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
		res = 0;
		goto out;
	}
    
	if (hwpoison_filter(p)) {
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		unlock_page(hpage);
		put_page(hpage);
		return 0;
	}

	/*
	 * For an error on the tail page, we should set PG_hwpoison
	 * on the head page to show that the hugepage is hwpoisoned.
	 */
    	if (PageTail(p) && TestSetPageHWPoison(hpage)) {
    		action_result(pfn, "hugepage already hardware poisoned",
    				IGNORED);
    		unlock_page(hpage);
    		put_page(hpage);
    		return 0;
    	}
    	/*
    	 * Set PG_hwpoison on all pages in an error hugepage,
    	 * because containment is done in hugepage unit for now.
    	 * Since we have done TestSetPageHWPoison() for the head page with
    	 * page lock held, we can safely set PG_hwpoison bits on tail pages.
    	 */
    	if (PageHuge(p))
    		set_page_hwpoison_huge_page(hpage);
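
	/*
	 * Editor's sketch, not part of this excerpt: set_page_hwpoison_huge_page()
	 * presumably just walks every subpage of the compound page and sets the
	 * bit on each, along these lines (assumed implementation):
	 *
	 *	static void set_page_hwpoison_huge_page(struct page *hpage)
	 *	{
	 *		int i;
	 *		int nr_pages = 1 << compound_order(hpage);
	 *
	 *		for (i = 0; i < nr_pages; i++)
	 *			SetPageHWPoison(hpage + i);
	 *	}
	 */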
    
    
    	wait_on_page_writeback(p);
    
	/*
	 * Now take care of user space mappings.
	 * Abort on fail: __remove_from_page_cache() assumes unmapped page.
	 */
    	if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
    		printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
    		res = -EBUSY;
    		goto out;
    	}
    
	/*
	 * Torn down by someone else?
	 */
	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
		action_result(pfn, "already truncated LRU", IGNORED);
		res = -EBUSY;
		goto out;
    	}
    
	res = -EBUSY;
	for (ps = error_states;; ps++) {
		if ((p->flags & ps->mask) == ps->res) {
			res = page_action(ps, p, pfn);
			break;
		}
	}
out:
	unlock_page(hpage);
	return res;
}
    EXPORT_SYMBOL_GPL(__memory_failure);
    
    /**
     * memory_failure - Handle memory failure of a page.
 * @pfn: Page number of the corrupted page
     * @trapno: Trap number reported in the signal to user space.
     *
     * This function is called by the low level machine check code
     * of an architecture when it detects hardware memory corruption
     * of a page. It tries its best to recover, which includes
     * dropping pages, killing processes etc.
     *
 * The function is primarily of use for corruptions that
 * happen outside the current execution context (e.g. when
 * detected by a background scrubber).
 *
 * Must run in process context (e.g. a work queue) with interrupts
 * enabled and no spinlocks held.
     */
    void memory_failure(unsigned long pfn, int trapno)
    {
    	__memory_failure(pfn, trapno, 0);
    }
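
/*
 * Editor's sketch, assuming a hypothetical architecture handler: this is
 * roughly how machine check code would hand a corrupted page to
 * memory_failure(). The names arch_handle_mce and paddr are illustrative,
 * not from this file.
 *
 *	void arch_handle_mce(u64 paddr, int trapno)
 *	{
 *		unsigned long pfn = paddr >> PAGE_SHIFT;
 *
 *		if (pfn_valid(pfn))
 *			memory_failure(pfn, trapno);
 *	}
 */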
    
    
    /**
     * unpoison_memory - Unpoison a previously poisoned page
     * @pfn: Page number of the to be unpoisoned page
     *
     * Software-unpoison a page that has been poisoned by
     * memory_failure() earlier.
     *
 * This is only done on the software level, so it only works
 * for Linux-injected failures, not real hardware failures.
     *
     * Returns 0 for success, otherwise -errno.
     */
    int unpoison_memory(unsigned long pfn)
    {
    	struct page *page;
    	struct page *p;
	int freeit = 0;
	unsigned long nr_pages;

    	if (!pfn_valid(pfn))
    		return -ENXIO;
    
    	p = pfn_to_page(pfn);
    	page = compound_head(p);
    
    	if (!PageHWPoison(p)) {
    		pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn);
    		return 0;
	}

	nr_pages = 1 << compound_order(page);

    	if (!get_page_unless_zero(page)) {
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn);
    		return 0;
    	}
    
    	lock_page_nosync(page);
    	/*
    	 * This test is racy because PG_hwpoison is set outside of page lock.
    	 * That's acceptable because that won't trigger kernel panic. Instead,
    	 * the PG_hwpoison page will be caught and isolated on the entrance to
    	 * the free buddy page pool.
    	 */
    
	if (TestClearPageHWPoison(page)) {
		pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn);
		atomic_long_sub(nr_pages, &mce_bad_pages);
		freeit = 1;
		if (PageHuge(page))
			clear_page_hwpoison_huge_page(page);
	}
    
    	unlock_page(page);
    
    	put_page(page);
    	if (freeit)
    		put_page(page);
    
    	return 0;
    }
    EXPORT_SYMBOL(unpoison_memory);
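
/*
 * Editor's note: unpoison_memory() is exported for test code. Assuming the
 * hwpoison-inject debugfs module is present, its unpoison hook is essentially
 * a direct wrapper around this function (sketch, not verified against this
 * tree):
 *
 *	static int hwpoison_unpoison(void *data, u64 val)
 *	{
 *		return unpoison_memory((unsigned long)val);
 *	}
 */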
    
    
    static struct page *new_page(struct page *p, unsigned long private, int **x)
{
	int nid = page_to_nid(p);
	return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}
    
    /*
     * Safely get reference count of an arbitrary page.
     * Returns 0 for a free page, -EIO for a zero refcount page
     * that is not free, and 1 for any other page type.
     * For 1 the page is returned with increased page count, otherwise not.
     */
    static int get_any_page(struct page *p, unsigned long pfn, int flags)
    {
    	int ret;
    
    	if (flags & MF_COUNT_INCREASED)
    		return 1;
    
    	/*
    	 * The lock_system_sleep prevents a race with memory hotplug,
    	 * because the isolation assumes there's only a single user.
	 * This is a big hammer; a finer-grained mechanism would be nicer.
    	 */
    	lock_system_sleep();
    
    	/*
    	 * Isolate the page, so that it doesn't get reallocated if it
    	 * was free.
    	 */
    	set_migratetype_isolate(p);
    	if (!get_page_unless_zero(compound_head(p))) {
    		if (is_free_buddy_page(p)) {
    			pr_debug("get_any_page: %#lx free buddy page\n", pfn);
    			/* Set hwpoison bit while page is still isolated */
    			SetPageHWPoison(p);
    			ret = 0;
    		} else {
    			pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n",
    				pfn, p->flags);
    			ret = -EIO;
    		}
    	} else {
    		/* Not a free page */
    		ret = 1;
    	}
    	unset_migratetype_isolate(p);
    	unlock_system_sleep();
    	return ret;
    }
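
/*
 * Editor's note: the return convention above gives callers a three-way
 * dispatch, which soft_offline_page() below follows (sketch of the pattern):
 *
 *	ret = get_any_page(page, pfn, flags);
 *	if (ret < 0)
 *		return ret;	- neither free nor referencable: give up
 *	if (ret == 0)
 *		goto done;	- was free, already marked hwpoison
 *	- otherwise we hold a reference and may operate on the page
 */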
    
    /**
     * soft_offline_page - Soft offline a page.
     * @page: page to offline
     * @flags: flags. Same as memory_failure().
     *
     * Returns 0 on success, otherwise negated errno.
     *
     * Soft offline a page, by migration or invalidation,
     * without killing anything. This is for the case when
     * a page is not corrupted yet (so it's still valid to access),
     * but has had a number of corrected errors and is better taken
     * out.
     *
     * The actual policy on when to do that is maintained by
     * user space.
     *
     * This should never impact any application or cause data loss,
     * however it might take some time.
     *
     * This is not a 100% solution for all memory, but tries to be
     * ``good enough'' for the majority of memory.
     */
    int soft_offline_page(struct page *page, int flags)
    {
    	int ret;
    	unsigned long pfn = page_to_pfn(page);
    
    	ret = get_any_page(page, pfn, flags);
    	if (ret < 0)
    		return ret;
    	if (ret == 0)
    		goto done;
    
    	/*
    	 * Page cache page we can handle?
    	 */
    	if (!PageLRU(page)) {
    		/*
    		 * Try to free it.
    		 */
    		put_page(page);
    		shake_page(page, 1);
    
    		/*
    		 * Did it turn free?
    		 */
    		ret = get_any_page(page, pfn, 0);
    		if (ret < 0)
    			return ret;
    		if (ret == 0)
    			goto done;
    	}
    	if (!PageLRU(page)) {
    		pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n",
    				pfn, page->flags);
    		return -EIO;
    	}
    
    	lock_page(page);
    	wait_on_page_writeback(page);
    
    	/*
    	 * Synchronized using the page lock with memory_failure()
    	 */
    	if (PageHWPoison(page)) {
    		unlock_page(page);
    		put_page(page);
    		pr_debug("soft offline: %#lx page already poisoned\n", pfn);
    		return -EBUSY;
    	}
    
    	/*
    	 * Try to invalidate first. This should work for
    	 * non dirty unmapped page cache pages.
    	 */
    	ret = invalidate_inode_page(page);
    	unlock_page(page);
    
    	/*
    	 * Drop count because page migration doesn't like raised
    	 * counts. The page could get re-allocated, but if it becomes
    	 * LRU the isolation will just fail.
    	 * RED-PEN would be better to keep it isolated here, but we
    	 * would need to fix isolation locking first.
    	 */
    	put_page(page);
    	if (ret == 1) {
    		ret = 0;
    		pr_debug("soft_offline: %#lx: invalidated\n", pfn);
    		goto done;
    	}
    
    	/*
    	 * Simple invalidation didn't work.
    	 * Try to migrate to a new page instead. migrate.c
    	 * handles a large number of cases for us.
    	 */
    	ret = isolate_lru_page(page);
    	if (!ret) {
    		LIST_HEAD(pagelist);
    
    		list_add(&page->lru, &pagelist);
    		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
    		if (ret) {
    			pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
    				pfn, ret, page->flags);
    			if (ret > 0)
    				ret = -EIO;
    		}
    	} else {
    		pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
    				pfn, ret, page_count(page), page->flags);
    	}
    	if (ret)
    		return ret;
    
    done:
    	atomic_long_add(1, &mce_bad_pages);
    	SetPageHWPoison(page);
    	/* keep elevated page count for bad page */
    	return ret;
    }
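
/*
 * Editor's note: the userspace policy mentioned in the kernel-doc above
 * typically reaches soft_offline_page() through sysfs. Assuming the standard
 * memory sysfs interface is built in, writing a physical address to
 * /sys/devices/system/memory/soft_offline_page offlines the containing page,
 * e.g. (address illustrative):
 *
 *	echo 0x2f3456000 > /sys/devices/system/memory/soft_offline_page
 */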