Newer
Older
action_result(pfn, "high order kernel", IGNORED);
return -EBUSY;
}
/*
* We ignore non-LRU pages for good reasons.
* - PG_locked is only well defined for LRU pages and a few others
* - to avoid races with __set_page_locked()
* - to avoid races with __SetPageSlab*() (and more non-atomic ops)
* The check (unnecessarily) ignores LRU pages being isolated and
* walked by the page reclaim code, however that's not a big loss.
*/
if (!PageLRU(p) && !PageHuge(p))
if (!PageLRU(p) && !PageHuge(p)) {
/*
* shake_page could have turned it free.
*/
if (is_free_buddy_page(p)) {
action_result(pfn, "free buddy, 2nd try", DELAYED);
return 0;
}
action_result(pfn, "non LRU", IGNORED);
put_page(p);
return -EBUSY;
}
/*
* Lock the page and wait for writeback to finish.
* It's very difficult to mess with pages currently under IO
* and in many cases impossible, so we just avoid it here.
*/
lock_page_nosync(hpage);
/*
* unpoison always clear PG_hwpoison inside page lock
*/
if (!PageHWPoison(p)) {
printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
atomic_long_sub(nr_pages, &mce_bad_pages);
unlock_page(hpage);
put_page(hpage);
/*
* For error on the tail page, we should set PG_hwpoison
* on the head page to show that the hugepage is hwpoisoned
*/
if (PageTail(p) && TestSetPageHWPoison(hpage)) {
action_result(pfn, "hugepage already hardware poisoned",
IGNORED);
unlock_page(hpage);
put_page(hpage);
return 0;
}
/*
* Set PG_hwpoison on all pages in an error hugepage,
* because containment is done in hugepage unit for now.
* Since we have done TestSetPageHWPoison() for the head page with
* page lock held, we can safely set PG_hwpoison bits on tail pages.
*/
if (PageHuge(p))
set_page_hwpoison_huge_page(hpage);
wait_on_page_writeback(p);
/*
 * Now take care of user space mappings.
 * Abort on fail: __remove_from_page_cache() assumes unmapped page.
 */
if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
res = -EBUSY;
goto out;
}
/*
* Torn down by someone else?
*/
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
action_result(pfn, "already truncated LRU", IGNORED);
goto out;
}
res = -EBUSY;
for (ps = error_states;; ps++) {
if ((p->flags & ps->mask) == ps->res) {

Wu Fengguang
committed
res = page_action(ps, p, pfn);
break;
}
}
out:
unlock_page(hpage);
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
return res;
}
EXPORT_SYMBOL_GPL(__memory_failure);
/**
 * memory_failure - React to a hardware memory error on one page.
 * @pfn: Page frame number of the page reported as corrupted
 * @trapno: Trap number to report in the signal sent to user space
 *
 * Entry point for an architecture's low level machine check code once
 * it has detected hardware memory corruption in a page. Recovery is
 * best effort and may include dropping pages, killing processes etc.
 *
 * Mainly useful for corruption that is found outside the current
 * execution context (e.g. by a background scrubber).
 *
 * Must run in process context (e.g. a work queue) with interrupts
 * enabled and no spinlocks held.
 */
void memory_failure(unsigned long pfn, int trapno)
{
	/* Plain forwarder: the worker's last argument is always 0 here. */
	__memory_failure(pfn, trapno, 0);
}
/**
 * unpoison_memory - Unpoison a previously poisoned page
 * @pfn: Page number of the to be unpoisoned page
 *
 * Software-unpoison a page that has been poisoned by
 * memory_failure() earlier.
 *
 * This is only done on the software-level, so it only works
 * for linux injected failures, not real hardware failures.
 *
 * Returns 0 for success (including when the page was not poisoned
 * in the first place), -ENXIO when @pfn is not a valid pfn.
 */
int unpoison_memory(unsigned long pfn)
{
	struct page *page;
	struct page *p;
	int freeit = 0;
	unsigned int nr_pages;

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	page = compound_head(p);

	/* Nothing to undo when the page is not marked poisoned. */
	if (!PageHWPoison(p)) {
		pr_info("MCE: Page was already unpoisoned %#lx\n", pfn);
		return 0;
	}

	/* Accounting is done in units of the whole compound page. */
	nr_pages = 1 << compound_order(page);

	/*
	 * A zero refcount means we could not pin the page: just clear
	 * the flag and fix up the counter, there is nothing to lock or
	 * release.
	 */
	if (!get_page_unless_zero(page)) {
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
		return 0;
	}

	lock_page_nosync(page);
	/*
	 * This test is racy because PG_hwpoison is set outside of page lock.
	 * That's acceptable because that won't trigger kernel panic. Instead,
	 * the PG_hwpoison page will be caught and isolated on the entrance to
	 * the free buddy page pool.
	 */
	if (TestClearPageHWPoison(page)) {
		pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
		atomic_long_sub(nr_pages, &mce_bad_pages);
		/*
		 * We were the ones to clear the flag, so an extra reference
		 * has to be dropped below (presumably the one kept elevated
		 * when the page was poisoned -- confirm against the
		 * poisoning path).
		 */
		freeit = 1;
	}
	if (PageHuge(p))
		clear_page_hwpoison_huge_page(page);
	unlock_page(page);

	/* Drop the reference taken by get_page_unless_zero() above. */
	put_page(page);
	if (freeit)
		put_page(page);

	return 0;
}
EXPORT_SYMBOL(unpoison_memory);
/*
 * Allocation callback passed to migrate_pages() by soft_offline_page().
 * Returns an order-0 GFP_HIGHUSER_MOVABLE allocation made on the node
 * that @p belongs to. @private and @x are not used here.
 */
static struct page *new_page(struct page *p, unsigned long private, int **x)
{
	int nid = page_to_nid(p);

	return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}
/*
* Safely get reference count of an arbitrary page.
* Returns 0 for a free page, -EIO for a zero refcount page
* that is not free, and 1 for any other page type.
* For 1 the page is returned with increased page count, otherwise not.
*/
static int get_any_page(struct page *p, unsigned long pfn, int flags)
{
int ret;
if (flags & MF_COUNT_INCREASED)
return 1;
/*
* The lock_system_sleep prevents a race with memory hotplug,
* because the isolation assumes there's only a single user.
* This is a big hammer, a better would be nicer.
*/
lock_system_sleep();
/*
* Isolate the page, so that it doesn't get reallocated if it
* was free.
*/
set_migratetype_isolate(p);
if (!get_page_unless_zero(compound_head(p))) {
if (is_free_buddy_page(p)) {
pr_info("get_any_page: %#lx free buddy page\n", pfn);
/* Set hwpoison bit while page is still isolated */
SetPageHWPoison(p);
ret = 0;
} else {
pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n",
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
pfn, p->flags);
ret = -EIO;
}
} else {
/* Not a free page */
ret = 1;
}
unset_migratetype_isolate(p);
unlock_system_sleep();
return ret;
}
/**
 * soft_offline_page - Soft offline a page.
 * @page: page to offline
 * @flags: flags. Same as memory_failure().
 *
 * Returns 0 on success, otherwise negated errno.
 *
 * Takes a page out of service by invalidating or migrating it,
 * without killing anything. Meant for a page that is not corrupted
 * yet (so it is still valid to access) but has seen a number of
 * corrected errors and is better retired.
 *
 * The policy deciding when to do this lives in user space.
 *
 * This should never impact any application or cause data loss,
 * however it might take some time.
 *
 * This is not a 100% solution for all memory, but tries to be
 * ``good enough'' for the majority of memory.
 */
int soft_offline_page(struct page *page, int flags)
{
	LIST_HEAD(pagelist);
	unsigned long pfn = page_to_pfn(page);
	int rc;

	rc = get_any_page(page, pfn, flags);
	if (rc < 0)
		return rc;
	if (rc == 0)
		goto done;	/* free page, nothing left to move */

	/* Is it a page cache page we know how to handle? */
	if (!PageLRU(page)) {
		/* No: drop our reference and try to get it freed. */
		put_page(page);
		shake_page(page, 1);

		/* Re-acquire and see whether it turned free meanwhile. */
		rc = get_any_page(page, pfn, 0);
		if (rc < 0)
			return rc;
		if (rc == 0)
			goto done;
	}
	if (!PageLRU(page)) {
		pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
			pfn, page->flags);
		return -EIO;
	}

	lock_page(page);
	wait_on_page_writeback(page);

	/* The page lock is what synchronizes us with memory_failure(). */
	if (PageHWPoison(page)) {
		unlock_page(page);
		put_page(page);
		pr_info("soft offline: %#lx page already poisoned\n", pfn);
		return -EBUSY;
	}

	/*
	 * Cheapest option first: plain invalidation, which should do
	 * for clean, unmapped page cache pages.
	 */
	rc = invalidate_inode_page(page);
	unlock_page(page);

	/*
	 * Page migration doesn't like raised counts, so drop ours.
	 * The page could get re-allocated, but if it becomes LRU the
	 * isolation will just fail.
	 * RED-PEN would be better to keep it isolated here, but we
	 * would need to fix isolation locking first.
	 */
	put_page(page);
	if (rc == 1) {
		rc = 0;
		pr_info("soft_offline: %#lx: invalidated\n", pfn);
		goto done;
	}

	/*
	 * Invalidation was not enough; fall back to migrating the
	 * contents to a new page. migrate.c handles a large number of
	 * cases for us.
	 */
	rc = isolate_lru_page(page);
	if (rc) {
		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
			pfn, rc, page_count(page), page->flags);
		return rc;
	}

	list_add(&page->lru, &pagelist);
	rc = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
	if (rc) {
		pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
			pfn, rc, page->flags);
		/* A positive result is folded into -EIO. */
		if (rc > 0)
			rc = -EIO;
		return rc;
	}

done:
	atomic_long_add(1, &mce_bad_pages);
	SetPageHWPoison(page);
	/* keep elevated page count for bad page */
	return rc;
}
/*
 * The caller must hold current->mm->mmap_sem in read mode.
 */
/*
 * Walk the page tables of current->mm for @addr and report whether the
 * pte found there is a swap-style entry that is_hwpoison_entry()
 * recognises. Returns 0 whenever a level is not present, is a large
 * mapping, or the pte is not a swap pte.
 */
int is_hwpoison_address(unsigned long addr)
{
	pgd_t *pgd_slot;
	pud_t *pud_slot, pud_val;
	pmd_t *pmd_slot, pmd_val;
	pte_t *pte_slot, pte_val;

	pgd_slot = pgd_offset(current->mm, addr);
	if (!pgd_present(*pgd_slot))
		return 0;

	/* Snapshot each entry once and test the copy. */
	pud_slot = pud_offset(pgd_slot, addr);
	pud_val = *pud_slot;
	if (!pud_present(pud_val) || pud_large(pud_val))
		return 0;

	pmd_slot = pmd_offset(pud_slot, addr);
	pmd_val = *pmd_slot;
	if (!pmd_present(pmd_val) || pmd_large(pmd_val))
		return 0;

	/* Copy the pte out and drop the mapping before inspecting it. */
	pte_slot = pte_offset_map(pmd_slot, addr);
	pte_val = *pte_slot;
	pte_unmap(pte_slot);

	if (!is_swap_pte(pte_val))
		return 0;

	return is_hwpoison_entry(pte_to_swp_entry(pte_val));
}
EXPORT_SYMBOL_GPL(is_hwpoison_address);