Newer
Older
* shake_page could have turned it free.
*/
if (is_free_buddy_page(p)) {
action_result(pfn, "free buddy, 2nd try", DELAYED);
return 0;
}
action_result(pfn, "non LRU", IGNORED);
put_page(p);
return -EBUSY;
}
/*
* Lock the page and wait for writeback to finish.
* It's very difficult to mess with pages currently under IO
* and in many cases impossible, so we just avoid it here.
*/
lock_page_nosync(hpage);
/*
* unpoison always clears PG_hwpoison inside the page lock
*/
if (!PageHWPoison(p)) {
printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
atomic_long_sub(nr_pages, &mce_bad_pages);
unlock_page(hpage);
put_page(hpage);
/*
* For error on the tail page, we should set PG_hwpoison
* on the head page to show that the hugepage is hwpoisoned
*/
if (PageTail(p) && TestSetPageHWPoison(hpage)) {
action_result(pfn, "hugepage already hardware poisoned",
IGNORED);
unlock_page(hpage);
put_page(hpage);
return 0;
}
/*
* Set PG_hwpoison on all pages in an error hugepage,
* because containment is done in hugepage unit for now.
* Since we have done TestSetPageHWPoison() for the head page with
* page lock held, we can safely set PG_hwpoison bits on tail pages.
*/
if (PageHuge(p))
set_page_hwpoison_huge_page(hpage);
wait_on_page_writeback(p);
/*
* Now take care of user space mappings.
* Abort on fail: __remove_from_page_cache() assumes unmapped page.
*/
if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
res = -EBUSY;
goto out;
}
/*
* Torn down by someone else?
*/
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
action_result(pfn, "already truncated LRU", IGNORED);
goto out;
}
res = -EBUSY;
for (ps = error_states;; ps++) {
if ((p->flags & ps->mask) == ps->res) {

Wu Fengguang
committed
res = page_action(ps, p, pfn);
break;
}
}
out:
unlock_page(hpage);
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
return res;
}
EXPORT_SYMBOL_GPL(__memory_failure);
/**
 * memory_failure - Handle memory failure of a page.
 * @pfn: Page Number of the corrupted page
 * @trapno: Trap number reported in the signal to user space.
 *
 * Entry point used by an architecture's low level machine check code
 * once it has detected hardware memory corruption in a page.  Recovery
 * is best effort and may involve dropping pages, killing processes etc.
 *
 * Mostly useful for corruption that is found outside the current
 * execution context (e.g. by a background scrubber).
 *
 * Must run in process context (e.g. a work queue) with interrupts
 * enabled and no spinlocks held.
 *
 * This is __memory_failure() called with no flags set; its return
 * value is intentionally not propagated.
 */
void memory_failure(unsigned long pfn, int trapno)
{
	const int no_flags = 0;

	(void)__memory_failure(pfn, trapno, no_flags);
}
/**
 * unpoison_memory - Unpoison a previously poisoned page
 * @pfn: Page number of the to be unpoisoned page
 *
 * Software-unpoison a page that has been poisoned by
 * memory_failure() earlier.
 *
 * This is only done on the software-level, so it only works
 * for linux injected failures, not real hardware failures
 *
 * Returns 0 for success (including when the page was not poisoned in
 * the first place), -ENXIO if @pfn is not a valid page frame number.
 */
int unpoison_memory(unsigned long pfn)
{
	struct page *page;
	struct page *p;
	int freeit = 0;
	unsigned int nr_pages;

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	page = compound_head(p);

	if (!PageHWPoison(p)) {
		pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn);
		return 0;
	}

	/* Accounting is done per compound page, not per base page. */
	nr_pages = 1 << compound_order(page);

	if (!get_page_unless_zero(page)) {
		/* Zero refcount: nothing to lock or release, just clear the bit. */
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn);
		return 0;
	}

	lock_page_nosync(page);
	/*
	 * This test is racy because PG_hwpoison is set outside of page lock.
	 * That's acceptable because that won't trigger kernel panic. Instead,
	 * the PG_hwpoison page will be caught and isolated on the entrance to
	 * the free buddy page pool.
	 */
	if (TestClearPageHWPoison(page)) {
		pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn);
		atomic_long_sub(nr_pages, &mce_bad_pages);
		/*
		 * We were the ones to clear the bit, so we also own the extra
		 * reference that is kept on a poisoned page (presumably the
		 * elevated count left by the poisoning path -- confirm
		 * against __memory_failure()).
		 */
		freeit = 1;
	}
	if (PageHuge(p))
		clear_page_hwpoison_huge_page(page);
	unlock_page(page);

	/* Drop the reference taken by get_page_unless_zero() above. */
	put_page(page);
	if (freeit)
		put_page(page);

	return 0;
}
EXPORT_SYMBOL(unpoison_memory);
/*
 * Page allocation callback passed to migrate_pages() by
 * soft_offline_page(): allocate the migration target on the same
 * node as the page @p that is being moved away.
 *
 * @private and @x are not used here.
 */
static struct page *new_page(struct page *p, unsigned long private, int **x)
{
	int nid = page_to_nid(p);

	return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}
/*
 * Take a reference on an arbitrary page without tripping over free pages.
 *
 * Return values:
 *   1    - the page is in use; its compound head's count has been raised
 *          (or the caller already raised it, see MF_COUNT_INCREASED).
 *   0    - the page is a free buddy page; PG_hwpoison has been set on it
 *          and no reference was taken.
 *   -EIO - the refcount is zero but the page is not a free buddy page;
 *          no reference was taken.
 */
static int get_any_page(struct page *p, unsigned long pfn, int flags)
{
	int status = 1;

	/* The caller already holds the reference for us. */
	if (flags & MF_COUNT_INCREASED)
		return 1;

	/*
	 * The lock_system_sleep prevents a race with memory hotplug,
	 * because the isolation assumes there's only a single user.
	 * This is a big hammer, a better would be nicer.
	 */
	lock_system_sleep();

	/*
	 * Isolate the page, so that it doesn't get reallocated if it
	 * was free.
	 */
	set_migratetype_isolate(p);

	/* Not a free page: we now hold a reference, status stays 1. */
	if (get_page_unless_zero(compound_head(p)))
		goto unisolate;

	if (is_free_buddy_page(p)) {
		pr_debug("get_any_page: %#lx free buddy page\n", pfn);
		/* Set hwpoison bit while page is still isolated */
		SetPageHWPoison(p);
		status = 0;
	} else {
		pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n",
			 pfn, p->flags);
		status = -EIO;
	}

unisolate:
	unset_migratetype_isolate(p);
	unlock_system_sleep();
	return status;
}
/**
 * soft_offline_page - Soft offline a page.
 * @page: page to offline
 * @flags: flags. Same as memory_failure().
 *
 * Returns 0 on success, otherwise negated errno.
 *
 * Soft offline a page, by migration or invalidation,
 * without killing anything. This is for the case when
 * a page is not corrupted yet (so it's still valid to access),
 * but has had a number of corrected errors and is better taken
 * out.
 *
 * The actual policy on when to do that is maintained by
 * user space.
 *
 * This should never impact any application or cause data loss,
 * however it might take some time.
 *
 * This is not a 100% solution for all memory, but tries to be
 * ``good enough'' for the majority of memory.
 *
 * Reference handling: a reference obtained through get_any_page() (or
 * handed in via MF_COUNT_INCREASED) is dropped again on every exit
 * taken after that point, success or failure.
 */
int soft_offline_page(struct page *page, int flags)
{
	int ret;
	unsigned long pfn = page_to_pfn(page);

	ret = get_any_page(page, pfn, flags);
	if (ret < 0)
		return ret;
	if (ret == 0)
		goto done;

	/*
	 * Page cache page we can handle?
	 */
	if (!PageLRU(page)) {
		/*
		 * Try to free it.
		 */
		put_page(page);
		shake_page(page, 1);

		/*
		 * Did it turn free?
		 */
		ret = get_any_page(page, pfn, 0);
		if (ret < 0)
			return ret;
		if (ret == 0)
			goto done;
	}
	if (!PageLRU(page)) {
		pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n",
			 pfn, page->flags);
		/*
		 * We still hold the reference from get_any_page() here;
		 * drop it so that giving up does not pin the page forever.
		 */
		put_page(page);
		return -EIO;
	}

	lock_page(page);
	wait_on_page_writeback(page);

	/*
	 * Synchronized using the page lock with memory_failure()
	 */
	if (PageHWPoison(page)) {
		unlock_page(page);
		put_page(page);
		pr_debug("soft offline: %#lx page already poisoned\n", pfn);
		return -EBUSY;
	}

	/*
	 * Try to invalidate first. This should work for
	 * non dirty unmapped page cache pages.
	 */
	ret = invalidate_inode_page(page);
	unlock_page(page);

	/*
	 * Drop count because page migration doesn't like raised
	 * counts. The page could get re-allocated, but if it becomes
	 * LRU the isolation will just fail.
	 * RED-PEN would be better to keep it isolated here, but we
	 * would need to fix isolation locking first.
	 */
	put_page(page);
	if (ret == 1) {
		ret = 0;
		pr_debug("soft_offline: %#lx: invalidated\n", pfn);
		goto done;
	}

	/*
	 * Simple invalidation didn't work.
	 * Try to migrate to a new page instead. migrate.c
	 * handles a large number of cases for us.
	 */
	ret = isolate_lru_page(page);
	if (!ret) {
		LIST_HEAD(pagelist);

		list_add(&page->lru, &pagelist);
		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
		if (ret) {
			pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
				 pfn, ret, page->flags);
			/* A positive result is not an errno; report it as -EIO. */
			if (ret > 0)
				ret = -EIO;
		}
	} else {
		pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
			 pfn, ret, page_count(page), page->flags);
	}
	if (ret)
		return ret;

done:
	atomic_long_add(1, &mce_bad_pages);
	SetPageHWPoison(page);
	/* keep elevated page count for bad page */
	return ret;
}