ead07f6a86
memory_failure() can run in 2 different modes (specified by MF_COUNT_INCREASED) from a page refcount perspective. When MF_COUNT_INCREASED is set, memory_failure() assumes that the caller takes a refcount of the target page. And if cleared, memory_failure() takes it on its own. In current code, however, refcounting is done differently in each caller. For example, madvise_hwpoison() uses get_user_pages_fast() and hwpoison_inject() uses get_page_unless_zero(). So this inconsistent refcounting causes refcount failure especially for thp tail pages. Typical user visible effects are like memory leak or VM_BUG_ON_PAGE(!page_count(page)) in isolate_lru_page(). To fix this refcounting issue, this patch introduces get_hwpoison_page() to handle thp tail pages in the same manner for each caller of hwpoison code. memory_failure() might fail to split thp and in such case it returns without completing page isolation. This is not good because PageHWPoison on the thp is still set and there's no easy way to unpoison such thps. So this patch tries to roll back any action to the thp in "non anonymous thp" case and "thp split failed" case, expecting an MCE(SRAR) generated by later access afterward will properly free such thps. [akpm@linux-foundation.org: fix CONFIG_HWPOISON_INJECT=m] Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Tony Luck <tony.luck@intel.com> Cc: "Kirill A. Shutemov" <kirill@shutemov.name> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
145 lines
3.3 KiB
C
145 lines
3.3 KiB
C
/* Inject a hwpoison memory failure on an arbitrary pfn */
|
|
#include <linux/module.h>
|
|
#include <linux/debugfs.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/hugetlb.h>
|
|
#include "internal.h"
|
|
|
|
/*
 * debugfs directory named "hwpoison"; assigned in pfn_inject_init() and
 * removed, together with everything below it, in pfn_inject_exit().
 */
static struct dentry *hwpoison_dir;
|
|
|
|
/*
 * hwpoison_inject - write handler that injects a software memory failure at
 * the page frame number supplied by the writer.
 * @data: unused.
 * @val:  page frame number to poison.
 *
 * Return: -EPERM if the caller lacks CAP_SYS_ADMIN; -ENXIO if @val is not a
 * valid pfn (including values too large for an unsigned long); 0 if the page
 * was skipped (no reference could be taken, it is neither an LRU nor a huge
 * page while filtering is enabled, or hwpoison_filter() rejected it);
 * otherwise the return value of memory_failure().
 */
static int hwpoison_inject(void *data, u64 val)
{
	unsigned long pfn = val;
	struct page *p;
	struct page *hpage;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * @val is 64 bits wide while pfn is an unsigned long.  Where the
	 * latter is narrower, an oversized value would silently alias some
	 * other (possibly valid) pfn and the wrong page would be poisoned.
	 * Reject anything that does not survive the conversion.
	 */
	if (pfn != val)
		return -ENXIO;

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	hpage = compound_head(p);
	/*
	 * This implies unable to support free buddy pages.
	 */
	if (!get_hwpoison_page(p))
		return 0;

	/* With filtering disabled, inject unconditionally. */
	if (!hwpoison_filter_enable)
		goto inject;

	/*
	 * NOTE(review): shake_page() presumably tries to get the page onto
	 * the LRU; that is why the same condition is tested again below
	 * rather than merged into one check -- confirm.
	 */
	if (!PageLRU(hpage) && !PageHuge(p))
		shake_page(hpage, 0);
	/*
	 * This implies unable to support non-LRU pages.
	 */
	if (!PageLRU(hpage) && !PageHuge(p))
		goto put_out;

	/*
	 * do a racy check with elevated page count, to make sure PG_hwpoison
	 * will only be set for the targeted owner (or on a free page).
	 * We temporarily take page lock for try_get_mem_cgroup_from_page().
	 * memory_failure() will redo the check reliably inside page lock.
	 */
	lock_page(hpage);
	err = hwpoison_filter(hpage);
	unlock_page(hpage);
	if (err)
		goto put_out;

inject:
	pr_info("Injecting memory failure at pfn %#lx\n", pfn);
	/*
	 * MF_COUNT_INCREASED tells memory_failure() that the reference taken
	 * above is already held, so the reference is handed over here.
	 * NOTE(review): 18 is the trap number argument; presumably the x86
	 * machine-check vector -- confirm.
	 */
	return memory_failure(pfn, 18, MF_COUNT_INCREASED);
put_out:
	/*
	 * Injection was skipped: drop the reference taken above.
	 * NOTE(review): confirm that a single put_page(p) balances everything
	 * get_hwpoison_page() took when p is a thp tail page.
	 */
	put_page(p);
	return 0;
}
|
|
|
|
/*
 * hwpoison_unpoison - write handler that forwards the written value to
 * unpoison_memory().
 * @data: unused.
 * @val:  value passed on to unpoison_memory().
 *
 * Return: -EPERM if the caller lacks CAP_SYS_ADMIN, otherwise whatever
 * unpoison_memory() returns.
 */
static int hwpoison_unpoison(void *data, u64 val)
{
	if (capable(CAP_SYS_ADMIN))
		return unpoison_memory(val);

	return -EPERM;
}
|
|
|
|
/*
 * File operations for the injection file: no read-side callback (NULL),
 * hwpoison_inject() as the write-side callback, values formatted as "%lli\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
|
|
/*
 * File operations for the unpoison file: no read-side callback (NULL),
 * hwpoison_unpoison() as the write-side callback, values formatted as "%lli\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
|
|
|
|
static void pfn_inject_exit(void)
|
|
{
|
|
debugfs_remove_recursive(hwpoison_dir);
|
|
}
|
|
|
|
static int pfn_inject_init(void)
|
|
{
|
|
struct dentry *dentry;
|
|
|
|
hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
|
|
if (hwpoison_dir == NULL)
|
|
return -ENOMEM;
|
|
|
|
/*
|
|
* Note that the below poison/unpoison interfaces do not involve
|
|
* hardware status change, hence do not require hardware support.
|
|
* They are mainly for testing hwpoison in software level.
|
|
*/
|
|
dentry = debugfs_create_file("corrupt-pfn", 0200, hwpoison_dir,
|
|
NULL, &hwpoison_fops);
|
|
if (!dentry)
|
|
goto fail;
|
|
|
|
dentry = debugfs_create_file("unpoison-pfn", 0200, hwpoison_dir,
|
|
NULL, &unpoison_fops);
|
|
if (!dentry)
|
|
goto fail;
|
|
|
|
dentry = debugfs_create_u32("corrupt-filter-enable", 0600,
|
|
hwpoison_dir, &hwpoison_filter_enable);
|
|
if (!dentry)
|
|
goto fail;
|
|
|
|
dentry = debugfs_create_u32("corrupt-filter-dev-major", 0600,
|
|
hwpoison_dir, &hwpoison_filter_dev_major);
|
|
if (!dentry)
|
|
goto fail;
|
|
|
|
dentry = debugfs_create_u32("corrupt-filter-dev-minor", 0600,
|
|
hwpoison_dir, &hwpoison_filter_dev_minor);
|
|
if (!dentry)
|
|
goto fail;
|
|
|
|
dentry = debugfs_create_u64("corrupt-filter-flags-mask", 0600,
|
|
hwpoison_dir, &hwpoison_filter_flags_mask);
|
|
if (!dentry)
|
|
goto fail;
|
|
|
|
dentry = debugfs_create_u64("corrupt-filter-flags-value", 0600,
|
|
hwpoison_dir, &hwpoison_filter_flags_value);
|
|
if (!dentry)
|
|
goto fail;
|
|
|
|
#ifdef CONFIG_MEMCG_SWAP
|
|
dentry = debugfs_create_u64("corrupt-filter-memcg", 0600,
|
|
hwpoison_dir, &hwpoison_filter_memcg);
|
|
if (!dentry)
|
|
goto fail;
|
|
#endif
|
|
|
|
return 0;
|
|
fail:
|
|
pfn_inject_exit();
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/* Register pfn_inject_init() as the module's init function. */
module_init(pfn_inject_init);
|
|
/* Register pfn_inject_exit() as the module's exit function. */
module_exit(pfn_inject_exit);
|
|
/* Declare the module's license as GPL. */
MODULE_LICENSE("GPL");
|