mm,hwpoison: send SIGBUS with error virutal address

Now an action required MCE in already hwpoisoned address surely sends a
SIGBUS to current process, but the SIGBUS doesn't convey error virtual
address.  That's not optimal for hwpoison-aware applications.

To fix the issue, make memory_failure() call kill_accessing_process(),
that does pagetable walk to find the error virtual address.  It could find
multiple virtual addresses for the same error page, and it seems hard to
tell which virtual address is correct one.  But that's rare and sending
incorrect virtual address could be better than no address.  So let's
report the first found virtual address for now.

[naoya.horiguchi@nec.com: fix walk_page_range() return]
  Link: https://lkml.kernel.org/r/20210603051055.GA244241@hori.linux.bs1.fc.nec.co.jp

Link: https://lkml.kernel.org/r/20210521030156.2612074-4-nao.horiguchi@gmail.com
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Aili Yao <yaoaili@kingsoft.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Jue Wang <juew@google.com>
Cc: Borislav Petkov <bp@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Naoya Horiguchi 2021-06-28 19:43:14 -07:00 committed by Linus Torvalds
parent 203c06eef5
commit a3f5d80ea4
3 changed files with 165 additions and 3 deletions

View File

@ -1257,19 +1257,28 @@ static void kill_me_maybe(struct callback_head *cb)
{ {
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
int flags = MF_ACTION_REQUIRED; int flags = MF_ACTION_REQUIRED;
int ret;
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
if (!p->mce_ripv) if (!p->mce_ripv)
flags |= MF_MUST_KILL; flags |= MF_MUST_KILL;
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) && ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) { if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core(); sync_core();
return; return;
} }
/*
* -EHWPOISON from memory_failure() means that it already sent SIGBUS
* to the current process with the proper error info, so no need to
* send SIGBUS here again.
*/
if (ret == -EHWPOISON)
return;
if (p->mce_vaddr != (void __user *)-1l) { if (p->mce_vaddr != (void __user *)-1l) {
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT); force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
} else { } else {

View File

@ -330,6 +330,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON; return swp_type(entry) == SWP_HWPOISON;
} }
static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
{
return swp_offset(entry);
}
static inline void num_poisoned_pages_inc(void) static inline void num_poisoned_pages_inc(void)
{ {
atomic_long_inc(&num_poisoned_pages); atomic_long_inc(&num_poisoned_pages);

View File

@ -56,6 +56,7 @@
#include <linux/kfifo.h> #include <linux/kfifo.h>
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/page-isolation.h> #include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include "internal.h" #include "internal.h"
#include "ras/ras_event.h" #include "ras/ras_event.h"
@ -554,6 +555,148 @@ static void collect_procs(struct page *page, struct list_head *tokill,
collect_procs_file(page, tokill, force_early); collect_procs_file(page, tokill, force_early);
} }
struct hwp_walk {
struct to_kill tk;
unsigned long pfn;
int flags;
};
static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
{
tk->addr = addr;
tk->size_shift = shift;
}
static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
unsigned long poisoned_pfn, struct to_kill *tk)
{
unsigned long pfn = 0;
if (pte_present(pte)) {
pfn = pte_pfn(pte);
} else {
swp_entry_t swp = pte_to_swp_entry(pte);
if (is_hwpoison_entry(swp))
pfn = hwpoison_entry_to_pfn(swp);
}
if (!pfn || pfn != poisoned_pfn)
return 0;
set_to_kill(tk, addr, shift);
return 1;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
struct hwp_walk *hwp)
{
pmd_t pmd = *pmdp;
unsigned long pfn;
unsigned long hwpoison_vaddr;
if (!pmd_present(pmd))
return 0;
pfn = pmd_pfn(pmd);
if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
return 1;
}
return 0;
}
#else
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
struct hwp_walk *hwp)
{
return 0;
}
#endif
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
int ret = 0;
pte_t *ptep;
spinlock_t *ptl;
ptl = pmd_trans_huge_lock(pmdp, walk->vma);
if (ptl) {
ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
spin_unlock(ptl);
goto out;
}
if (pmd_trans_unstable(pmdp))
goto out;
ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
for (; addr != end; ptep++, addr += PAGE_SIZE) {
ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
hwp->pfn, &hwp->tk);
if (ret == 1)
break;
}
pte_unmap_unlock(ptep - 1, ptl);
out:
cond_resched();
return ret;
}
#ifdef CONFIG_HUGETLB_PAGE
static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);
return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
hwp->pfn, &hwp->tk);
}
#else
#define hwpoison_hugetlb_range NULL
#endif
static struct mm_walk_ops hwp_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
};
/*
* Sends SIGBUS to the current process with error info.
*
* This function is intended to handle "Action Required" MCEs on already
* hardware poisoned pages. They could happen, for example, when
* memory_failure() failed to unmap the error page at the first call, or
* when multiple local machine checks happened on different CPUs.
*
* MCE handler currently has no easy access to the error virtual address,
* so this function walks page table to find it. The returned virtual address
* is proper in most cases, but it could be wrong when the application
* process has multiple entries mapping the error page.
*/
static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
int flags)
{
int ret;
struct hwp_walk priv = {
.pfn = pfn,
};
priv.tk.tsk = p;
mmap_read_lock(p->mm);
ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
mmap_read_unlock(p->mm);
return ret ? -EFAULT : -EHWPOISON;
}
static const char *action_name[] = { static const char *action_name[] = {
[MF_IGNORED] = "Ignored", [MF_IGNORED] = "Ignored",
[MF_FAILED] = "Failed", [MF_FAILED] = "Failed",
@ -1267,7 +1410,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (TestSetPageHWPoison(head)) { if (TestSetPageHWPoison(head)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n", pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn); pfn);
return -EHWPOISON; res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, page_to_pfn(head), flags);
return res;
} }
num_poisoned_pages_inc(); num_poisoned_pages_inc();
@ -1476,6 +1622,8 @@ try_again:
pr_err("Memory failure: %#lx: already hardware poisoned\n", pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn); pfn);
res = -EHWPOISON; res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, pfn, flags);
goto unlock_mutex; goto unlock_mutex;
} }