mm,hwpoison: send SIGBUS with error virutal address
Now an action required MCE in already hwpoisoned address surely sends a SIGBUS to current process, but the SIGBUS doesn't convey error virtual address. That's not optimal for hwpoison-aware applications. To fix the issue, make memory_failure() call kill_accessing_process(), that does pagetable walk to find the error virtual address. It could find multiple virtual addresses for the same error page, and it seems hard to tell which virtual address is correct one. But that's rare and sending incorrect virtual address could be better than no address. So let's report the first found virtual address for now. [naoya.horiguchi@nec.com: fix walk_page_range() return] Link: https://lkml.kernel.org/r/20210603051055.GA244241@hori.linux.bs1.fc.nec.co.jp Link: https://lkml.kernel.org/r/20210521030156.2612074-4-nao.horiguchi@gmail.com Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Aili Yao <yaoaili@kingsoft.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: David Hildenbrand <david@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Andy Lutomirski <luto@kernel.org> Cc: Jue Wang <juew@google.com> Cc: Borislav Petkov <bp@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
203c06eef5
commit
a3f5d80ea4
@ -1257,19 +1257,28 @@ static void kill_me_maybe(struct callback_head *cb)
|
||||
{
|
||||
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
|
||||
int flags = MF_ACTION_REQUIRED;
|
||||
int ret;
|
||||
|
||||
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
|
||||
|
||||
if (!p->mce_ripv)
|
||||
flags |= MF_MUST_KILL;
|
||||
|
||||
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
|
||||
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
|
||||
ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
|
||||
if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
|
||||
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
|
||||
sync_core();
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* -EHWPOISON from memory_failure() means that it already sent SIGBUS
|
||||
* to the current process with the proper error info, so no need to
|
||||
* send SIGBUS here again.
|
||||
*/
|
||||
if (ret == -EHWPOISON)
|
||||
return;
|
||||
|
||||
if (p->mce_vaddr != (void __user *)-1l) {
|
||||
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
|
||||
} else {
|
||||
|
@ -330,6 +330,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
|
||||
return swp_type(entry) == SWP_HWPOISON;
|
||||
}
|
||||
|
||||
static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
|
||||
{
|
||||
return swp_offset(entry);
|
||||
}
|
||||
|
||||
static inline void num_poisoned_pages_inc(void)
|
||||
{
|
||||
atomic_long_inc(&num_poisoned_pages);
|
||||
|
@ -56,6 +56,7 @@
|
||||
#include <linux/kfifo.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/page-isolation.h>
|
||||
#include <linux/pagewalk.h>
|
||||
#include "internal.h"
|
||||
#include "ras/ras_event.h"
|
||||
|
||||
@ -554,6 +555,148 @@ static void collect_procs(struct page *page, struct list_head *tokill,
|
||||
collect_procs_file(page, tokill, force_early);
|
||||
}
|
||||
|
||||
struct hwp_walk {
|
||||
struct to_kill tk;
|
||||
unsigned long pfn;
|
||||
int flags;
|
||||
};
|
||||
|
||||
static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
|
||||
{
|
||||
tk->addr = addr;
|
||||
tk->size_shift = shift;
|
||||
}
|
||||
|
||||
static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
|
||||
unsigned long poisoned_pfn, struct to_kill *tk)
|
||||
{
|
||||
unsigned long pfn = 0;
|
||||
|
||||
if (pte_present(pte)) {
|
||||
pfn = pte_pfn(pte);
|
||||
} else {
|
||||
swp_entry_t swp = pte_to_swp_entry(pte);
|
||||
|
||||
if (is_hwpoison_entry(swp))
|
||||
pfn = hwpoison_entry_to_pfn(swp);
|
||||
}
|
||||
|
||||
if (!pfn || pfn != poisoned_pfn)
|
||||
return 0;
|
||||
|
||||
set_to_kill(tk, addr, shift);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
|
||||
struct hwp_walk *hwp)
|
||||
{
|
||||
pmd_t pmd = *pmdp;
|
||||
unsigned long pfn;
|
||||
unsigned long hwpoison_vaddr;
|
||||
|
||||
if (!pmd_present(pmd))
|
||||
return 0;
|
||||
pfn = pmd_pfn(pmd);
|
||||
if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
|
||||
hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
|
||||
set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
|
||||
struct hwp_walk *hwp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
|
||||
unsigned long end, struct mm_walk *walk)
|
||||
{
|
||||
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
|
||||
int ret = 0;
|
||||
pte_t *ptep;
|
||||
spinlock_t *ptl;
|
||||
|
||||
ptl = pmd_trans_huge_lock(pmdp, walk->vma);
|
||||
if (ptl) {
|
||||
ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
|
||||
spin_unlock(ptl);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (pmd_trans_unstable(pmdp))
|
||||
goto out;
|
||||
|
||||
ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
|
||||
for (; addr != end; ptep++, addr += PAGE_SIZE) {
|
||||
ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
|
||||
hwp->pfn, &hwp->tk);
|
||||
if (ret == 1)
|
||||
break;
|
||||
}
|
||||
pte_unmap_unlock(ptep - 1, ptl);
|
||||
out:
|
||||
cond_resched();
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
|
||||
unsigned long addr, unsigned long end,
|
||||
struct mm_walk *walk)
|
||||
{
|
||||
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
|
||||
pte_t pte = huge_ptep_get(ptep);
|
||||
struct hstate *h = hstate_vma(walk->vma);
|
||||
|
||||
return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
|
||||
hwp->pfn, &hwp->tk);
|
||||
}
|
||||
#else
|
||||
#define hwpoison_hugetlb_range NULL
|
||||
#endif
|
||||
|
||||
static struct mm_walk_ops hwp_walk_ops = {
|
||||
.pmd_entry = hwpoison_pte_range,
|
||||
.hugetlb_entry = hwpoison_hugetlb_range,
|
||||
};
|
||||
|
||||
/*
|
||||
* Sends SIGBUS to the current process with error info.
|
||||
*
|
||||
* This function is intended to handle "Action Required" MCEs on already
|
||||
* hardware poisoned pages. They could happen, for example, when
|
||||
* memory_failure() failed to unmap the error page at the first call, or
|
||||
* when multiple local machine checks happened on different CPUs.
|
||||
*
|
||||
* MCE handler currently has no easy access to the error virtual address,
|
||||
* so this function walks page table to find it. The returned virtual address
|
||||
* is proper in most cases, but it could be wrong when the application
|
||||
* process has multiple entries mapping the error page.
|
||||
*/
|
||||
static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
|
||||
int flags)
|
||||
{
|
||||
int ret;
|
||||
struct hwp_walk priv = {
|
||||
.pfn = pfn,
|
||||
};
|
||||
priv.tk.tsk = p;
|
||||
|
||||
mmap_read_lock(p->mm);
|
||||
ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
|
||||
(void *)&priv);
|
||||
if (ret == 1 && priv.tk.addr)
|
||||
kill_proc(&priv.tk, pfn, flags);
|
||||
mmap_read_unlock(p->mm);
|
||||
return ret ? -EFAULT : -EHWPOISON;
|
||||
}
|
||||
|
||||
static const char *action_name[] = {
|
||||
[MF_IGNORED] = "Ignored",
|
||||
[MF_FAILED] = "Failed",
|
||||
@ -1267,7 +1410,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
|
||||
if (TestSetPageHWPoison(head)) {
|
||||
pr_err("Memory failure: %#lx: already hardware poisoned\n",
|
||||
pfn);
|
||||
return -EHWPOISON;
|
||||
res = -EHWPOISON;
|
||||
if (flags & MF_ACTION_REQUIRED)
|
||||
res = kill_accessing_process(current, page_to_pfn(head), flags);
|
||||
return res;
|
||||
}
|
||||
|
||||
num_poisoned_pages_inc();
|
||||
@ -1476,6 +1622,8 @@ try_again:
|
||||
pr_err("Memory failure: %#lx: already hardware poisoned\n",
|
||||
pfn);
|
||||
res = -EHWPOISON;
|
||||
if (flags & MF_ACTION_REQUIRED)
|
||||
res = kill_accessing_process(current, pfn, flags);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user