2ce0bdfebc
Syzkaller reported the following issue: kernel BUG at mm/khugepaged.c:1823! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 5097 Comm: syz-executor220 Not tainted 6.2.0-syzkaller-13154-g857f1268a591 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/16/2023 RIP: 0010:collapse_file mm/khugepaged.c:1823 [inline] RIP: 0010:hpage_collapse_scan_file+0x67c8/0x7580 mm/khugepaged.c:2233 Code: 00 00 89 de e8 c9 66 a3 ff 31 ff 89 de e8 c0 66 a3 ff 45 84 f6 0f 85 28 0d 00 00 e8 22 64 a3 ff e9 dc f7 ff ff e8 18 64 a3 ff <0f> 0b f3 0f 1e fa e8 0d 64 a3 ff e9 93 f6 ff ff f3 0f 1e fa 4c 89 RSP: 0018:ffffc90003dff4e0 EFLAGS: 00010093 RAX: ffffffff81e95988 RBX: 00000000000001c1 RCX: ffff8880205b3a80 RDX: 0000000000000000 RSI: 00000000000001c0 RDI: 00000000000001c1 RBP: ffffc90003dff830 R08: ffffffff81e90e67 R09: fffffbfff1a433c3 R10: 0000000000000000 R11: dffffc0000000001 R12: 0000000000000000 R13: ffffc90003dff6c0 R14: 00000000000001c0 R15: 0000000000000000 FS: 00007fdbae5ee700(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdbae6901e0 CR3: 000000007b2dd000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> madvise_collapse+0x721/0xf50 mm/khugepaged.c:2693 madvise_vma_behavior mm/madvise.c:1086 [inline] madvise_walk_vmas mm/madvise.c:1260 [inline] do_madvise+0x9e5/0x4680 mm/madvise.c:1439 __do_sys_madvise mm/madvise.c:1452 [inline] __se_sys_madvise mm/madvise.c:1450 [inline] __x64_sys_madvise+0xa5/0xb0 mm/madvise.c:1450 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The xas_store() call during page cache scanning can potentially translate 'xas' into the error state (with the reproducer provided by the syzkaller the error code is -ENOMEM). 
However, there are no further checks after the 'xas_store', and the next call of 'xas_next' at the start of the scanning cycle doesn't increase the xa_index, and the issue occurs. This patch will add the xarray state error checking after the xas_store() and the corresponding result error code. Tested via syzbot. [akpm@linux-foundation.org: update include/trace/events/huge_memory.h's SCAN_STATUS] Link: https://lkml.kernel.org/r/20230329145330.23191-1-ivan.orlov0322@gmail.com Link: https://syzkaller.appspot.com/bug?id=7d6bb3760e026ece7524500fe44fb024a0e959fc Signed-off-by: Ivan Orlov <ivan.orlov0322@gmail.com> Reported-by: syzbot+9578faa5475acb35fa50@syzkaller.appspotmail.com Tested-by: Zach O'Keefe <zokeefe@google.com> Cc: Yang Shi <shy828301@gmail.com> Cc: Himadri Pandya <himadrispandya@gmail.com> Cc: Ivan Orlov <ivan.orlov0322@gmail.com> Cc: Shuah Khan <skhan@linuxfoundation.org> Cc: Song Liu <songliubraving@fb.com> Cc: Rik van Riel <riel@surriel.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
247 lines
6.5 KiB
C
247 lines
6.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#undef TRACE_SYSTEM
|
|
#define TRACE_SYSTEM huge_memory
|
|
|
|
#if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ)
|
|
#define __HUGE_MEMORY_H
|
|
|
|
#include <linux/tracepoint.h>
|
|
|
|
/*
 * SCAN_STATUS - table pairing each SCAN_* status value with the string that
 * is shown for it.
 *
 * Every row is wrapped in EM(), the last row in EMe(); the list expands to
 * nothing useful until the includer defines those two macros.  The whole
 * table must stay one logical line: each row except the last ends in a
 * backslash and nothing may be placed between the rows.
 *
 * NOTE(review): "ccgroup_charge_failed" looks like a typo for "cgroup_...",
 * but the string is part of the emitted trace text, so it is kept unchanged
 * here - confirm with consumers before renaming.
 */
#define SCAN_STATUS							\
	EM( SCAN_FAIL,			"failed")			\
	EM( SCAN_SUCCEED,		"succeeded")			\
	EM( SCAN_PMD_NULL,		"pmd_null")			\
	EM( SCAN_PMD_NONE,		"pmd_none")			\
	EM( SCAN_PMD_MAPPED,		"page_pmd_mapped")		\
	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
	EM( SCAN_EXCEED_SWAP_PTE,	"exceed_swap_pte")		\
	EM( SCAN_EXCEED_SHARED_PTE,	"exceed_shared_pte")		\
	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\
	EM( SCAN_PTE_MAPPED_HUGEPAGE,	"pte_mapped_hugepage")		\
	EM( SCAN_PAGE_RO,		"no_writable_page")		\
	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
	EM( SCAN_PAGE_NULL,		"page_null")			\
	EM( SCAN_SCAN_ABORT,		"scan_aborted")			\
	EM( SCAN_PAGE_COUNT,		"not_suitable_page_count")	\
	EM( SCAN_PAGE_LRU,		"page_not_in_lru")		\
	EM( SCAN_PAGE_LOCK,		"page_locked")			\
	EM( SCAN_PAGE_ANON,		"page_not_anon")		\
	EM( SCAN_PAGE_COMPOUND,		"page_compound")		\
	EM( SCAN_ANY_PROCESS,		"no_process_for_page")		\
	EM( SCAN_VMA_NULL,		"vma_null")			\
	EM( SCAN_VMA_CHECK,		"vma_check_failed")		\
	EM( SCAN_ADDRESS_RANGE,		"not_suitable_address_range")	\
	EM( SCAN_DEL_PAGE_LRU,		"could_not_delete_page_from_lru")\
	EM( SCAN_ALLOC_HUGE_PAGE_FAIL,	"alloc_huge_page_failed")	\
	EM( SCAN_CGROUP_CHARGE_FAIL,	"ccgroup_charge_failed")	\
	EM( SCAN_TRUNCATED,		"truncated")			\
	EM( SCAN_PAGE_HAS_PRIVATE,	"page_has_private")		\
	EMe(SCAN_STORE_FAILED,		"store_failed")
|
|
|
|
#undef EM
|
|
#undef EMe
|
|
#define EM(a, b) TRACE_DEFINE_ENUM(a);
|
|
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
|
|
|
|
SCAN_STATUS
|
|
|
|
#undef EM
|
|
#undef EMe
|
|
#define EM(a, b) {a, b},
|
|
#define EMe(a, b) {a, b}
|
|
|
|
TRACE_EVENT(mm_khugepaged_scan_pmd,
|
|
|
|
TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
|
|
int referenced, int none_or_zero, int status, int unmapped),
|
|
|
|
TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(unsigned long, pfn)
|
|
__field(bool, writable)
|
|
__field(int, referenced)
|
|
__field(int, none_or_zero)
|
|
__field(int, status)
|
|
__field(int, unmapped)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->pfn = page ? page_to_pfn(page) : -1;
|
|
__entry->writable = writable;
|
|
__entry->referenced = referenced;
|
|
__entry->none_or_zero = none_or_zero;
|
|
__entry->status = status;
|
|
__entry->unmapped = unmapped;
|
|
),
|
|
|
|
TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s, unmapped=%d",
|
|
__entry->mm,
|
|
__entry->pfn,
|
|
__entry->writable,
|
|
__entry->referenced,
|
|
__entry->none_or_zero,
|
|
__print_symbolic(__entry->status, SCAN_STATUS),
|
|
__entry->unmapped)
|
|
);
|
|
|
|
TRACE_EVENT(mm_collapse_huge_page,
|
|
|
|
TP_PROTO(struct mm_struct *mm, int isolated, int status),
|
|
|
|
TP_ARGS(mm, isolated, status),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(int, isolated)
|
|
__field(int, status)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->isolated = isolated;
|
|
__entry->status = status;
|
|
),
|
|
|
|
TP_printk("mm=%p, isolated=%d, status=%s",
|
|
__entry->mm,
|
|
__entry->isolated,
|
|
__print_symbolic(__entry->status, SCAN_STATUS))
|
|
);
|
|
|
|
TRACE_EVENT(mm_collapse_huge_page_isolate,
|
|
|
|
TP_PROTO(struct page *page, int none_or_zero,
|
|
int referenced, bool writable, int status),
|
|
|
|
TP_ARGS(page, none_or_zero, referenced, writable, status),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(unsigned long, pfn)
|
|
__field(int, none_or_zero)
|
|
__field(int, referenced)
|
|
__field(bool, writable)
|
|
__field(int, status)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->pfn = page ? page_to_pfn(page) : -1;
|
|
__entry->none_or_zero = none_or_zero;
|
|
__entry->referenced = referenced;
|
|
__entry->writable = writable;
|
|
__entry->status = status;
|
|
),
|
|
|
|
TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s",
|
|
__entry->pfn,
|
|
__entry->none_or_zero,
|
|
__entry->referenced,
|
|
__entry->writable,
|
|
__print_symbolic(__entry->status, SCAN_STATUS))
|
|
);
|
|
|
|
TRACE_EVENT(mm_collapse_huge_page_swapin,
|
|
|
|
TP_PROTO(struct mm_struct *mm, int swapped_in, int referenced, int ret),
|
|
|
|
TP_ARGS(mm, swapped_in, referenced, ret),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(int, swapped_in)
|
|
__field(int, referenced)
|
|
__field(int, ret)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->swapped_in = swapped_in;
|
|
__entry->referenced = referenced;
|
|
__entry->ret = ret;
|
|
),
|
|
|
|
TP_printk("mm=%p, swapped_in=%d, referenced=%d, ret=%d",
|
|
__entry->mm,
|
|
__entry->swapped_in,
|
|
__entry->referenced,
|
|
__entry->ret)
|
|
);
|
|
|
|
TRACE_EVENT(mm_khugepaged_scan_file,
|
|
|
|
TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file,
|
|
int present, int swap, int result),
|
|
|
|
TP_ARGS(mm, page, file, present, swap, result),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(unsigned long, pfn)
|
|
__string(filename, file->f_path.dentry->d_iname)
|
|
__field(int, present)
|
|
__field(int, swap)
|
|
__field(int, result)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->pfn = page ? page_to_pfn(page) : -1;
|
|
__assign_str(filename, file->f_path.dentry->d_iname);
|
|
__entry->present = present;
|
|
__entry->swap = swap;
|
|
__entry->result = result;
|
|
),
|
|
|
|
TP_printk("mm=%p, scan_pfn=0x%lx, filename=%s, present=%d, swap=%d, result=%s",
|
|
__entry->mm,
|
|
__entry->pfn,
|
|
__get_str(filename),
|
|
__entry->present,
|
|
__entry->swap,
|
|
__print_symbolic(__entry->result, SCAN_STATUS))
|
|
);
|
|
|
|
TRACE_EVENT(mm_khugepaged_collapse_file,
|
|
TP_PROTO(struct mm_struct *mm, struct page *hpage, pgoff_t index,
|
|
bool is_shmem, unsigned long addr, struct file *file,
|
|
int nr, int result),
|
|
TP_ARGS(mm, hpage, index, addr, is_shmem, file, nr, result),
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(unsigned long, hpfn)
|
|
__field(pgoff_t, index)
|
|
__field(unsigned long, addr)
|
|
__field(bool, is_shmem)
|
|
__string(filename, file->f_path.dentry->d_iname)
|
|
__field(int, nr)
|
|
__field(int, result)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->hpfn = hpage ? page_to_pfn(hpage) : -1;
|
|
__entry->index = index;
|
|
__entry->addr = addr;
|
|
__entry->is_shmem = is_shmem;
|
|
__assign_str(filename, file->f_path.dentry->d_iname);
|
|
__entry->nr = nr;
|
|
__entry->result = result;
|
|
),
|
|
|
|
TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s",
|
|
__entry->mm,
|
|
__entry->hpfn,
|
|
__entry->index,
|
|
__entry->addr,
|
|
__entry->is_shmem,
|
|
__get_str(filename),
|
|
__entry->nr,
|
|
__print_symbolic(__entry->result, SCAN_STATUS))
|
|
);
|
|
|
|
#endif /* __HUGE_MEMORY_H */
|
|
#include <trace/define_trace.h>
|