b0b592cf08
Since e2a1256b17b1 ("x86/speculation: Restore speculation related MSRs during S3 resume") kmemleak reports this issue: unreferenced object 0xffff888009cedc00 (size 256): comm "swapper/0", pid 1, jiffies 4294693823 (age 73.764s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 48 00 00 00 00 00 00 00 ........H....... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: msr_build_context (include/linux/slab.h:621) pm_check_save_msr (arch/x86/power/cpu.c:520) do_one_initcall (init/main.c:1298) kernel_init_freeable (init/main.c:1370) kernel_init (init/main.c:1504) ret_from_fork (arch/x86/entry/entry_64.S:304) Reproducer: - boot the VM with a debug kernel config (see https://github.com/multipath-tcp/mptcp_net-next/issues/268) - wait ~1 minute - start a kmemleak scan The root cause here is alignment within the packed struct saved_context (from suspend_64.h). Kmemleak only searches for pointers that are aligned (see how pointers are scanned in kmemleak.c), but pahole shows that the saved_msrs struct member and all members after it in the structure are unaligned: struct saved_context { struct pt_regs regs; /* 0 168 */ /* --- cacheline 2 boundary (128 bytes) was 40 bytes ago --- */ u16 ds; /* 168 2 */ ... u64 misc_enable; /* 232 8 */ bool misc_enable_saved; /* 240 1 */ /* Note below odd offset values for the remainder of this struct */ struct saved_msrs saved_msrs; /* 241 16 */ /* --- cacheline 4 boundary (256 bytes) was 1 bytes ago --- */ long unsigned int efer; /* 257 8 */ u16 gdt_pad; /* 265 2 */ struct desc_ptr gdt_desc; /* 267 10 */ u16 idt_pad; /* 277 2 */ struct desc_ptr idt; /* 279 10 */ u16 ldt; /* 289 2 */ u16 tss; /* 291 2 */ long unsigned int tr; /* 293 8 */ long unsigned int safety; /* 301 8 */ long unsigned int return_address; /* 309 8 */ /* size: 317, cachelines: 5, members: 25 */ /* last cacheline: 61 bytes */ } __attribute__((__packed__)); Move misc_enable_saved to the end of the struct declaration so that saved_msrs fits in before the cacheline 4 boundary. The comment above the saved_context declaration says to fix wakeup_64.S file and __save/__restore_processor_state() if the struct is modified: it looks like all the accesses in wakeup_64.S are done through offsets which are computed at build-time. Update that comment accordingly. At the end, the false positive kmemleak report is due to a limitation from kmemleak but it is always good to avoid unaligned members for optimisation purposes. Please note that it looks like this issue is not new, e.g. https://lore.kernel.org/all/9f1bb619-c4ee-21c4-a251-870bd4db04fa@lwfinger.net/ https://lore.kernel.org/all/94e48fcd-1dbd-ebd2-4c91-f39941735909@molgen.mpg.de/ [ bp: Massage + cleanup commit message. ] Fixes: 7a9c2dd08ead ("x86/pm: Introduce quirk framework to save/restore extra MSR registers around suspend/resume") Suggested-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Link: https://lore.kernel.org/r/20220426202138.498310-1-matthieu.baerts@tessares.net
65 lines
1.8 KiB
C
65 lines
1.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright 2001-2003 Pavel Machek <pavel@suse.cz>
|
|
* Based on code
|
|
* Copyright 2001 Patrick Mochel <mochel@osdl.org>
|
|
*/
|
|
#ifndef _ASM_X86_SUSPEND_64_H
|
|
#define _ASM_X86_SUSPEND_64_H
|
|
|
|
#include <asm/desc.h>
|
|
#include <asm/fpu/api.h>
|
|
|
|
/*
|
|
* Image of the saved processor state, used by the low level ACPI suspend to
|
|
* RAM code and by the low level hibernation code.
|
|
*
|
|
* If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
|
|
* and make sure that __save/__restore_processor_state(), defined in
|
|
* arch/x86/power/cpu.c, still work as required.
|
|
*
|
|
* Because the structure is packed, make sure to avoid unaligned members. For
|
|
* optimisation purposes but also because tools like kmemleak only search for
|
|
* pointers that are aligned.
|
|
*/
|
|
struct saved_context {
|
|
struct pt_regs regs;
|
|
|
|
/*
|
|
* User CS and SS are saved in current_pt_regs(). The rest of the
|
|
* segment selectors need to be saved and restored here.
|
|
*/
|
|
u16 ds, es, fs, gs;
|
|
|
|
/*
|
|
* Usermode FSBASE and GSBASE may not match the fs and gs selectors,
|
|
* so we save them separately. We save the kernelmode GSBASE to
|
|
* restore percpu access after resume.
|
|
*/
|
|
unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
|
|
|
|
unsigned long cr0, cr2, cr3, cr4;
|
|
u64 misc_enable;
|
|
struct saved_msrs saved_msrs;
|
|
unsigned long efer;
|
|
u16 gdt_pad; /* Unused */
|
|
struct desc_ptr gdt_desc;
|
|
u16 idt_pad;
|
|
struct desc_ptr idt;
|
|
u16 ldt;
|
|
u16 tss;
|
|
unsigned long tr;
|
|
unsigned long safety;
|
|
unsigned long return_address;
|
|
bool misc_enable_saved;
|
|
} __attribute__((packed));
|
|
|
|
#define loaddebug(thread,register) \
|
|
set_debugreg((thread)->debugreg##register, register)
|
|
|
|
/* routines for saving/restoring kernel state */
|
|
extern char core_restore_code[];
|
|
extern char restore_registers[];
|
|
|
|
#endif /* _ASM_X86_SUSPEND_64_H */
|