In the LBR call stack mode, LBR information is used to reconstruct a call stack. To get the complete call stack, perf has to save/restore all LBR registers during a context switch. Due to the large number of LBR registers, this process causes a high CPU overhead. To reduce the CPU overhead during a context switch, use the XSAVES/XRSTORS instructions. Every XSAVE area must follow a canonical format: the legacy region, an XSAVE header and the extended region. Although the LBR information is only kept in the extended region, space for the legacy region and XSAVE header is still required. Add a new dedicated structure for LBR XSAVES support. Before enabling XSAVES support, the size of the LBR state has to be sanity checked, because: - the size of the software structure is calculated from the max number of the LBR depth, which is enumerated by the CPUID leaf for Arch LBR. The size of the LBR state is enumerated by the CPUID leaf for XSAVE support of Arch LBR. If the values from the two CPUID leaves are not consistent, it may trigger a buffer overflow. For example, a hypervisor may unconsciously set inconsistent values for the two emulated CPUID leaves. - unlike other state components, the size of an LBR state depends on the max number of LBRs, which may vary from generation to generation. Expose the function xfeature_size() for the sanity check. The LBR XSAVES support will be disabled if the size of the LBR state enumerated by CPUID doesn't match the size of the software structure. The XSAVE instruction requires 64-byte alignment for state buffers. A new macro is added to reflect the alignment requirement. A 64-byte aligned kmem_cache is created for architecture LBR. Currently, the structure for each state component is maintained in fpu/types.h. The structure for the new LBR state component should be maintained in the same place. Move structure lbr_entry to fpu/types.h as well for broader sharing. 
Add dedicated lbr_save/lbr_restore functions for LBR XSAVES support, which invoke the corresponding xstate helpers to XSAVES/XRSTORS LBR information at the context switch when the call stack mode is enabled. Since the XSAVES/XRSTORS instructions will eventually be invoked, the dedicated functions are named with an '_xsaves'/'_xrstors' postfix. Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Dave Hansen <dave.hansen@intel.com> Link: https://lkml.kernel.org/r/1593780569-62993-23-git-send-email-kan.liang@linux.intel.com
120 lines
4.1 KiB
C
120 lines
4.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __ASM_X86_XSAVE_H
|
|
#define __ASM_X86_XSAVE_H
|
|
|
|
#include <linux/uaccess.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/user.h>
|
|
|
|
/* Bit 63 of XCR0 is reserved for future expansion */
|
|
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
|
|
|
|
#define XSTATE_CPUID 0x0000000d
|
|
|
|
#define FXSAVE_SIZE 512
|
|
|
|
#define XSAVE_HDR_SIZE 64
|
|
#define XSAVE_HDR_OFFSET FXSAVE_SIZE
|
|
|
|
#define XSAVE_YMM_SIZE 256
|
|
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
|
|
|
|
#define XSAVE_ALIGNMENT 64
|
|
|
|
/* All currently supported user features */
|
|
#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
|
|
XFEATURE_MASK_SSE | \
|
|
XFEATURE_MASK_YMM | \
|
|
XFEATURE_MASK_OPMASK | \
|
|
XFEATURE_MASK_ZMM_Hi256 | \
|
|
XFEATURE_MASK_Hi16_ZMM | \
|
|
XFEATURE_MASK_PKRU | \
|
|
XFEATURE_MASK_BNDREGS | \
|
|
XFEATURE_MASK_BNDCSR)
|
|
|
|
/* All currently supported supervisor features */
|
|
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
|
|
|
|
/*
|
|
* A supervisor state component may not always contain valuable information,
|
|
* and its size may be huge. Saving/restoring such supervisor state components
|
|
* at each context switch can cause high CPU and space overhead, which should
|
|
* be avoided. Such supervisor state components should only be saved/restored
|
|
* on demand. The on-demand dynamic supervisor features are set in this mask.
|
|
*
|
|
* Unlike the existing supported supervisor features, a dynamic supervisor
|
|
* feature does not allocate a buffer in task->fpu, and the corresponding
|
|
* supervisor state component cannot be saved/restored at each context switch.
|
|
*
|
|
* To support a dynamic supervisor feature, a developer should follow the
|
|
* dos and don'ts as below:
|
|
* - Do dynamically allocate a buffer for the supervisor state component.
|
|
* - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the
|
|
* state component to/from the buffer.
|
|
* - Don't set the bit corresponding to the dynamic supervisor feature in
|
|
* IA32_XSS at run time, since it has been set at boot time.
|
|
*/
|
|
#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR)
|
|
|
|
/*
|
|
* Unsupported supervisor features. When a supervisor feature in this mask is
|
|
* supported in the future, move it to the supported supervisor feature mask.
|
|
*/
|
|
#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
|
|
|
|
/* All supervisor states including supported and unsupported states. */
|
|
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
|
|
XFEATURE_MASK_DYNAMIC | \
|
|
XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
|
|
|
|
#ifdef CONFIG_X86_64
|
|
#define REX_PREFIX "0x48, "
|
|
#else
|
|
#define REX_PREFIX
|
|
#endif
|
|
|
|
extern u64 xfeatures_mask_all;
|
|
|
|
static inline u64 xfeatures_mask_supervisor(void)
|
|
{
|
|
return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
|
|
}
|
|
|
|
static inline u64 xfeatures_mask_user(void)
|
|
{
|
|
return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
|
|
}
|
|
|
|
static inline u64 xfeatures_mask_dynamic(void)
|
|
{
|
|
if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
|
|
return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR;
|
|
|
|
return XFEATURE_MASK_DYNAMIC;
|
|
}
|
|
|
|
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
|
|
|
|
extern void __init update_regset_xstate_info(unsigned int size,
|
|
u64 xstate_mask);
|
|
|
|
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
|
|
const void *get_xsave_field_ptr(int xfeature_nr);
|
|
int using_compacted_format(void);
|
|
int xfeature_size(int xfeature_nr);
|
|
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
|
|
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
|
|
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
|
|
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
|
|
void copy_supervisor_to_kernel(struct xregs_state *xsave);
|
|
void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
|
|
void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
|
|
|
|
|
|
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
|
|
int validate_user_xstate_header(const struct xstate_header *hdr);
|
|
|
|
#endif
|