perf tools changes for v5.15: 2nd batch
- Add missing fields and remove some duplicate fields when printing a perf_event_attr. - Fix hybrid config terms list corruption. - Update kernel header copies, some resulted in new kernel features being automagically added to 'perf trace' syscall/tracepoint argument id->string translators. - Add a file generated during the documentation build to .gitignore. - Add an option to build without libbfd, as some distros, like Debian consider its ABI unstable. - Add support to print a textual representation of IBS raw sample data in 'perf report'. - Fix bpf 'perf test' sample mismatch reporting - Fix passing arguments to stackcollapse report in a 'perf script' python script. - Allow build-id with trailing zeros. - Look for ImageBase in PE file to compute .text offset. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYT0+hwAKCRCyPKLppCJ+ JxcPAQDO+iCKK/sF3TVN8f0T8xkFD6y8krBXPAtQHCAhVBeiqAD9F4R0VMX6nwy3 8rJnsNd2ODjywgFBO4uPy0N2fxBWjwo= =/hH1 -----END PGP SIGNATURE----- Merge tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux Pull more perf tools updates from Arnaldo Carvalho de Melo: - Add missing fields and remove some duplicate fields when printing a perf_event_attr. - Fix hybrid config terms list corruption. - Update kernel header copies, some resulted in new kernel features being automagically added to 'perf trace' syscall/tracepoint argument id->string translators. - Add a file generated during the documentation build to .gitignore. - Add an option to build without libbfd, as some distros, like Debian consider its ABI unstable. - Add support to print a textual representation of IBS raw sample data in 'perf report'. - Fix bpf 'perf test' sample mismatch reporting - Fix passing arguments to stackcollapse report in a 'perf script' python script. - Allow build-id with trailing zeros. - Look for ImageBase in PE file to compute .text offset. 
* tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (25 commits) tools headers UAPI: Update tools's copy of drm.h headers tools headers UAPI: Sync drm/i915_drm.h with the kernel sources tools headers UAPI: Sync linux/fs.h with the kernel sources tools headers UAPI: Sync linux/in.h copy with the kernel sources perf tools: Add an option to build without libbfd perf tools: Allow build-id with trailing zeros perf tools: Fix hybrid config terms list corruption perf tools: Factor out copy_config_terms() and free_config_terms() perf tools: Fix perf_event_attr__fprintf() missing/dupl. fields perf tools: Ignore Documentation dependency file perf bpf: Provide a weak btf__load_from_kernel_by_id() for older libbpf versions tools include UAPI: Update linux/mount.h copy perf beauty: Cover more flags in the move_mount syscall argument beautifier tools headers UAPI: Sync linux/prctl.h with the kernel sources tools include UAPI: Sync sound/asound.h copy with the kernel sources tools headers UAPI: Sync linux/kvm.h with the kernel sources tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources perf report: Add support to print a textual representation of IBS raw sample data perf report: Add tools/arch/x86/include/asm/amd-ibs.h perf env: Add perf_env__cpuid, perf_env__{nr_}pmu_mappings ...
This commit is contained in:
commit
b5b65f1398
132
tools/arch/x86/include/asm/amd-ibs.h
Normal file
132
tools/arch/x86/include/asm/amd-ibs.h
Normal file
@ -0,0 +1,132 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* From PPR Vol 1 for AMD Family 19h Model 01h B1
|
||||
* 55898 Rev 0.35 - Feb 5, 2021
|
||||
*/
|
||||
|
||||
#include "msr-index.h"
|
||||
|
||||
/*
|
||||
* IBS Hardware MSRs
|
||||
*/
|
||||
|
||||
/* MSR 0xc0011030: IBS Fetch Control */
|
||||
/**
 * union ibs_fetch_ctl - named view of the IBS Fetch Control register
 * @val: the raw 64-bit register value
 *
 * The anonymous bit-field struct overlays @val so each field can be read by
 * name; the field widths add up to exactly 64 bits.  The bit positions given
 * in the per-field comments assume bit-fields are allocated starting from the
 * least significant bit (NOTE(review): true for the x86 ABIs this header
 * targets; confirm before reusing elsewhere).
 */
union ibs_fetch_ctl {
	__u64 val;
	struct {
		__u64	fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
			fetch_cnt:16,	/* 16-31: instruction fetch count */
			fetch_lat:16,	/* 32-47: instruction fetch latency */
			fetch_en:1,	/* 48: instruction fetch enable */
			fetch_val:1,	/* 49: instruction fetch valid */
			fetch_comp:1,	/* 50: instruction fetch complete */
			ic_miss:1,	/* 51: i-cache miss */
			phy_addr_valid:1,/* 52: physical address valid */
			l1tlb_pgsz:2,	/* 53-54: i-cache L1TLB page size
					 *	  (needs IbsPhyAddrValid) */
			l1tlb_miss:1,	/* 55: i-cache fetch missed in L1TLB */
			l2tlb_miss:1,	/* 56: i-cache fetch missed in L2TLB */
			rand_en:1,	/* 57: random tagging enable */
			fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
					 *      (needs IbsFetchComp) */
			reserved:5;	/* 59-63: reserved */
	};
};
|
||||
|
||||
/* MSR 0xc0011033: IBS Execution Control */
|
||||
/**
 * union ibs_op_ctl - named view of the IBS Execution Control register
 * @val: the raw 64-bit register value
 *
 * The anonymous bit-field struct overlays @val; the field widths add up to
 * exactly 64 bits.  Bit positions in the per-field comments assume
 * least-significant-bit-first bit-field allocation (NOTE(review): holds for
 * the x86 ABIs this header targets).
 */
union ibs_op_ctl {
	__u64 val;
	struct {
		__u64	opmaxcnt:16,	/* 0-15: periodic op max. count */
			reserved0:1,	/* 16: reserved */
			op_en:1,	/* 17: op sampling enable */
			op_val:1,	/* 18: op sample valid */
			cnt_ctl:1,	/* 19: periodic op counter control */
			opmaxcnt_ext:7,	/* 20-26: upper 7 bits of periodic op maximum count */
			reserved1:5,	/* 27-31: reserved */
			opcurcnt:27,	/* 32-58: periodic op counter current count */
			reserved2:5;	/* 59-63: reserved */
	};
};
|
||||
|
||||
/* MSR 0xc0011035: IBS Op Data 1 */
|
||||
/**
 * union ibs_op_data - named view of the first IBS Op Data register
 * @val: the raw 64-bit register value
 *
 * The anonymous bit-field struct overlays @val; the field widths add up to
 * exactly 64 bits.  Bit positions in the per-field comments assume
 * least-significant-bit-first bit-field allocation (NOTE(review): holds for
 * the x86 ABIs this header targets).
 */
union ibs_op_data {
	__u64 val;
	struct {
		__u64	comp_to_ret_ctr:16,	/* 0-15: op completion to retire count */
			tag_to_ret_ctr:16,	/* 16-31: op tag to retire count */
			reserved1:2,		/* 32-33: reserved */
			op_return:1,		/* 34: return op */
			op_brn_taken:1,		/* 35: taken branch op */
			op_brn_misp:1,		/* 36: mispredicted branch op */
			op_brn_ret:1,		/* 37: branch op retired */
			op_rip_invalid:1,	/* 38: RIP is invalid */
			op_brn_fuse:1,		/* 39: fused branch op */
			op_microcode:1,		/* 40: microcode op */
			reserved2:23;		/* 41-63: reserved */
	};
};
|
||||
|
||||
/* MSR 0xc0011036: IBS Op Data 2 */
|
||||
/**
 * union ibs_op_data2 - named view of the IBS Op Data 2 register
 * @val: the raw 64-bit register value
 *
 * The anonymous bit-field struct overlays @val.  The trailing reserved field
 * is 58 bits wide so that it starts right after cache_hit_st (bit 5) and the
 * widths add up to the full 64 bits of @val.  Bit positions in the per-field
 * comments assume least-significant-bit-first bit-field allocation
 * (NOTE(review): holds for the x86 ABIs this header targets).
 */
union ibs_op_data2 {
	__u64 val;
	struct {
		__u64	data_src:3,	/* 0-2: data source */
			reserved0:1,	/* 3: reserved */
			rmt_node:1,	/* 4: destination node */
			cache_hit_st:1,	/* 5: cache hit state */
			reserved1:58;	/* 6-63: reserved */
	};
};
|
||||
|
||||
/* MSR 0xc0011037: IBS Op Data 3 */
|
||||
/**
 * union ibs_op_data3 - named view of the IBS Op Data 3 register
 * @val: the raw 64-bit register value
 *
 * The anonymous bit-field struct overlays @val; the field widths add up to
 * exactly 64 bits.  Bit positions in the per-field comments assume
 * least-significant-bit-first bit-field allocation (NOTE(review): holds for
 * the x86 ABIs this header targets).
 */
union ibs_op_data3 {
	__u64 val;
	struct {
		__u64	ld_op:1,			/* 0: load op */
			st_op:1,			/* 1: store op */
			dc_l1tlb_miss:1,		/* 2: data cache L1TLB miss */
			dc_l2tlb_miss:1,		/* 3: data cache L2TLB miss */
			dc_l1tlb_hit_2m:1,		/* 4: data cache L1TLB hit in 2M page */
			dc_l1tlb_hit_1g:1,		/* 5: data cache L1TLB hit in 1G page */
			dc_l2tlb_hit_2m:1,		/* 6: data cache L2TLB hit in 2M page */
			dc_miss:1,			/* 7: data cache miss */
			dc_mis_acc:1,			/* 8: misaligned access */
			reserved:4,			/* 9-12: reserved */
			dc_wc_mem_acc:1,		/* 13: write combining memory access */
			dc_uc_mem_acc:1,		/* 14: uncacheable memory access */
			dc_locked_op:1,			/* 15: locked operation */
			dc_miss_no_mab_alloc:1,		/* 16: DC miss with no MAB allocated */
			dc_lin_addr_valid:1,		/* 17: data cache linear address valid */
			dc_phy_addr_valid:1,		/* 18: data cache physical address valid */
			dc_l2_tlb_hit_1g:1,		/* 19: data cache L2TLB hit in 1G page */
			l2_miss:1,			/* 20: L2 cache miss */
			sw_pf:1,			/* 21: software prefetch */
			op_mem_width:4,			/* 22-25: load/store size in bytes */
			op_dc_miss_open_mem_reqs:6,	/* 26-31: outstanding mem reqs on DC fill */
			dc_miss_lat:16,			/* 32-47: data cache miss latency */
			tlb_refill_lat:16;		/* 48-63: L1 TLB refill latency */
	};
};
|
||||
|
||||
/* MSR 0xc001103c: IBS Fetch Control Extended */
|
||||
/**
 * union ic_ibs_extd_ctl - named view of the IBS Fetch Control Extended register
 * @val: the raw 64-bit register value
 *
 * The anonymous bit-field struct overlays @val; only the low 16 bits carry
 * data, the remaining 48 bits are reserved.  Bit positions in the per-field
 * comments assume least-significant-bit-first bit-field allocation
 * (NOTE(review): holds for the x86 ABIs this header targets).
 */
union ic_ibs_extd_ctl {
	__u64 val;
	struct {
		__u64	itlb_refill_lat:16,	/* 0-15: ITLB Refill latency for sampled fetch */
			reserved:48;		/* 16-63: reserved */
	};
};
|
||||
|
||||
/*
|
||||
* IBS driver related
|
||||
*/
|
||||
|
||||
struct perf_ibs_data {
|
||||
u32 size;
|
||||
union {
|
||||
u32 data[0]; /* data buffer starts here */
|
||||
u32 caps;
|
||||
};
|
||||
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
|
||||
};
|
@ -295,6 +295,7 @@ struct kvm_debug_exit_arch {
|
||||
#define KVM_GUESTDBG_USE_HW_BP 0x00020000
|
||||
#define KVM_GUESTDBG_INJECT_DB 0x00040000
|
||||
#define KVM_GUESTDBG_INJECT_BP 0x00080000
|
||||
#define KVM_GUESTDBG_BLOCKIRQ 0x00100000
|
||||
|
||||
/* for KVM_SET_GUEST_DEBUG */
|
||||
struct kvm_guest_debug_arch {
|
||||
|
@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise)
|
||||
#define __NR_remap_file_pages 234
|
||||
__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
|
||||
#define __NR_mbind 235
|
||||
__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
|
||||
__SYSCALL(__NR_mbind, sys_mbind)
|
||||
#define __NR_get_mempolicy 236
|
||||
__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
|
||||
__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
|
||||
#define __NR_set_mempolicy 237
|
||||
__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
|
||||
__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
|
||||
#define __NR_migrate_pages 238
|
||||
__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
|
||||
__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
|
||||
#define __NR_move_pages 239
|
||||
__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
|
||||
__SYSCALL(__NR_move_pages, sys_move_pages)
|
||||
#endif
|
||||
|
||||
#define __NR_rt_tgsigqueueinfo 240
|
||||
@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
|
||||
#define __NR_memfd_secret 447
|
||||
__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
|
||||
#endif
|
||||
#define __NR_process_mrelease 448
|
||||
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 448
|
||||
#define __NR_syscalls 449
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
@ -635,8 +635,8 @@ struct drm_gem_open {
|
||||
/**
|
||||
* DRM_CAP_VBLANK_HIGH_CRTC
|
||||
*
|
||||
* If set to 1, the kernel supports specifying a CRTC index in the high bits of
|
||||
* &drm_wait_vblank_request.type.
|
||||
* If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>`
|
||||
* in the high bits of &drm_wait_vblank_request.type.
|
||||
*
|
||||
* Starting kernel version 2.6.39, this capability is always set to 1.
|
||||
*/
|
||||
@ -1050,6 +1050,16 @@ extern "C" {
|
||||
#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob)
|
||||
#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
|
||||
#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
|
||||
/**
|
||||
* DRM_IOCTL_MODE_RMFB - Remove a framebuffer.
|
||||
*
|
||||
* This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
|
||||
* argument is a framebuffer object ID.
|
||||
*
|
||||
* Warning: removing a framebuffer currently in-use on an enabled plane will
|
||||
* disable that plane. The CRTC the plane is linked to may also be disabled
|
||||
* (depending on driver capabilities).
|
||||
*/
|
||||
#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int)
|
||||
#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
|
||||
#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
|
||||
|
@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait {
|
||||
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
|
||||
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
|
||||
#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
|
||||
/*
|
||||
* Indicates the 2k user priority levels are statically mapped into 3 buckets as
|
||||
* follows:
|
||||
*
|
||||
* -1k to -1 Low priority
|
||||
* 0 Normal priority
|
||||
* 1 to 1k Highest priority
|
||||
*/
|
||||
#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
|
||||
|
||||
#define I915_PARAM_HUC_STATUS 42
|
||||
|
||||
@ -674,6 +683,9 @@ typedef struct drm_i915_irq_wait {
|
||||
*/
|
||||
#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
|
||||
|
||||
/* Query if the kernel supports the I915_USERPTR_PROBE flag. */
|
||||
#define I915_PARAM_HAS_USERPTR_PROBE 56
|
||||
|
||||
/* Must be kept compact -- no holes and well documented */
|
||||
|
||||
typedef struct drm_i915_getparam {
|
||||
@ -849,45 +861,113 @@ struct drm_i915_gem_mmap_gtt {
|
||||
__u64 offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object.
|
||||
*
|
||||
* This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl,
|
||||
* and is used to retrieve the fake offset to mmap an object specified by &handle.
|
||||
*
|
||||
* The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+.
|
||||
* `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave
|
||||
* as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`.
|
||||
*/
|
||||
struct drm_i915_gem_mmap_offset {
|
||||
/** Handle for the object being mapped. */
|
||||
/** @handle: Handle for the object being mapped. */
|
||||
__u32 handle;
|
||||
/** @pad: Must be zero */
|
||||
__u32 pad;
|
||||
/**
|
||||
* Fake offset to use for subsequent mmap call
|
||||
* @offset: The fake offset to use for subsequent mmap call
|
||||
*
|
||||
* This is a fixed-size type for 32/64 compatibility.
|
||||
*/
|
||||
__u64 offset;
|
||||
|
||||
/**
|
||||
* Flags for extended behaviour.
|
||||
* @flags: Flags for extended behaviour.
|
||||
*
|
||||
* It is mandatory that one of the MMAP_OFFSET types
|
||||
* (GTT, WC, WB, UC, etc) should be included.
|
||||
* It is mandatory that one of the `MMAP_OFFSET` types
|
||||
* should be included:
|
||||
*
|
||||
* - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined)
|
||||
* - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching.
|
||||
* - `I915_MMAP_OFFSET_WB`: Use Write-Back caching.
|
||||
* - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching.
|
||||
*
|
||||
* On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid
|
||||
* type. On devices without local memory, this caching mode is invalid.
|
||||
*
|
||||
* As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will
|
||||
* be used, depending on the object placement on creation. WB will be used
|
||||
* when the object can only exist in system memory, WC otherwise.
|
||||
*/
|
||||
__u64 flags;
|
||||
#define I915_MMAP_OFFSET_GTT 0
|
||||
#define I915_MMAP_OFFSET_WC 1
|
||||
#define I915_MMAP_OFFSET_WB 2
|
||||
#define I915_MMAP_OFFSET_UC 3
|
||||
|
||||
/*
|
||||
* Zero-terminated chain of extensions.
|
||||
#define I915_MMAP_OFFSET_GTT 0
|
||||
#define I915_MMAP_OFFSET_WC 1
|
||||
#define I915_MMAP_OFFSET_WB 2
|
||||
#define I915_MMAP_OFFSET_UC 3
|
||||
#define I915_MMAP_OFFSET_FIXED 4
|
||||
|
||||
/**
|
||||
* @extensions: Zero-terminated chain of extensions.
|
||||
*
|
||||
* No current extensions defined; mbz.
|
||||
*/
|
||||
__u64 extensions;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in
|
||||
* preparation for accessing the pages via some CPU domain.
|
||||
*
|
||||
* Specifying a new write or read domain will flush the object out of the
|
||||
* previous domain(if required), before then updating the objects domain
|
||||
* tracking with the new domain.
|
||||
*
|
||||
* Note this might involve waiting for the object first if it is still active on
|
||||
* the GPU.
|
||||
*
|
||||
* Supported values for @read_domains and @write_domain:
|
||||
*
|
||||
* - I915_GEM_DOMAIN_WC: Uncached write-combined domain
|
||||
* - I915_GEM_DOMAIN_CPU: CPU cache domain
|
||||
* - I915_GEM_DOMAIN_GTT: Mappable aperture domain
|
||||
*
|
||||
* All other domains are rejected.
|
||||
*
|
||||
* Note that for discrete, starting from DG1, this is no longer supported, and
|
||||
* is instead rejected. On such platforms the CPU domain is effectively static,
|
||||
* where we also only support a single &drm_i915_gem_mmap_offset cache mode,
|
||||
* which can't be set explicitly and instead depends on the object placements,
|
||||
* as per the below.
|
||||
*
|
||||
* Implicit caching rules, starting from DG1:
|
||||
*
|
||||
* - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
|
||||
* contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
|
||||
* mapped as write-combined only.
|
||||
*
|
||||
* - Everything else is always allocated and mapped as write-back, with the
|
||||
* guarantee that everything is also coherent with the GPU.
|
||||
*
|
||||
* Note that this is likely to change in the future again, where we might need
|
||||
* more flexibility on future devices, so making this all explicit as part of a
|
||||
* new &drm_i915_gem_create_ext extension is probable.
|
||||
*/
|
||||
struct drm_i915_gem_set_domain {
|
||||
/** Handle for the object */
|
||||
/** @handle: Handle for the object. */
|
||||
__u32 handle;
|
||||
|
||||
/** New read domains */
|
||||
/** @read_domains: New read domains. */
|
||||
__u32 read_domains;
|
||||
|
||||
/** New write domain */
|
||||
/**
|
||||
* @write_domain: New write domain.
|
||||
*
|
||||
* Note that having something in the write domain implies it's in the
|
||||
* read domain, and only that read domain.
|
||||
*/
|
||||
__u32 write_domain;
|
||||
};
|
||||
|
||||
@ -1348,12 +1428,11 @@ struct drm_i915_gem_busy {
|
||||
* reading from the object simultaneously.
|
||||
*
|
||||
* The value of each engine class is the same as specified in the
|
||||
* I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
|
||||
* I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e.
|
||||
* I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
|
||||
* reported as active itself. Some hardware may have parallel
|
||||
* execution engines, e.g. multiple media engines, which are
|
||||
* mapped to the same class identifier and so are not separately
|
||||
* reported for busyness.
|
||||
* Some hardware may have parallel execution engines, e.g. multiple
|
||||
* media engines, which are mapped to the same class identifier and so
|
||||
* are not separately reported for busyness.
|
||||
*
|
||||
* Caveat emptor:
|
||||
* Only the boolean result of this query is reliable; that is whether
|
||||
@ -1364,43 +1443,79 @@ struct drm_i915_gem_busy {
|
||||
};
|
||||
|
||||
/**
|
||||
* I915_CACHING_NONE
|
||||
* struct drm_i915_gem_caching - Set or get the caching for given object
|
||||
* handle.
|
||||
*
|
||||
* GPU access is not coherent with cpu caches. Default for machines without an
|
||||
* LLC.
|
||||
*/
|
||||
#define I915_CACHING_NONE 0
|
||||
/**
|
||||
* I915_CACHING_CACHED
|
||||
* Allow userspace to control the GTT caching bits for a given object when the
|
||||
* object is later mapped through the ppGTT(or GGTT on older platforms lacking
|
||||
* ppGTT support, or if the object is used for scanout). Note that this might
|
||||
* require unbinding the object from the GTT first, if its current caching value
|
||||
* doesn't match.
|
||||
*
|
||||
* GPU access is coherent with cpu caches and furthermore the data is cached in
|
||||
* last-level caches shared between cpu cores and the gpu GT. Default on
|
||||
* machines with HAS_LLC.
|
||||
*/
|
||||
#define I915_CACHING_CACHED 1
|
||||
/**
|
||||
* I915_CACHING_DISPLAY
|
||||
* Note that this all changes on discrete platforms, starting from DG1, the
|
||||
* set/get caching is no longer supported, and is now rejected. Instead the CPU
|
||||
* caching attributes(WB vs WC) will become an immutable creation time property
|
||||
* for the object, along with the GTT caching level. For now we don't expose any
|
||||
* new uAPI for this, instead on DG1 this is all implicit, although this largely
|
||||
* shouldn't matter since DG1 is coherent by default(without any way of
|
||||
* controlling it).
|
||||
*
|
||||
* Special GPU caching mode which is coherent with the scanout engines.
|
||||
* Transparently falls back to I915_CACHING_NONE on platforms where no special
|
||||
* cache mode (like write-through or gfdt flushing) is available. The kernel
|
||||
* automatically sets this mode when using a buffer as a scanout target.
|
||||
* Userspace can manually set this mode to avoid a costly stall and clflush in
|
||||
* the hotpath of drawing the first frame.
|
||||
* Implicit caching rules, starting from DG1:
|
||||
*
|
||||
* - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
|
||||
* contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
|
||||
* mapped as write-combined only.
|
||||
*
|
||||
* - Everything else is always allocated and mapped as write-back, with the
|
||||
* guarantee that everything is also coherent with the GPU.
|
||||
*
|
||||
* Note that this is likely to change in the future again, where we might need
|
||||
* more flexibility on future devices, so making this all explicit as part of a
|
||||
* new &drm_i915_gem_create_ext extension is probable.
|
||||
*
|
||||
* Side note: Part of the reason for this is that changing the at-allocation-time CPU
|
||||
* caching attributes for the pages might be required(and is expensive) if we
|
||||
* need to then CPU map the pages later with different caching attributes. This
|
||||
* inconsistent caching behaviour, while supported on x86, is not universally
|
||||
* supported on other architectures. So for simplicity we opt for setting
|
||||
* everything at creation time, whilst also making it immutable, on discrete
|
||||
* platforms.
|
||||
*/
|
||||
#define I915_CACHING_DISPLAY 2
|
||||
|
||||
struct drm_i915_gem_caching {
|
||||
/**
|
||||
* Handle of the buffer to set/get the caching level of. */
|
||||
* @handle: Handle of the buffer to set/get the caching level.
|
||||
*/
|
||||
__u32 handle;
|
||||
|
||||
/**
|
||||
* Cacheing level to apply or return value
|
||||
* @caching: The GTT caching level to apply or possible return value.
|
||||
*
|
||||
* bits0-15 are for generic caching control (i.e. the above defined
|
||||
* values). bits16-31 are reserved for platform-specific variations
|
||||
* (e.g. l3$ caching on gen7). */
|
||||
* The supported @caching values:
|
||||
*
|
||||
* I915_CACHING_NONE:
|
||||
*
|
||||
* GPU access is not coherent with CPU caches. Default for machines
|
||||
* without an LLC. This means manual flushing might be needed, if we
|
||||
* want GPU access to be coherent.
|
||||
*
|
||||
* I915_CACHING_CACHED:
|
||||
*
|
||||
* GPU access is coherent with CPU caches and furthermore the data is
|
||||
* cached in last-level caches shared between CPU cores and the GPU GT.
|
||||
*
|
||||
* I915_CACHING_DISPLAY:
|
||||
*
|
||||
* Special GPU caching mode which is coherent with the scanout engines.
|
||||
* Transparently falls back to I915_CACHING_NONE on platforms where no
|
||||
* special cache mode (like write-through or gfdt flushing) is
|
||||
* available. The kernel automatically sets this mode when using a
|
||||
* buffer as a scanout target. Userspace can manually set this mode to
|
||||
* avoid a costly stall and clflush in the hotpath of drawing the first
|
||||
* frame.
|
||||
*/
|
||||
#define I915_CACHING_NONE 0
|
||||
#define I915_CACHING_CACHED 1
|
||||
#define I915_CACHING_DISPLAY 2
|
||||
__u32 caching;
|
||||
};
|
||||
|
||||
@ -1639,6 +1754,10 @@ struct drm_i915_gem_context_param {
|
||||
__u32 size;
|
||||
__u64 param;
|
||||
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
|
||||
/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance
|
||||
* someone somewhere has attempted to use it, never re-use this context
|
||||
* param number.
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
|
||||
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
|
||||
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
|
||||
@ -1723,24 +1842,8 @@ struct drm_i915_gem_context_param {
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
|
||||
|
||||
/*
|
||||
* I915_CONTEXT_PARAM_RINGSIZE:
|
||||
*
|
||||
* Sets the size of the CS ringbuffer to use for logical ring contexts. This
|
||||
* applies a limit of how many batches can be queued to HW before the caller
|
||||
* is blocked due to lack of space for more commands.
|
||||
*
|
||||
* Only reliably possible to be set prior to first use, i.e. during
|
||||
* construction. At any later point, the current execution must be flushed as
|
||||
* the ring can only be changed while the context is idle. Note, the ringsize
|
||||
* can be specified as a constructor property, see
|
||||
* I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required.
|
||||
*
|
||||
* Only applies to the current set of engine and lost when those engines
|
||||
* are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
|
||||
*
|
||||
* Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
|
||||
* Default is 16 KiB.
|
||||
/* This API has been removed. On the off chance someone somewhere has
|
||||
* attempted to use it, never re-use this context param number.
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_RINGSIZE 0xc
|
||||
/* Must be kept compact -- no holes and well documented */
|
||||
@ -1807,6 +1910,69 @@ struct drm_i915_gem_context_param_sseu {
|
||||
__u32 rsvd;
|
||||
};
|
||||
|
||||
/**
|
||||
* DOC: Virtual Engine uAPI
|
||||
*
|
||||
* Virtual engine is a concept where userspace is able to configure a set of
|
||||
* physical engines, submit a batch buffer, and let the driver execute it on any
|
||||
* engine from the set as it sees fit.
|
||||
*
|
||||
* This is primarily useful on parts which have multiple instances of a same
|
||||
* class engine, like for example GT3+ Skylake parts with their two VCS engines.
|
||||
*
|
||||
* For instance userspace can enumerate all engines of a certain class using the
|
||||
* previously described `Engine Discovery uAPI`_. After that userspace can
|
||||
* create a GEM context with a placeholder slot for the virtual engine (using
|
||||
* `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class
|
||||
* and instance respectively) and finally using the
|
||||
* `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in
|
||||
* the same reserved slot.
|
||||
*
|
||||
* Example of creating a virtual engine and submitting a batch buffer to it:
|
||||
*
|
||||
* .. code-block:: C
|
||||
*
|
||||
* I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = {
|
||||
* .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
|
||||
* .engine_index = 0, // Place this virtual engine into engine map slot 0
|
||||
* .num_siblings = 2,
|
||||
* .engines = { { I915_ENGINE_CLASS_VIDEO, 0 },
|
||||
* { I915_ENGINE_CLASS_VIDEO, 1 }, },
|
||||
* };
|
||||
* I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
|
||||
* .engines = { { I915_ENGINE_CLASS_INVALID,
|
||||
* I915_ENGINE_CLASS_INVALID_NONE } },
|
||||
* .extensions = to_user_pointer(&virtual), // Chains after load_balance extension
|
||||
* };
|
||||
* struct drm_i915_gem_context_create_ext_setparam p_engines = {
|
||||
* .base = {
|
||||
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
|
||||
* },
|
||||
* .param = {
|
||||
* .param = I915_CONTEXT_PARAM_ENGINES,
|
||||
* .value = to_user_pointer(&engines),
|
||||
* .size = sizeof(engines),
|
||||
* },
|
||||
* };
|
||||
* struct drm_i915_gem_context_create_ext create = {
|
||||
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
|
||||
* .extensions = to_user_pointer(&p_engines),
|
||||
* };
|
||||
*
|
||||
* ctx_id = gem_context_create_ext(drm_fd, &create);
|
||||
*
|
||||
* // Now we have created a GEM context with its engine map containing a
|
||||
* // single virtual engine. Submissions to this slot can go either to
|
||||
* // vcs0 or vcs1, depending on the load balancing algorithm used inside
|
||||
* // the driver. The load balancing is dynamic from one batch buffer to
|
||||
* // another and transparent to userspace.
|
||||
*
|
||||
* ...
|
||||
* execbuf.rsvd1 = ctx_id;
|
||||
* execbuf.flags = 0; // Submits to index 0 which is the virtual engine
|
||||
* gem_execbuf(drm_fd, &execbuf);
|
||||
*/
|
||||
|
||||
/*
|
||||
* i915_context_engines_load_balance:
|
||||
*
|
||||
@ -1883,6 +2049,61 @@ struct i915_context_engines_bond {
|
||||
struct i915_engine_class_instance engines[N__]; \
|
||||
} __attribute__((packed)) name__
|
||||
|
||||
/**
|
||||
* DOC: Context Engine Map uAPI
|
||||
*
|
||||
* Context engine map is a new way of addressing engines when submitting batch-
|
||||
* buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT`
|
||||
* inside the flags field of `struct drm_i915_gem_execbuffer2`.
|
||||
*
|
||||
* To use it created GEM contexts need to be configured with a list of engines
|
||||
* the user is intending to submit to. This is accomplished using the
|
||||
* `I915_CONTEXT_PARAM_ENGINES` parameter and `struct
|
||||
* i915_context_param_engines`.
|
||||
*
|
||||
* For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the
|
||||
* configured map.
|
||||
*
|
||||
* Example of creating such context and submitting against it:
|
||||
*
|
||||
* .. code-block:: C
|
||||
*
|
||||
* I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
|
||||
* .engines = { { I915_ENGINE_CLASS_RENDER, 0 },
|
||||
* { I915_ENGINE_CLASS_COPY, 0 } }
|
||||
* };
|
||||
* struct drm_i915_gem_context_create_ext_setparam p_engines = {
|
||||
* .base = {
|
||||
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
|
||||
* },
|
||||
* .param = {
|
||||
* .param = I915_CONTEXT_PARAM_ENGINES,
|
||||
* .value = to_user_pointer(&engines),
|
||||
* .size = sizeof(engines),
|
||||
* },
|
||||
* };
|
||||
* struct drm_i915_gem_context_create_ext create = {
|
||||
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
|
||||
* .extensions = to_user_pointer(&p_engines),
|
||||
* };
|
||||
*
|
||||
* ctx_id = gem_context_create_ext(drm_fd, &create);
|
||||
*
|
||||
* // We have now created a GEM context with two engines in the map:
|
||||
* // Index 0 points to rcs0 while index 1 points to bcs0. Other engines
|
||||
* // will not be accessible from this context.
|
||||
*
|
||||
* ...
|
||||
* execbuf.rsvd1 = ctx_id;
|
||||
* execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context
|
||||
* gem_execbuf(drm_fd, &execbuf);
|
||||
*
|
||||
* ...
|
||||
* execbuf.rsvd1 = ctx_id;
|
||||
* execbuf.flags = 1; // Submits to index 1, which is bcs0 for this context
|
||||
* gem_execbuf(drm_fd, &execbuf);
|
||||
*/
|
||||
|
||||
struct i915_context_param_engines {
|
||||
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
|
||||
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
|
||||
@ -1901,20 +2122,10 @@ struct drm_i915_gem_context_create_ext_setparam {
|
||||
struct drm_i915_gem_context_param param;
|
||||
};
|
||||
|
||||
struct drm_i915_gem_context_create_ext_clone {
|
||||
/* This API has been removed. On the off chance someone somewhere has
|
||||
* attempted to use it, never re-use this extension number.
|
||||
*/
|
||||
#define I915_CONTEXT_CREATE_EXT_CLONE 1
|
||||
struct i915_user_extension base;
|
||||
__u32 clone_id;
|
||||
__u32 flags;
|
||||
#define I915_CONTEXT_CLONE_ENGINES (1u << 0)
|
||||
#define I915_CONTEXT_CLONE_FLAGS (1u << 1)
|
||||
#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2)
|
||||
#define I915_CONTEXT_CLONE_SSEU (1u << 3)
|
||||
#define I915_CONTEXT_CLONE_TIMELINE (1u << 4)
|
||||
#define I915_CONTEXT_CLONE_VM (1u << 5)
|
||||
#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
|
||||
__u64 rsvd;
|
||||
};
|
||||
|
||||
struct drm_i915_gem_context_destroy {
|
||||
__u32 ctx_id;
|
||||
@ -1986,14 +2197,69 @@ struct drm_i915_reset_stats {
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_i915_gem_userptr - Create GEM object from user allocated memory.
|
||||
*
|
||||
* Userptr objects have several restrictions on what ioctls can be used with the
|
||||
* object handle.
|
||||
*/
|
||||
struct drm_i915_gem_userptr {
|
||||
/**
|
||||
* @user_ptr: The pointer to the allocated memory.
|
||||
*
|
||||
* Needs to be aligned to PAGE_SIZE.
|
||||
*/
|
||||
__u64 user_ptr;
|
||||
|
||||
/**
|
||||
* @user_size:
|
||||
*
|
||||
* The size in bytes for the allocated memory. This will also become the
|
||||
* object size.
|
||||
*
|
||||
* Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE,
|
||||
* or larger.
|
||||
*/
|
||||
__u64 user_size;
|
||||
|
||||
/**
|
||||
* @flags:
|
||||
*
|
||||
* Supported flags:
|
||||
*
|
||||
* I915_USERPTR_READ_ONLY:
|
||||
*
|
||||
* Mark the object as readonly, this also means GPU access can only be
|
||||
* readonly. This is only supported on HW which supports readonly access
|
||||
* through the GTT. If the HW can't support readonly access, an error is
|
||||
* returned.
|
||||
*
|
||||
* I915_USERPTR_PROBE:
|
||||
*
|
||||
* Probe the provided @user_ptr range and validate that the @user_ptr is
|
||||
* indeed pointing to normal memory and that the range is also valid.
|
||||
* For example if some garbage address is given to the kernel, then this
|
||||
* should complain.
|
||||
*
|
||||
* Returns -EFAULT if the probe failed.
|
||||
*
|
||||
* Note that this doesn't populate the backing pages, and also doesn't
|
||||
* guarantee that the object will remain valid when the object is
|
||||
* eventually used.
|
||||
*
|
||||
* The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE
|
||||
* returns a non-zero value.
|
||||
*
|
||||
* I915_USERPTR_UNSYNCHRONIZED:
|
||||
*
|
||||
* NOT USED. Setting this flag will result in an error.
|
||||
*/
|
||||
__u32 flags;
|
||||
#define I915_USERPTR_READ_ONLY 0x1
|
||||
#define I915_USERPTR_PROBE 0x2
|
||||
#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
|
||||
/**
|
||||
* Returned handle for the object.
|
||||
* @handle: Returned handle for the object.
|
||||
*
|
||||
* Object handles are nonzero.
|
||||
*/
|
||||
@ -2376,6 +2642,76 @@ struct drm_i915_query_topology_info {
|
||||
__u8 data[];
|
||||
};
|
||||
|
||||
/**
|
||||
* DOC: Engine Discovery uAPI
|
||||
*
|
||||
* Engine discovery uAPI is a way of enumerating physical engines present in a
|
||||
* GPU associated with an open i915 DRM file descriptor. This supersedes the old
|
||||
* way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like
|
||||
* `I915_PARAM_HAS_BLT`.
|
||||
*
|
||||
* The need for this interface came starting with Icelake and newer GPUs, which
|
||||
* started to establish a pattern of having multiple engines of the same class,
|
||||
* where not all instances were always completely functionally equivalent.
|
||||
*
|
||||
* Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the
|
||||
* `DRM_I915_QUERY_ENGINE_INFO` as the queried item id.
|
||||
*
|
||||
* Example for getting the list of engines:
|
||||
*
|
||||
* .. code-block:: C
|
||||
*
|
||||
* struct drm_i915_query_engine_info *info;
|
||||
* struct drm_i915_query_item item = {
|
||||
* .query_id = DRM_I915_QUERY_ENGINE_INFO,
|
||||
* };
|
||||
* struct drm_i915_query query = {
|
||||
* .num_items = 1,
|
||||
* .items_ptr = (uintptr_t)&item,
|
||||
* };
|
||||
* int err, i;
|
||||
*
|
||||
* // First query the size of the blob we need, this needs to be large
|
||||
* // enough to hold our array of engines. The kernel will fill out the
|
||||
* // item.length for us, which is the number of bytes we need.
|
||||
* //
|
||||
* // Alternatively a large buffer can be allocated straight away enabling
|
||||
* // querying in one pass, in which case item.length should contain the
|
||||
* // length of the provided buffer.
|
||||
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
|
||||
* if (err) ...
|
||||
*
|
||||
* info = calloc(1, item.length);
|
||||
* // Now that we allocated the required number of bytes, we call the ioctl
|
||||
* // again, this time with the data_ptr pointing to our newly allocated
|
||||
* // blob, which the kernel can then populate with info on all engines.
|
||||
* item.data_ptr = (uintptr_t)info;
|
||||
*
|
||||
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
|
||||
* if (err) ...
|
||||
*
|
||||
* // We can now access each engine in the array
|
||||
* for (i = 0; i < info->num_engines; i++) {
|
||||
* struct drm_i915_engine_info einfo = info->engines[i];
|
||||
* u16 class = einfo.engine.class;
|
||||
* u16 instance = einfo.engine.instance;
|
||||
* ....
|
||||
* }
|
||||
*
|
||||
* free(info);
|
||||
*
|
||||
* Each of the enumerated engines, apart from being defined by its class and
|
||||
* instance (see `struct i915_engine_class_instance`), also can have flags and
|
||||
* capabilities defined as documented in i915_drm.h.
|
||||
*
|
||||
* For instance video engines which support HEVC encoding will have the
|
||||
* `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set.
|
||||
*
|
||||
* Engine discovery only fully comes into its own when combined with the new way
|
||||
* of addressing engines when submitting batch buffers using contexts with
|
||||
* engine maps configured.
|
||||
*/
|
||||
|
||||
/**
|
||||
* struct drm_i915_engine_info
|
||||
*
|
||||
|
@ -184,6 +184,7 @@ struct fsxattr {
|
||||
#define BLKSECDISCARD _IO(0x12,125)
|
||||
#define BLKROTATIONAL _IO(0x12,126)
|
||||
#define BLKZEROOUT _IO(0x12,127)
|
||||
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
|
||||
/*
|
||||
* A jump here: 130-136 are reserved for zoned block devices
|
||||
* (see uapi/linux/blkzoned.h)
|
||||
|
@ -188,11 +188,22 @@ struct ip_mreq_source {
|
||||
};
|
||||
|
||||
struct ip_msfilter {
|
||||
__be32 imsf_multiaddr;
|
||||
__be32 imsf_interface;
|
||||
__u32 imsf_fmode;
|
||||
__u32 imsf_numsrc;
|
||||
__be32 imsf_slist[1];
|
||||
union {
|
||||
struct {
|
||||
__be32 imsf_multiaddr_aux;
|
||||
__be32 imsf_interface_aux;
|
||||
__u32 imsf_fmode_aux;
|
||||
__u32 imsf_numsrc_aux;
|
||||
__be32 imsf_slist[1];
|
||||
};
|
||||
struct {
|
||||
__be32 imsf_multiaddr;
|
||||
__be32 imsf_interface;
|
||||
__u32 imsf_fmode;
|
||||
__u32 imsf_numsrc;
|
||||
__be32 imsf_slist_flex[];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
#define IP_MSFILTER_SIZE(numsrc) \
|
||||
@ -211,11 +222,22 @@ struct group_source_req {
|
||||
};
|
||||
|
||||
struct group_filter {
|
||||
__u32 gf_interface; /* interface index */
|
||||
struct __kernel_sockaddr_storage gf_group; /* multicast address */
|
||||
__u32 gf_fmode; /* filter mode */
|
||||
__u32 gf_numsrc; /* number of sources */
|
||||
struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
|
||||
union {
|
||||
struct {
|
||||
__u32 gf_interface_aux; /* interface index */
|
||||
struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */
|
||||
__u32 gf_fmode_aux; /* filter mode */
|
||||
__u32 gf_numsrc_aux; /* number of sources */
|
||||
struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
|
||||
};
|
||||
struct {
|
||||
__u32 gf_interface; /* interface index */
|
||||
struct __kernel_sockaddr_storage gf_group; /* multicast address */
|
||||
__u32 gf_fmode; /* filter mode */
|
||||
__u32 gf_numsrc; /* number of sources */
|
||||
struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
#define GROUP_FILTER_SIZE(numsrc) \
|
||||
|
@ -1965,7 +1965,9 @@ struct kvm_stats_header {
|
||||
#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK
|
||||
#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT)
|
||||
#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST
|
||||
|
||||
#define KVM_STATS_UNIT_SHIFT 4
|
||||
#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
|
||||
@ -1988,8 +1990,9 @@ struct kvm_stats_header {
|
||||
* @size: The number of data items for this stats.
|
||||
* Every data item is of type __u64.
|
||||
* @offset: The offset of the stats to the start of stat structure in
|
||||
* struture kvm or kvm_vcpu.
|
||||
* @unused: Unused field for future usage. Always 0 for now.
|
||||
* structure kvm or kvm_vcpu.
|
||||
* @bucket_size: A parameter value used for histogram stats. It is only used
|
||||
* for linear histogram stats, specifying the size of the bucket;
|
||||
* @name: The name string for the stats. Its size is indicated by the
|
||||
* &kvm_stats_header->name_size.
|
||||
*/
|
||||
@ -1998,7 +2001,7 @@ struct kvm_stats_desc {
|
||||
__s16 exponent;
|
||||
__u16 size;
|
||||
__u32 offset;
|
||||
__u32 unused;
|
||||
__u32 bucket_size;
|
||||
char name[];
|
||||
};
|
||||
|
||||
|
@ -73,7 +73,8 @@
|
||||
#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
|
||||
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
|
||||
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
|
||||
#define MOVE_MOUNT__MASK 0x00000077
|
||||
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
|
||||
#define MOVE_MOUNT__MASK 0x00000177
|
||||
|
||||
/*
|
||||
* fsopen() flags.
|
||||
|
@ -213,6 +213,7 @@ struct prctl_mm_map {
|
||||
/* Speculation control variants */
|
||||
# define PR_SPEC_STORE_BYPASS 0
|
||||
# define PR_SPEC_INDIRECT_BRANCH 1
|
||||
# define PR_SPEC_L1D_FLUSH 2
|
||||
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
|
||||
# define PR_SPEC_NOT_AFFECTED 0
|
||||
# define PR_SPEC_PRCTL (1UL << 0)
|
||||
@ -234,14 +235,15 @@ struct prctl_mm_map {
|
||||
#define PR_GET_TAGGED_ADDR_CTRL 56
|
||||
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
|
||||
/* MTE tag check fault modes */
|
||||
# define PR_MTE_TCF_SHIFT 1
|
||||
# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
|
||||
# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
|
||||
# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
|
||||
# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
|
||||
# define PR_MTE_TCF_NONE 0
|
||||
# define PR_MTE_TCF_SYNC (1UL << 1)
|
||||
# define PR_MTE_TCF_ASYNC (1UL << 2)
|
||||
# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
|
||||
/* MTE tag inclusion mask */
|
||||
# define PR_MTE_TAG_SHIFT 3
|
||||
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
|
||||
/* Unused; kept only for source compatibility */
|
||||
# define PR_MTE_TCF_SHIFT 1
|
||||
|
||||
/* Control reclaim behavior when allocating memory */
|
||||
#define PR_SET_IO_FLUSHER 57
|
||||
|
@ -299,6 +299,7 @@ typedef int __bitwise snd_pcm_subformat_t;
|
||||
#define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */
|
||||
#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */
|
||||
#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */
|
||||
#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */
|
||||
|
||||
#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */
|
||||
#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */
|
||||
|
1
tools/perf/.gitignore
vendored
1
tools/perf/.gitignore
vendored
@ -39,3 +39,4 @@ pmu-events/jevents
|
||||
feature/
|
||||
fixdep
|
||||
libtraceevent-dynamic-list
|
||||
Documentation/doc.dep
|
||||
|
@ -827,33 +827,36 @@ else
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(feature-libbfd), 1)
|
||||
EXTLIBS += -lbfd -lopcodes
|
||||
else
|
||||
# we are on a system that requires -liberty and (maybe) -lz
|
||||
# to link against -lbfd; test each case individually here
|
||||
|
||||
# call all detections now so we get correct
|
||||
# status in VF output
|
||||
$(call feature_check,libbfd-liberty)
|
||||
$(call feature_check,libbfd-liberty-z)
|
||||
|
||||
ifeq ($(feature-libbfd-liberty), 1)
|
||||
EXTLIBS += -lbfd -lopcodes -liberty
|
||||
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
|
||||
ifndef NO_LIBBFD
|
||||
ifeq ($(feature-libbfd), 1)
|
||||
EXTLIBS += -lbfd -lopcodes
|
||||
else
|
||||
ifeq ($(feature-libbfd-liberty-z), 1)
|
||||
EXTLIBS += -lbfd -lopcodes -liberty -lz
|
||||
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
|
||||
endif
|
||||
endif
|
||||
$(call feature_check,disassembler-four-args)
|
||||
endif
|
||||
# we are on a system that requires -liberty and (maybe) -lz
|
||||
# to link against -lbfd; test each case individually here
|
||||
|
||||
ifeq ($(feature-libbfd-buildid), 1)
|
||||
CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
|
||||
else
|
||||
msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
|
||||
# call all detections now so we get correct
|
||||
# status in VF output
|
||||
$(call feature_check,libbfd-liberty)
|
||||
$(call feature_check,libbfd-liberty-z)
|
||||
|
||||
ifeq ($(feature-libbfd-liberty), 1)
|
||||
EXTLIBS += -lbfd -lopcodes -liberty
|
||||
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
|
||||
else
|
||||
ifeq ($(feature-libbfd-liberty-z), 1)
|
||||
EXTLIBS += -lbfd -lopcodes -liberty -lz
|
||||
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
|
||||
endif
|
||||
endif
|
||||
$(call feature_check,disassembler-four-args)
|
||||
endif
|
||||
|
||||
ifeq ($(feature-libbfd-buildid), 1)
|
||||
CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
|
||||
else
|
||||
msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef NO_DEMANGLE
|
||||
|
@ -361,3 +361,5 @@
|
||||
444 n64 landlock_create_ruleset sys_landlock_create_ruleset
|
||||
445 n64 landlock_add_rule sys_landlock_add_rule
|
||||
446 n64 landlock_restrict_self sys_landlock_restrict_self
|
||||
# 447 reserved for memfd_secret
|
||||
448 n64 process_mrelease sys_process_mrelease
|
||||
|
@ -330,10 +330,10 @@
|
||||
256 64 sys_debug_setcontext sys_ni_syscall
|
||||
256 spu sys_debug_setcontext sys_ni_syscall
|
||||
# 257 reserved for vserver
|
||||
258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages
|
||||
259 nospu mbind sys_mbind compat_sys_mbind
|
||||
260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
|
||||
261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
|
||||
258 nospu migrate_pages sys_migrate_pages
|
||||
259 nospu mbind sys_mbind
|
||||
260 nospu get_mempolicy sys_get_mempolicy
|
||||
261 nospu set_mempolicy sys_set_mempolicy
|
||||
262 nospu mq_open sys_mq_open compat_sys_mq_open
|
||||
263 nospu mq_unlink sys_mq_unlink
|
||||
264 32 mq_timedsend sys_mq_timedsend_time32
|
||||
@ -381,7 +381,7 @@
|
||||
298 common faccessat sys_faccessat
|
||||
299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
|
||||
300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
|
||||
301 common move_pages sys_move_pages compat_sys_move_pages
|
||||
301 common move_pages sys_move_pages
|
||||
302 common getcpu sys_getcpu
|
||||
303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
|
||||
304 32 utimensat sys_utimensat_time32
|
||||
@ -526,3 +526,5 @@
|
||||
444 common landlock_create_ruleset sys_landlock_create_ruleset
|
||||
445 common landlock_add_rule sys_landlock_add_rule
|
||||
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||
# 447 reserved for memfd_secret
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
|
@ -122,7 +122,7 @@
|
||||
131 common quotactl sys_quotactl sys_quotactl
|
||||
132 common getpgid sys_getpgid sys_getpgid
|
||||
133 common fchdir sys_fchdir sys_fchdir
|
||||
134 common bdflush - -
|
||||
134 common bdflush sys_ni_syscall sys_ni_syscall
|
||||
135 common sysfs sys_sysfs sys_sysfs
|
||||
136 common personality sys_s390_personality sys_s390_personality
|
||||
137 common afs_syscall - -
|
||||
@ -274,9 +274,9 @@
|
||||
265 common statfs64 sys_statfs64 compat_sys_statfs64
|
||||
266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
|
||||
267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages
|
||||
268 common mbind sys_mbind compat_sys_mbind
|
||||
269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
|
||||
270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
|
||||
268 common mbind sys_mbind sys_mbind
|
||||
269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy
|
||||
270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy
|
||||
271 common mq_open sys_mq_open compat_sys_mq_open
|
||||
272 common mq_unlink sys_mq_unlink sys_mq_unlink
|
||||
273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32
|
||||
@ -293,7 +293,7 @@
|
||||
284 common inotify_init sys_inotify_init sys_inotify_init
|
||||
285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch
|
||||
286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
|
||||
287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
|
||||
287 common migrate_pages sys_migrate_pages sys_migrate_pages
|
||||
288 common openat sys_openat compat_sys_openat
|
||||
289 common mkdirat sys_mkdirat sys_mkdirat
|
||||
290 common mknodat sys_mknodat sys_mknodat
|
||||
@ -317,7 +317,7 @@
|
||||
307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
|
||||
308 common tee sys_tee sys_tee
|
||||
309 common vmsplice sys_vmsplice sys_vmsplice
|
||||
310 common move_pages sys_move_pages compat_sys_move_pages
|
||||
310 common move_pages sys_move_pages sys_move_pages
|
||||
311 common getcpu sys_getcpu sys_getcpu
|
||||
312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
|
||||
313 common utimes sys_utimes sys_utimes_time32
|
||||
@ -449,3 +449,5 @@
|
||||
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
|
||||
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
|
||||
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
|
||||
# 447 reserved for memfd_secret
|
||||
448 common process_mrelease sys_process_mrelease sys_process_mrelease
|
||||
|
@ -369,6 +369,7 @@
|
||||
445 common landlock_add_rule sys_landlock_add_rule
|
||||
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||
447 common memfd_secret sys_memfd_secret
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
|
||||
#
|
||||
# Due to a historical design error, certain syscalls are numbered differently
|
||||
@ -397,7 +398,7 @@
|
||||
530 x32 set_robust_list compat_sys_set_robust_list
|
||||
531 x32 get_robust_list compat_sys_get_robust_list
|
||||
532 x32 vmsplice sys_vmsplice
|
||||
533 x32 move_pages compat_sys_move_pages
|
||||
533 x32 move_pages sys_move_pages
|
||||
534 x32 preadv compat_sys_preadv64
|
||||
535 x32 pwritev compat_sys_pwritev64
|
||||
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
|
||||
|
@ -144,6 +144,7 @@ done
|
||||
# diff with extra ignore lines
|
||||
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
|
||||
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
|
||||
check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
|
||||
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
|
||||
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
|
||||
check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
|
||||
|
@ -1,3 +1,3 @@
|
||||
#!/bin/sh
|
||||
# description: produce callgraphs in short form for scripting use
|
||||
perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
|
||||
perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py "$@"
|
||||
|
@ -192,7 +192,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
|
||||
}
|
||||
|
||||
if (count != expect * evlist->core.nr_entries) {
|
||||
pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
|
||||
pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count);
|
||||
goto out_delete_evlist;
|
||||
}
|
||||
|
||||
|
@ -223,8 +223,11 @@ struct ucred {
|
||||
* reuses AF_INET address family
|
||||
*/
|
||||
#define AF_XDP 44 /* XDP sockets */
|
||||
#define AF_MCTP 45 /* Management component
|
||||
* transport protocol
|
||||
*/
|
||||
|
||||
#define AF_MAX 45 /* For now.. */
|
||||
#define AF_MAX 46 /* For now.. */
|
||||
|
||||
/* Protocol families, same as address families. */
|
||||
#define PF_UNSPEC AF_UNSPEC
|
||||
@ -274,6 +277,7 @@ struct ucred {
|
||||
#define PF_QIPCRTR AF_QIPCRTR
|
||||
#define PF_SMC AF_SMC
|
||||
#define PF_XDP AF_XDP
|
||||
#define PF_MCTP AF_MCTP
|
||||
#define PF_MAX AF_MAX
|
||||
|
||||
/* Maximum queue length specifiable by listen. */
|
||||
@ -421,6 +425,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
|
||||
struct sockaddr __user *upeer_sockaddr,
|
||||
int __user *upeer_addrlen, int flags,
|
||||
unsigned long nofile);
|
||||
extern struct file *do_accept(struct file *file, unsigned file_flags,
|
||||
struct sockaddr __user *upeer_sockaddr,
|
||||
int __user *upeer_addrlen, int flags);
|
||||
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
|
||||
int __user *upeer_addrlen, int flags);
|
||||
extern int __sys_socket(int family, int type, int protocol);
|
||||
|
@ -10,7 +10,7 @@ fi
|
||||
linux_mount=${linux_header_dir}/mount.h
|
||||
|
||||
printf "static const char *move_mount_flags[] = {\n"
|
||||
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
|
||||
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
|
||||
egrep $regex ${linux_mount} | \
|
||||
sed -r "s/$regex/\2 \1/g" | \
|
||||
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
|
||||
|
@ -59,6 +59,7 @@ perf-y += pstack.o
|
||||
perf-y += session.o
|
||||
perf-y += sample-raw.o
|
||||
perf-y += s390-sample-raw.o
|
||||
perf-y += amd-sample-raw.o
|
||||
perf-$(CONFIG_TRACE) += syscalltbl.o
|
||||
perf-y += ordered-events.o
|
||||
perf-y += namespaces.o
|
||||
|
289
tools/perf/util/amd-sample-raw.c
Normal file
289
tools/perf/util/amd-sample-raw.c
Normal file
@ -0,0 +1,289 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* AMD specific. Provide textual annotation for IBS raw sample data.
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <linux/string.h>
|
||||
#include "../../arch/x86/include/asm/amd-ibs.h"
|
||||
|
||||
#include "debug.h"
|
||||
#include "session.h"
|
||||
#include "evlist.h"
|
||||
#include "sample-raw.h"
|
||||
#include "pmu-events/pmu-events.h"
|
||||
|
||||
static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
|
||||
|
||||
static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
|
||||
{
|
||||
const char * const ic_miss_strs[] = {
|
||||
" IcMiss 0",
|
||||
" IcMiss 1",
|
||||
};
|
||||
const char * const l1tlb_pgsz_strs[] = {
|
||||
" L1TlbPgSz 4KB",
|
||||
" L1TlbPgSz 2MB",
|
||||
" L1TlbPgSz 1GB",
|
||||
" L1TlbPgSz RESERVED"
|
||||
};
|
||||
const char * const l1tlb_pgsz_strs_erratum1347[] = {
|
||||
" L1TlbPgSz 4KB",
|
||||
" L1TlbPgSz 16KB",
|
||||
" L1TlbPgSz 2MB",
|
||||
" L1TlbPgSz 1GB"
|
||||
};
|
||||
const char *ic_miss_str = NULL;
|
||||
const char *l1tlb_pgsz_str = NULL;
|
||||
|
||||
if (cpu_family == 0x19 && cpu_model < 0x10) {
|
||||
/*
|
||||
* Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
|
||||
* Erratum #1347 workaround is to use table provided in erratum
|
||||
*/
|
||||
if (reg.phy_addr_valid)
|
||||
l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
|
||||
} else {
|
||||
if (reg.phy_addr_valid)
|
||||
l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
|
||||
ic_miss_str = ic_miss_strs[reg.ic_miss];
|
||||
}
|
||||
|
||||
printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
|
||||
"PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n",
|
||||
reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
|
||||
reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
|
||||
reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
|
||||
reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "");
|
||||
}
|
||||
|
||||
/* Print the raw value and the ITLB refill latency field of the extended fetch control register. */
static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
{
	printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n",
	       reg.val,
	       reg.itlb_refill_lat);
}
|
||||
|
||||
/*
 * Print one line decoding an IBS op control register value. MaxCnt is
 * assembled from the extension and base count fields and shifted left by 4;
 * CntCtl is shown both numerically and as "uOps" (non-zero) or "cycles" (zero).
 */
static void pr_ibs_op_ctl(union ibs_op_ctl reg)
{
	const char *cnt_mode = reg.cnt_ctl ? "uOps" : "cycles";

	printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n",
	       reg.val,
	       ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4,
	       reg.op_en, reg.op_val,
	       reg.cnt_ctl, cnt_mode,
	       reg.opcurcnt);
}
|
||||
|
||||
/*
 * Print one line decoding an IBS op data register value. The OpReturn,
 * OpBrnTaken and OpBrnMisp fields are only printed when BrnRet is set.
 */
static void pr_ibs_op_data(union ibs_op_data reg)
{
	const char *ret_str = "";
	const char *taken_str = "";
	const char *misp_str = "";

	if (reg.op_brn_ret) {
		ret_str = reg.op_return ? " OpReturn 1" : " OpReturn 0";
		taken_str = reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0";
		misp_str = reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0";
	}

	printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
	       " RipInvalid %d BrnFuse %d Microcode %d\n",
	       reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
	       ret_str, taken_str, misp_str,
	       reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
}
|
||||
|
||||
/*
 * Print one line decoding an IBS op data 2 register value. The cache hit
 * state is only printed when DataSrc is 2, and a DataSrc of 0 prints no
 * data source text at all.
 */
static void pr_ibs_op_data2(union ibs_op_data2 reg)
{
	static const char * const src_names[] = {
		"",
		" DataSrc 1=(reserved)",
		" DataSrc 2=Local node cache",
		" DataSrc 3=DRAM",
		" DataSrc 4=Remote node cache",
		" DataSrc 5=(reserved)",
		" DataSrc 6=(reserved)",
		" DataSrc 7=Other"
	};
	const char *hit_state = "";

	if (reg.data_src == 2)
		hit_state = reg.cache_hit_st ? "CacheHitSt 1=O-State "
					     : "CacheHitSt 0=M-state ";

	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
	       hit_state, reg.rmt_node, src_names[reg.data_src]);
}
|
||||
|
||||
static void pr_ibs_op_data3(union ibs_op_data3 reg)
|
||||
{
|
||||
char l2_miss_str[sizeof(" L2Miss _")] = "";
|
||||
char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
|
||||
char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
|
||||
|
||||
/*
|
||||
* Erratum #1293
|
||||
* Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
|
||||
*/
|
||||
if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
|
||||
snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
|
||||
snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
|
||||
" OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
|
||||
}
|
||||
|
||||
if (reg.op_mem_width)
|
||||
snprintf(op_mem_width_str, sizeof(op_mem_width_str),
|
||||
" OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
|
||||
|
||||
printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
|
||||
"DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
|
||||
"DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
|
||||
"DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
|
||||
reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
|
||||
reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
|
||||
reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
|
||||
reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
|
||||
reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
|
||||
op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
|
||||
}
|
||||
|
||||
/*
|
||||
* IBS Op/Execution MSRs always saved, in order, are:
|
||||
* IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
|
||||
* IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
|
||||
*/
|
||||
static void amd_dump_ibs_op(struct perf_sample *sample)
|
||||
{
|
||||
struct perf_ibs_data *data = sample->raw_data;
|
||||
union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
|
||||
__u64 *rip = (__u64 *)op_ctl + 1;
|
||||
union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
|
||||
union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
|
||||
|
||||
pr_ibs_op_ctl(*op_ctl);
|
||||
if (!op_data->op_rip_invalid)
|
||||
printf("IbsOpRip:\t%016llx\n", *rip);
|
||||
pr_ibs_op_data(*op_data);
|
||||
/*
|
||||
* Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
|
||||
*/
|
||||
if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
|
||||
(op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
|
||||
pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
|
||||
pr_ibs_op_data3(*op_data3);
|
||||
if (op_data3->dc_lin_addr_valid)
|
||||
printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
|
||||
if (op_data3->dc_phy_addr_valid)
|
||||
printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
|
||||
if (op_data->op_brn_ret && *(rip + 6))
|
||||
printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
|
||||
}
|
||||
|
||||
/*
|
||||
* IBS Fetch MSRs always saved, in order, are:
|
||||
* IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
|
||||
*/
|
||||
static void amd_dump_ibs_fetch(struct perf_sample *sample)
|
||||
{
|
||||
struct perf_ibs_data *data = sample->raw_data;
|
||||
union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
|
||||
__u64 *addr = (__u64 *)fetch_ctl + 1;
|
||||
union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
|
||||
|
||||
pr_ibs_fetch_ctl(*fetch_ctl);
|
||||
printf("IbsFetchLinAd:\t%016llx\n", *addr++);
|
||||
if (fetch_ctl->phy_addr_valid)
|
||||
printf("IbsFetchPhysAd:\t%016llx\n", *addr);
|
||||
pr_ic_ibs_extd_ctl(*extd_ctl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test for enable and valid bits in captured control MSRs.
|
||||
*/
|
||||
static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
|
||||
{
|
||||
struct perf_ibs_data *data = sample->raw_data;
|
||||
union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
|
||||
|
||||
if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool is_valid_ibs_op_sample(struct perf_sample *sample)
|
||||
{
|
||||
struct perf_ibs_data *data = sample->raw_data;
|
||||
union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
|
||||
|
||||
if (op_ctl->op_en && op_ctl->op_val)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
|
||||
* and if the event was triggered by IBS, display its raw data with decoded text.
|
||||
* The function is only invoked when the dump flag -D is set.
|
||||
*/
|
||||
void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
|
||||
struct perf_sample *sample)
|
||||
{
|
||||
struct evsel *evsel;
|
||||
|
||||
if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
|
||||
return;
|
||||
|
||||
evsel = evlist__event2evsel(evlist, event);
|
||||
if (!evsel)
|
||||
return;
|
||||
|
||||
if (evsel->core.attr.type == ibs_fetch_type) {
|
||||
if (!is_valid_ibs_fetch_sample(sample)) {
|
||||
pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
|
||||
return;
|
||||
}
|
||||
amd_dump_ibs_fetch(sample);
|
||||
} else if (evsel->core.attr.type == ibs_op_type) {
|
||||
if (!is_valid_ibs_op_sample(sample)) {
|
||||
pr_debug("Invalid raw IBS Op MSR data encountered\n");
|
||||
return;
|
||||
}
|
||||
amd_dump_ibs_op(sample);
|
||||
}
|
||||
}
|
||||
|
||||
static void parse_cpuid(struct perf_env *env)
|
||||
{
|
||||
const char *cpuid;
|
||||
int ret;
|
||||
|
||||
cpuid = perf_env__cpuid(env);
|
||||
/*
|
||||
* cpuid = "AuthenticAMD,family,model,stepping"
|
||||
*/
|
||||
ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
|
||||
if (ret != 2)
|
||||
pr_debug("problem parsing cpuid\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Find and assign the type number used for ibs_op or ibs_fetch samples.
|
||||
* Device names can be large - we are only interested in the first 9 characters,
|
||||
* to match "ibs_fetch".
|
||||
*/
|
||||
bool evlist__has_amd_ibs(struct evlist *evlist)
|
||||
{
|
||||
struct perf_env *env = evlist->env;
|
||||
int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
|
||||
const char *pmu_mapping = perf_env__pmu_mappings(env);
|
||||
char name[sizeof("ibs_fetch")];
|
||||
u32 type;
|
||||
|
||||
while (nr_pmu_mappings--) {
|
||||
ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
|
||||
if (ret == 2) {
|
||||
if (strstarts(name, "ibs_op"))
|
||||
ibs_op_type = type;
|
||||
else if (strstarts(name, "ibs_fetch"))
|
||||
ibs_fetch_type = type;
|
||||
}
|
||||
pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
|
||||
}
|
||||
|
||||
if (ibs_fetch_type || ibs_op_type) {
|
||||
if (!cpu_family)
|
||||
parse_cpuid(env);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
@ -21,6 +21,14 @@
|
||||
#include "record.h"
|
||||
#include "util/synthetic-events.h"
|
||||
|
||||
/*
 * Weak definition of btf__load_from_kernel_by_id() implemented on top of
 * btf__get_from_id(): returns the loaded BTF object, or the error code
 * encoded with ERR_PTR() when btf__get_from_id() reports a failure.
 */
struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
{
	struct btf *btf;
	int err;

	err = btf__get_from_id(id, &btf);
	if (err)
		return ERR_PTR(err);

	return btf;
}
|
||||
|
||||
/* Convert a pointer to a __u64 value by way of unsigned long. */
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
|
||||
|
||||
static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
|
||||
|
@ -1349,6 +1349,16 @@ void dso__set_build_id(struct dso *dso, struct build_id *bid)
|
||||
|
||||
bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
|
||||
{
|
||||
if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) {
|
||||
/*
|
||||
* For the backward compatibility, it allows a build-id has
|
||||
* trailing zeros.
|
||||
*/
|
||||
return !memcmp(dso->bid.data, bid->data, bid->size) &&
|
||||
!memchr_inv(&dso->bid.data[bid->size], 0,
|
||||
dso->bid.size - bid->size);
|
||||
}
|
||||
|
||||
return dso->bid.size == bid->size &&
|
||||
memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <sys/utsname.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "strbuf.h"
|
||||
|
||||
struct perf_env perf_env;
|
||||
|
||||
@ -306,6 +307,45 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int perf_env__read_pmu_mappings(struct perf_env *env)
|
||||
{
|
||||
struct perf_pmu *pmu = NULL;
|
||||
u32 pmu_num = 0;
|
||||
struct strbuf sb;
|
||||
|
||||
while ((pmu = perf_pmu__scan(pmu))) {
|
||||
if (!pmu->name)
|
||||
continue;
|
||||
pmu_num++;
|
||||
}
|
||||
if (!pmu_num) {
|
||||
pr_debug("pmu mappings not available\n");
|
||||
return -ENOENT;
|
||||
}
|
||||
env->nr_pmu_mappings = pmu_num;
|
||||
|
||||
if (strbuf_init(&sb, 128 * pmu_num) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
while ((pmu = perf_pmu__scan(pmu))) {
|
||||
if (!pmu->name)
|
||||
continue;
|
||||
if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
|
||||
goto error;
|
||||
/* include a NULL character at the end */
|
||||
if (strbuf_add(&sb, "", 1) < 0)
|
||||
goto error;
|
||||
}
|
||||
|
||||
env->pmu_mappings = strbuf_detach(&sb, NULL);
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
strbuf_release(&sb);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int perf_env__read_cpuid(struct perf_env *env)
|
||||
{
|
||||
char cpuid[128];
|
||||
@ -404,6 +444,44 @@ const char *perf_env__arch(struct perf_env *env)
|
||||
return normalize_arch(arch_name);
|
||||
}
|
||||
|
||||
const char *perf_env__cpuid(struct perf_env *env)
|
||||
{
|
||||
int status;
|
||||
|
||||
if (!env || !env->cpuid) { /* Assume local operation */
|
||||
status = perf_env__read_cpuid(env);
|
||||
if (status)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return env->cpuid;
|
||||
}
|
||||
|
||||
int perf_env__nr_pmu_mappings(struct perf_env *env)
|
||||
{
|
||||
int status;
|
||||
|
||||
if (!env || !env->nr_pmu_mappings) { /* Assume local operation */
|
||||
status = perf_env__read_pmu_mappings(env);
|
||||
if (status)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return env->nr_pmu_mappings;
|
||||
}
|
||||
|
||||
const char *perf_env__pmu_mappings(struct perf_env *env)
|
||||
{
|
||||
int status;
|
||||
|
||||
if (!env || !env->pmu_mappings) { /* Assume local operation */
|
||||
status = perf_env__read_pmu_mappings(env);
|
||||
if (status)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return env->pmu_mappings;
|
||||
}
|
||||
|
||||
int perf_env__numa_node(struct perf_env *env, int cpu)
|
||||
{
|
||||
|
@ -149,11 +149,16 @@ int perf_env__kernel_is_64_bit(struct perf_env *env);
|
||||
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
|
||||
|
||||
int perf_env__read_cpuid(struct perf_env *env);
|
||||
int perf_env__read_pmu_mappings(struct perf_env *env);
|
||||
int perf_env__nr_pmu_mappings(struct perf_env *env);
|
||||
const char *perf_env__pmu_mappings(struct perf_env *env);
|
||||
|
||||
int perf_env__read_cpu_topology_map(struct perf_env *env);
|
||||
|
||||
void cpu_cache_level__free(struct cpu_cache_level *cache);
|
||||
|
||||
const char *perf_env__arch(struct perf_env *env);
|
||||
const char *perf_env__cpuid(struct perf_env *env);
|
||||
const char *perf_env__raw_arch(struct perf_env *env);
|
||||
int perf_env__nr_cpus_avail(struct perf_env *env);
|
||||
|
||||
|
@ -333,11 +333,11 @@ error_free:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
|
||||
int copy_config_terms(struct list_head *dst, struct list_head *src)
|
||||
{
|
||||
struct evsel_config_term *pos, *tmp;
|
||||
|
||||
list_for_each_entry(pos, &src->config_terms, list) {
|
||||
list_for_each_entry(pos, src, list) {
|
||||
tmp = malloc(sizeof(*tmp));
|
||||
if (tmp == NULL)
|
||||
return -ENOMEM;
|
||||
@ -350,11 +350,16 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
list_add_tail(&tmp->list, &dst->config_terms);
|
||||
list_add_tail(&tmp->list, dst);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
|
||||
{
|
||||
return copy_config_terms(&dst->config_terms, &src->config_terms);
|
||||
}
|
||||
|
||||
/**
|
||||
* evsel__clone - create a new evsel copied from @orig
|
||||
* @orig: original evsel
|
||||
@ -1385,11 +1390,11 @@ int evsel__disable(struct evsel *evsel)
|
||||
return err;
|
||||
}
|
||||
|
||||
static void evsel__free_config_terms(struct evsel *evsel)
|
||||
void free_config_terms(struct list_head *config_terms)
|
||||
{
|
||||
struct evsel_config_term *term, *h;
|
||||
|
||||
list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
|
||||
list_for_each_entry_safe(term, h, config_terms, list) {
|
||||
list_del_init(&term->list);
|
||||
if (term->free_str)
|
||||
zfree(&term->val.str);
|
||||
@ -1397,6 +1402,11 @@ static void evsel__free_config_terms(struct evsel *evsel)
|
||||
}
|
||||
}
|
||||
|
||||
static void evsel__free_config_terms(struct evsel *evsel)
|
||||
{
|
||||
free_config_terms(&evsel->config_terms);
|
||||
}
|
||||
|
||||
void evsel__exit(struct evsel *evsel)
|
||||
{
|
||||
assert(list_empty(&evsel->core.node));
|
||||
|
@ -213,6 +213,9 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
|
||||
struct evsel *evsel__clone(struct evsel *orig);
|
||||
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
|
||||
|
||||
int copy_config_terms(struct list_head *dst, struct list_head *src);
|
||||
void free_config_terms(struct list_head *config_terms);
|
||||
|
||||
/*
|
||||
* Returns pointer with encoded error via <linux/err.h> interface.
|
||||
*/
|
||||
|
@ -76,12 +76,16 @@ static int add_hw_hybrid(struct parse_events_state *parse_state,
|
||||
int ret;
|
||||
|
||||
perf_pmu__for_each_hybrid_pmu(pmu) {
|
||||
LIST_HEAD(terms);
|
||||
|
||||
if (pmu_cmp(parse_state, pmu))
|
||||
continue;
|
||||
|
||||
copy_config_terms(&terms, config_terms);
|
||||
ret = create_event_hybrid(PERF_TYPE_HARDWARE,
|
||||
&parse_state->idx, list, attr, name,
|
||||
config_terms, pmu);
|
||||
&terms, pmu);
|
||||
free_config_terms(&terms);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -115,11 +119,15 @@ static int add_raw_hybrid(struct parse_events_state *parse_state,
|
||||
int ret;
|
||||
|
||||
perf_pmu__for_each_hybrid_pmu(pmu) {
|
||||
LIST_HEAD(terms);
|
||||
|
||||
if (pmu_cmp(parse_state, pmu))
|
||||
continue;
|
||||
|
||||
copy_config_terms(&terms, config_terms);
|
||||
ret = create_raw_event_hybrid(&parse_state->idx, list, attr,
|
||||
name, config_terms, pmu);
|
||||
name, &terms, pmu);
|
||||
free_config_terms(&terms);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -165,11 +173,15 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx,
|
||||
|
||||
*hybrid = true;
|
||||
perf_pmu__for_each_hybrid_pmu(pmu) {
|
||||
LIST_HEAD(terms);
|
||||
|
||||
if (pmu_cmp(parse_state, pmu))
|
||||
continue;
|
||||
|
||||
copy_config_terms(&terms, config_terms);
|
||||
ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list,
|
||||
attr, name, config_terms, pmu);
|
||||
attr, name, &terms, pmu);
|
||||
free_config_terms(&terms);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
@ -387,7 +387,7 @@ __add_event(struct list_head *list, int *idx,
|
||||
evsel->name = strdup(name);
|
||||
|
||||
if (config_terms)
|
||||
list_splice(config_terms, &evsel->config_terms);
|
||||
list_splice_init(config_terms, &evsel->config_terms);
|
||||
|
||||
if (list)
|
||||
list_add_tail(&evsel->core.node, list);
|
||||
@ -535,9 +535,12 @@ int parse_events_add_cache(struct list_head *list, int *idx,
|
||||
config_name ? : name, &config_terms,
|
||||
&hybrid, parse_state);
|
||||
if (hybrid)
|
||||
return ret;
|
||||
goto out_free_terms;
|
||||
|
||||
return add_event(list, idx, &attr, config_name ? : name, &config_terms);
|
||||
ret = add_event(list, idx, &attr, config_name ? : name, &config_terms);
|
||||
out_free_terms:
|
||||
free_config_terms(&config_terms);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void tracepoint_error(struct parse_events_error *e, int err,
|
||||
@ -1457,10 +1460,13 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
|
||||
get_config_name(head_config),
|
||||
&config_terms, &hybrid);
|
||||
if (hybrid)
|
||||
return ret;
|
||||
goto out_free_terms;
|
||||
|
||||
return add_event(list, &parse_state->idx, &attr,
|
||||
get_config_name(head_config), &config_terms);
|
||||
ret = add_event(list, &parse_state->idx, &attr,
|
||||
get_config_name(head_config), &config_terms);
|
||||
out_free_terms:
|
||||
free_config_terms(&config_terms);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int parse_events_add_tool(struct parse_events_state *parse_state,
|
||||
@ -1608,14 +1614,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
|
||||
}
|
||||
|
||||
if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
|
||||
struct evsel_config_term *pos, *tmp;
|
||||
|
||||
list_for_each_entry_safe(pos, tmp, &config_terms, list) {
|
||||
list_del_init(&pos->list);
|
||||
if (pos->free_str)
|
||||
zfree(&pos->val.str);
|
||||
free(pos);
|
||||
}
|
||||
free_config_terms(&config_terms);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -137,6 +137,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
|
||||
PRINT_ATTRf(cgroup, p_unsigned);
|
||||
PRINT_ATTRf(text_poke, p_unsigned);
|
||||
PRINT_ATTRf(build_id, p_unsigned);
|
||||
PRINT_ATTRf(inherit_thread, p_unsigned);
|
||||
PRINT_ATTRf(remove_on_exec, p_unsigned);
|
||||
PRINT_ATTRf(sigtrap, p_unsigned);
|
||||
|
||||
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
|
||||
PRINT_ATTRf(bp_type, p_unsigned);
|
||||
@ -150,7 +153,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
|
||||
PRINT_ATTRf(aux_watermark, p_unsigned);
|
||||
PRINT_ATTRf(sample_max_stack, p_unsigned);
|
||||
PRINT_ATTRf(aux_sample_size, p_unsigned);
|
||||
PRINT_ATTRf(text_poke, p_unsigned);
|
||||
PRINT_ATTRf(sig_data, p_unsigned);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1,8 +1,10 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include <string.h>
|
||||
#include <linux/string.h>
|
||||
#include "evlist.h"
|
||||
#include "env.h"
|
||||
#include "header.h"
|
||||
#include "sample-raw.h"
|
||||
|
||||
/*
|
||||
@ -12,7 +14,13 @@
|
||||
void evlist__init_trace_event_sample_raw(struct evlist *evlist)
|
||||
{
|
||||
const char *arch_pf = perf_env__arch(evlist->env);
|
||||
const char *cpuid = perf_env__cpuid(evlist->env);
|
||||
|
||||
if (arch_pf && !strcmp("s390", arch_pf))
|
||||
evlist->trace_event_sample_raw = evlist__s390_sample_raw;
|
||||
else if (arch_pf && !strcmp("x86", arch_pf) &&
|
||||
cpuid && strstarts(cpuid, "AuthenticAMD") &&
|
||||
evlist__has_amd_ibs(evlist)) {
|
||||
evlist->trace_event_sample_raw = evlist__amd_sample_raw;
|
||||
}
|
||||
}
|
||||
|
@ -6,6 +6,10 @@ struct evlist;
|
||||
union perf_event;
|
||||
struct perf_sample;
|
||||
|
||||
void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
|
||||
void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
|
||||
struct perf_sample *sample);
|
||||
bool evlist__has_amd_ibs(struct evlist *evlist);
|
||||
void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
|
||||
struct perf_sample *sample);
|
||||
void evlist__init_trace_event_sample_raw(struct evlist *evlist);
|
||||
#endif /* __PERF_EVLIST_H */
|
||||
|
@ -1581,10 +1581,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
|
||||
if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
|
||||
goto out_close;
|
||||
|
||||
section = bfd_get_section_by_name(abfd, ".text");
|
||||
if (section)
|
||||
dso->text_offset = section->vma - section->filepos;
|
||||
|
||||
symbols_size = bfd_get_symtab_upper_bound(abfd);
|
||||
if (symbols_size == 0) {
|
||||
bfd_close(abfd);
|
||||
@ -1602,6 +1598,22 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
|
||||
if (symbols_count < 0)
|
||||
goto out_free;
|
||||
|
||||
section = bfd_get_section_by_name(abfd, ".text");
|
||||
if (section) {
|
||||
for (i = 0; i < symbols_count; ++i) {
|
||||
if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
|
||||
!strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
|
||||
break;
|
||||
}
|
||||
if (i < symbols_count) {
|
||||
/* PE symbols can only have 4 bytes, so use .text high bits */
|
||||
dso->text_offset = section->vma - (u32)section->vma;
|
||||
dso->text_offset += (u32)bfd_asymbol_value(symbols[i]);
|
||||
} else {
|
||||
dso->text_offset = section->vma - section->filepos;
|
||||
}
|
||||
}
|
||||
|
||||
qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
|
||||
|
||||
#ifdef bfd_get_section
|
||||
|
Loading…
x
Reference in New Issue
Block a user