8c98420987
Use BPF to collect statistics on the CPU usage based on perf BPF skeletons.

Example usage:

  # perf kwork top -h

   Usage: perf kwork top [<options>]

      -b, --use-bpf         Use BPF to measure task cpu usage
      -C, --cpu <cpu>       list of cpus to profile
      -i, --input <file>    input file name
      -n, --name <name>     event name to profile
      -s, --sort <key[,key2...]>
                            sort by key(s): rate, runtime, tid
          --time <str>      Time span for analysis (start,stop)

  #
  # perf kwork -k sched top -b
  Starting trace, Hit <Ctrl+C> to stop and report
  ^C
  Total  : 160702.425 ms, 8 cpus
  %Cpu(s):  36.00% id,   0.00% hi,   0.00% si
  %Cpu0   [||||||||||||||||||              61.66%]
  %Cpu1   [||||||||||||||||||              61.27%]
  %Cpu2   [|||||||||||||||||||             66.40%]
  %Cpu3   [||||||||||||||||||              61.28%]
  %Cpu4   [||||||||||||||||||              61.82%]
  %Cpu5   [|||||||||||||||||||||||         77.41%]
  %Cpu6   [||||||||||||||||||              61.73%]
  %Cpu7   [||||||||||||||||||              63.25%]

        PID     SPID    %CPU           RUNTIME  COMMMAND
    -------------------------------------------------------------
          0        0   38.72       8089.463 ms  [swapper/1]
          0        0   38.71       8084.547 ms  [swapper/3]
          0        0   38.33       8007.532 ms  [swapper/0]
          0        0   38.26       7992.985 ms  [swapper/6]
          0        0   38.17       7971.865 ms  [swapper/4]
          0        0   36.74       7447.765 ms  [swapper/7]
          0        0   33.59       6486.942 ms  [swapper/2]
          0        0   22.58       3771.268 ms  [swapper/5]
       9545     9351    2.48        447.136 ms  sched-messaging
       9574     9351    2.09        418.583 ms  sched-messaging
       9724     9351    2.05        372.407 ms  sched-messaging
       9531     9351    2.01        368.804 ms  sched-messaging
       9512     9351    2.00        362.250 ms  sched-messaging
       9514     9351    1.95        357.767 ms  sched-messaging
       9538     9351    1.86        384.476 ms  sched-messaging
       9712     9351    1.84        386.490 ms  sched-messaging
       9723     9351    1.83        380.021 ms  sched-messaging
       9722     9351    1.82        382.738 ms  sched-messaging
       9517     9351    1.81        354.794 ms  sched-messaging
       9559     9351    1.79        344.305 ms  sched-messaging
       9725     9351    1.77        365.315 ms  sched-messaging
  <SNIP>

  # perf kwork -k sched top -b -n perf
  Starting trace, Hit <Ctrl+C> to stop and report
  ^C
  Total  : 151563.332 ms, 8 cpus
  %Cpu(s):  26.49% id,   0.00% hi,   0.00% si
  %Cpu0   [                                 0.01%]
  %Cpu1   [                                 0.00%]
  %Cpu2   [                                 0.00%]
  %Cpu3   [                                 0.00%]
  %Cpu4   [                                 0.00%]
  %Cpu5   [                                 0.00%]
  %Cpu6   [                                 0.00%]
  %Cpu7   [                                 0.00%]

        PID     SPID    %CPU           RUNTIME  COMMMAND
    -------------------------------------------------------------
       9754     9754    0.01          2.303 ms  perf

  #
  # perf kwork -k sched top -b -C 2,3,4
  Starting trace, Hit <Ctrl+C> to stop and report
  ^C
  Total  :  48016.721 ms, 3 cpus
  %Cpu(s):  27.82% id,   0.00% hi,   0.00% si
  %Cpu2   [||||||||||||||||||||||          74.68%]
  %Cpu3   [|||||||||||||||||||||           71.06%]
  %Cpu4   [|||||||||||||||||||||           70.91%]

        PID     SPID    %CPU           RUNTIME  COMMMAND
    -------------------------------------------------------------
          0        0   29.08       4734.998 ms  [swapper/4]
          0        0   28.93       4710.029 ms  [swapper/3]
          0        0   25.31       3912.363 ms  [swapper/2]
      10248    10158    1.62        264.931 ms  sched-messaging
      10253    10158    1.62        265.136 ms  sched-messaging
      10158    10158    1.60        263.013 ms  bash
      10360    10158    1.49        243.639 ms  sched-messaging
      10413    10158    1.48        238.604 ms  sched-messaging
      10531    10158    1.47        234.067 ms  sched-messaging
      10400    10158    1.47        240.631 ms  sched-messaging
      10355    10158    1.47        230.586 ms  sched-messaging
      10377    10158    1.43        234.835 ms  sched-messaging
      10526    10158    1.42        232.045 ms  sched-messaging
      10298    10158    1.41        222.396 ms  sched-messaging
      10410    10158    1.38        221.853 ms  sched-messaging
      10364    10158    1.38        226.042 ms  sched-messaging
      10480    10158    1.36        213.633 ms  sched-messaging
      10370    10158    1.36        223.620 ms  sched-messaging
      10553    10158    1.34        217.169 ms  sched-messaging
      10291    10158    1.34        211.516 ms  sched-messaging
      10251    10158    1.34        218.813 ms  sched-messaging
      10522    10158    1.33        218.498 ms  sched-messaging
      10288    10158    1.33        216.787 ms  sched-messaging
  <SNIP>

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Link: https://lore.kernel.org/r/20230812084917.169338-15-yangjihong1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
315 lines
8.7 KiB
C
315 lines
8.7 KiB
C
#ifndef PERF_UTIL_KWORK_H
#define PERF_UTIL_KWORK_H

#include "util/tool.h"
#include "util/time-utils.h"

#include <linux/bitmap.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/types.h>

struct perf_sample;
struct perf_session;

/*
 * Kinds of kernel work that can be profiled.
 *
 * KWORK_CLASS_MAX is not a real class: it is the last enumerator and so
 * equals the number of class types listed before it.
 */
enum kwork_class_type {
	KWORK_CLASS_IRQ,
	KWORK_CLASS_SOFTIRQ,
	KWORK_CLASS_WORKQUEUE,
	KWORK_CLASS_SCHED,
	KWORK_CLASS_MAX,
};

/*
 * Report flavours selectable for a perf kwork run; stored in
 * perf_kwork::report.
 */
enum kwork_report_type {
	KWORK_REPORT_RUNTIME,
	KWORK_REPORT_LATENCY,
	KWORK_REPORT_TIMEHIST,
	KWORK_REPORT_TOP,
};

/*
 * Trace event stages of a work item.
 *
 * KWORK_TRACE_MAX is the number of stages; it sizes the per-stage arrays
 * in this header (kwork_work::atom_list, perf_kwork::nr_skipped_events).
 */
enum kwork_trace_type {
	KWORK_TRACE_RAISE,
	KWORK_TRACE_ENTRY,
	KWORK_TRACE_EXIT,
	KWORK_TRACE_MAX,
};

/*
 * data structure:
 *
 *                 +==================+ +============+ +======================+
 *                 |      class       | |    work    | |         atom         |
 *                 +==================+ +============+ +======================+
 * +------------+  |  +-----+         | |  +------+  | |  +-------+   +-----+ |
 * | perf_kwork | +-> | irq | --------|+-> | eth0 | --+-> | raise | - | ... | --+   +-----------+
 * +-----+------+ ||  +-----+         |||  +------+  |||  +-------+   +-----+ | |   |           |
 *       |        ||                  |||            |||                      | +-> | atom_page |
 *       |        ||                  |||            |||  +-------+   +-----+ |     |           |
 *       |  class_list                |||            |+-> | entry | - | ... | ----> |           |
 *       |        ||                  |||            |||  +-------+   +-----+ |     |           |
 *       |        ||                  |||            |||                      | +-> |           |
 *       |        ||                  |||            |||  +-------+   +-----+ | |   |           |
 *       |        ||                  |||            |+-> | exit  | - | ... | --+   +-----+-----+
 *       |        ||                  |||            | |  +-------+   +-----+ |           |
 *       |        ||                  |||            | |                      |           |
 *       |        ||                  |||  +-----+   | |                      |           |
 *       |        ||                  |+-> | ... |   | |                      |           |
 *       |        ||                  | |  +-----+   | |                      |           |
 *       |        ||                  | |            | |                      |           |
 *       |        ||  +---------+     | |  +-----+   | |  +-------+   +-----+ |           |
 *       |        +-> | softirq | -------> | RCU | ---+-> | raise | - | ... | --+   +-----+-----+
 *       |        ||  +---------+     | |  +-----+   |||  +-------+   +-----+ | |   |           |
 *       |        ||                  | |            |||                      | +-> | atom_page |
 *       |        ||                  | |            |||  +-------+   +-----+ |     |           |
 *       |        ||                  | |            |+-> | entry | - | ... | ----> |           |
 *       |        ||                  | |            |||  +-------+   +-----+ |     |           |
 *       |        ||                  | |            |||                      | +-> |           |
 *       |        ||                  | |            |||  +-------+   +-----+ | |   |           |
 *       |        ||                  | |            |+-> | exit  | - | ... | --+   +-----+-----+
 *       |        ||                  | |            | |  +-------+   +-----+ |           |
 *       |        ||                  | |            | |                      |           |
 *       |        ||  +-----------+   | |  +-----+   | |                      |           |
 *       |        +-> | workqueue | -----> | ... |   | |                      |           |
 *       |         |  +-----------+   | |  +-----+   | |                      |           |
 *       |         +==================+ +============+ +======================+           |
 *       |                                                                                |
 *       +---->  atom_page_list  ---------------------------------------------------------+
 *
 */

struct kwork_atom {
|
|
struct list_head list;
|
|
u64 time;
|
|
struct kwork_atom *prev;
|
|
|
|
void *page_addr;
|
|
unsigned long bit_inpage;
|
|
};
|
|
|
|
#define NR_ATOM_PER_PAGE 128
|
|
struct kwork_atom_page {
|
|
struct list_head list;
|
|
struct kwork_atom atoms[NR_ATOM_PER_PAGE];
|
|
DECLARE_BITMAP(bitmap, NR_ATOM_PER_PAGE);
|
|
};
|
|
|
|
struct perf_kwork;
|
|
struct kwork_class;
|
|
struct kwork_work {
|
|
/*
|
|
* class field
|
|
*/
|
|
struct rb_node node;
|
|
struct kwork_class *class;
|
|
|
|
/*
|
|
* work field
|
|
*/
|
|
u64 id;
|
|
int cpu;
|
|
char *name;
|
|
|
|
/*
|
|
* atom field
|
|
*/
|
|
u64 nr_atoms;
|
|
struct list_head atom_list[KWORK_TRACE_MAX];
|
|
|
|
/*
|
|
* runtime report
|
|
*/
|
|
u64 max_runtime;
|
|
u64 max_runtime_start;
|
|
u64 max_runtime_end;
|
|
u64 total_runtime;
|
|
|
|
/*
|
|
* latency report
|
|
*/
|
|
u64 max_latency;
|
|
u64 max_latency_start;
|
|
u64 max_latency_end;
|
|
u64 total_latency;
|
|
|
|
/*
|
|
* top report
|
|
*/
|
|
u32 cpu_usage;
|
|
u32 tgid;
|
|
bool is_kthread;
|
|
};
|
|
|
|
struct kwork_class {
|
|
struct list_head list;
|
|
const char *name;
|
|
enum kwork_class_type type;
|
|
|
|
unsigned int nr_tracepoints;
|
|
const struct evsel_str_handler *tp_handlers;
|
|
|
|
struct rb_root_cached work_root;
|
|
|
|
int (*class_init)(struct kwork_class *class,
|
|
struct perf_session *session);
|
|
|
|
void (*work_init)(struct perf_kwork *kwork,
|
|
struct kwork_class *class,
|
|
struct kwork_work *work,
|
|
enum kwork_trace_type src_type,
|
|
struct evsel *evsel,
|
|
struct perf_sample *sample,
|
|
struct machine *machine);
|
|
|
|
void (*work_name)(struct kwork_work *work,
|
|
char *buf, int len);
|
|
};
|
|
|
|
/*
 * Set of per-event callbacks, one per kind of trace event. All four
 * share the same signature and return an int status. perf_kwork holds a
 * pointer to one such table in its tp_handler member.
 */
struct trace_kwork_handler {
	int (*raise_event)(struct perf_kwork *kwork,
			   struct kwork_class *class, struct evsel *evsel,
			   struct perf_sample *sample, struct machine *machine);

	int (*entry_event)(struct perf_kwork *kwork,
			   struct kwork_class *class, struct evsel *evsel,
			   struct perf_sample *sample, struct machine *machine);

	int (*exit_event)(struct perf_kwork *kwork,
			  struct kwork_class *class, struct evsel *evsel,
			  struct perf_sample *sample, struct machine *machine);

	int (*sched_switch_event)(struct perf_kwork *kwork,
				  struct kwork_class *class, struct evsel *evsel,
				  struct perf_sample *sample, struct machine *machine);
};

struct __top_cpus_runtime {
|
|
u64 load;
|
|
u64 idle;
|
|
u64 irq;
|
|
u64 softirq;
|
|
u64 total;
|
|
};
|
|
|
|
struct kwork_top_stat {
|
|
DECLARE_BITMAP(all_cpus_bitmap, MAX_NR_CPUS);
|
|
struct __top_cpus_runtime *cpus_runtime;
|
|
};
|
|
|
|
struct perf_kwork {
|
|
/*
|
|
* metadata
|
|
*/
|
|
struct perf_tool tool;
|
|
struct list_head class_list;
|
|
struct list_head atom_page_list;
|
|
struct list_head sort_list, cmp_id;
|
|
struct rb_root_cached sorted_work_root;
|
|
const struct trace_kwork_handler *tp_handler;
|
|
|
|
/*
|
|
* profile filters
|
|
*/
|
|
const char *profile_name;
|
|
|
|
const char *cpu_list;
|
|
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
|
|
|
|
const char *time_str;
|
|
struct perf_time_interval ptime;
|
|
|
|
/*
|
|
* options for command
|
|
*/
|
|
bool force;
|
|
const char *event_list_str;
|
|
enum kwork_report_type report;
|
|
|
|
/*
|
|
* options for subcommand
|
|
*/
|
|
bool summary;
|
|
const char *sort_order;
|
|
bool show_callchain;
|
|
unsigned int max_stack;
|
|
bool use_bpf;
|
|
|
|
/*
|
|
* statistics
|
|
*/
|
|
u64 timestart;
|
|
u64 timeend;
|
|
|
|
unsigned long nr_events;
|
|
unsigned long nr_lost_chunks;
|
|
unsigned long nr_lost_events;
|
|
|
|
u64 all_runtime;
|
|
u64 all_count;
|
|
u64 nr_skipped_events[KWORK_TRACE_MAX + 1];
|
|
|
|
/*
|
|
* perf kwork top data
|
|
*/
|
|
struct kwork_top_stat top_stat;
|
|
};
|
|
|
|
/*
 * Declared here, defined elsewhere. Takes the session, a class and a key
 * work item and returns a pointer to a kwork_work.
 *
 * NOTE(review): the name suggests it adds a work matching @key to
 * @class; NULL-on-failure semantics are not visible here - confirm
 * against the definition.
 */
struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
				       struct kwork_class *class,
				       struct kwork_work *key);

#ifdef HAVE_BPF_SKEL

/*
 * BPF-backed entry points, declared only when HAVE_BPF_SKEL is defined;
 * the #else branch of this header provides inline stubs with the same
 * names and signatures.
 */

/* report/latency tracing */
int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork);
int perf_kwork__report_read_bpf(struct perf_kwork *kwork);
void perf_kwork__report_cleanup_bpf(void);

void perf_kwork__trace_start(void);
void perf_kwork__trace_finish(void);

/* "top" (CPU usage) tracing */
int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork);
int perf_kwork__top_read_bpf(struct perf_kwork *kwork);
void perf_kwork__top_cleanup_bpf(void);

void perf_kwork__top_start(void);
void perf_kwork__top_finish(void);

#else /* !HAVE_BPF_SKEL */

static inline int
|
|
perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
static inline int
|
|
perf_kwork__report_read_bpf(struct perf_kwork *kwork __maybe_unused)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
/* Stub for builds without HAVE_BPF_SKEL: does nothing. */
static inline void perf_kwork__report_cleanup_bpf(void) {}

/* Stub for builds without HAVE_BPF_SKEL: does nothing. */
static inline void perf_kwork__trace_start(void) {}
/* Stub for builds without HAVE_BPF_SKEL: does nothing. */
static inline void perf_kwork__trace_finish(void) {}

static inline int
|
|
perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
static inline int
|
|
perf_kwork__top_read_bpf(struct perf_kwork *kwork __maybe_unused)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
/* Stub for builds without HAVE_BPF_SKEL: does nothing. */
static inline void perf_kwork__top_cleanup_bpf(void) {}

/* Stub for builds without HAVE_BPF_SKEL: does nothing. */
static inline void perf_kwork__top_start(void) {}
/* Stub for builds without HAVE_BPF_SKEL: does nothing. */
static inline void perf_kwork__top_finish(void) {}

#endif /* HAVE_BPF_SKEL */

#endif /* PERF_UTIL_KWORK_H */