diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 0593c6e636c6..0d11f301fd72 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -509,6 +509,34 @@ alloc_failed: return NULL; } +bool match_callstack_filter(struct machine *machine, u64 *callstack) +{ + struct map *kmap; + struct symbol *sym; + u64 ip; + + if (list_empty(&callstack_filters)) + return true; + + for (int i = 0; i < max_stack_depth; i++) { + struct callstack_filter *filter; + + if (!callstack || !callstack[i]) + break; + + ip = callstack[i]; + sym = machine__find_kernel_symbol(machine, ip, &kmap); + if (sym == NULL) + continue; + + list_for_each_entry(filter, &callstack_filters, list) { + if (strstr(sym->name, filter->name)) + return true; + } + } + return false; +} + struct trace_lock_handler { /* it's used on CONFIG_LOCKDEP */ int (*acquire_event)(struct evsel *evsel, @@ -1070,12 +1098,6 @@ static int report_lock_contention_begin_event(struct evsel *evsel, ls = lock_stat_findnew(key, name, flags); if (!ls) return -ENOMEM; - - if (aggr_mode == LOCK_AGGR_CALLER && needs_callstack()) { - ls->callstack = get_callstack(sample, max_stack_depth); - if (ls->callstack == NULL) - return -ENOMEM; - } } if (filters.nr_types) { @@ -1106,6 +1128,22 @@ static int report_lock_contention_begin_event(struct evsel *evsel, return 0; } + if (needs_callstack()) { + u64 *callstack = get_callstack(sample, max_stack_depth); + if (callstack == NULL) + return -ENOMEM; + + if (!match_callstack_filter(machine, callstack)) { + free(callstack); + return 0; + } + + if (ls->callstack == NULL) + ls->callstack = callstack; + else + free(callstack); + } + ts = thread_stat_findnew(sample->tid); if (!ts) return -ENOMEM; @@ -1606,31 +1644,6 @@ static void print_contention_result(struct lock_contention *con) if (!st->wait_time_total) continue; - if (aggr_mode == LOCK_AGGR_CALLER && !list_empty(&callstack_filters)) { - struct map *kmap; - struct symbol *sym; - u64 ip; - - for (int i = 0; i < max_stack_depth; i++) { - struct callstack_filter *filter; - - if (!st->callstack || !st->callstack[i]) - break; - - ip = st->callstack[i]; - sym = machine__find_kernel_symbol(con->machine, ip, &kmap); - if (sym == NULL) - continue; - - list_for_each_entry(filter, &callstack_filters, list) { - if (strstr(sym->name, filter->name)) - goto found; - } - } - continue; - } - -found: list_for_each_entry(key, &lock_keys, list) { key->print(key, st); pr_info(" "); diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index ead2898ba377..72cf81114982 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -34,13 +34,15 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries); - if (con->aggr_mode == LOCK_AGGR_TASK) { + if (con->aggr_mode == LOCK_AGGR_TASK) bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries); - bpf_map__set_max_entries(skel->maps.stacks, 1); - } else { + else bpf_map__set_max_entries(skel->maps.task_data, 1); + + if (con->save_callstack) bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); - } + else + bpf_map__set_max_entries(skel->maps.stacks, 1); if (target__has_cpu(target)) ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); @@ -146,6 +148,7 @@ int lock_contention_prepare(struct lock_contention *con) /* these don't work well if in the rodata section */ skel->bss->stack_skip = con->stack_skip; skel->bss->aggr_mode = con->aggr_mode; + skel->bss->needs_callstack = con->save_callstack; lock_contention_bpf__attach(skel); return 0; @@ -177,7 +180,7 @@ static const char *lock_contention_get_name(struct lock_contention *con, if (con->aggr_mode == LOCK_AGGR_TASK) { struct contention_task_data task; - int pid = key->aggr_key; + int pid = key->pid; int task_fd = bpf_map__fd(skel->maps.task_data); /* do not update idle comm which contains CPU number */ @@ -194,7 +197,7 @@ static const char *lock_contention_get_name(struct lock_contention *con, } if (con->aggr_mode == LOCK_AGGR_ADDR) { - sym = machine__find_kernel_symbol(machine, key->aggr_key, &kmap); + sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap); if (sym) name = sym->name; return name; @@ -255,20 +258,35 @@ int lock_contention_read(struct lock_contention *con) prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { - s32 stack_id; + s64 ls_key; const char *name; /* to handle errors in the loop body */ err = -1; bpf_map_lookup_elem(fd, &key, &data); - if (con->save_callstack) { - stack_id = key.aggr_key; - bpf_map_lookup_elem(stack, &stack_id, stack_trace); + bpf_map_lookup_elem(stack, &key.stack_id, stack_trace); + + if (!match_callstack_filter(machine, stack_trace)) + goto next; } - st = lock_stat_find(key.aggr_key); + switch (con->aggr_mode) { + case LOCK_AGGR_CALLER: + ls_key = key.stack_id; + break; + case LOCK_AGGR_TASK: + ls_key = key.pid; + break; + case LOCK_AGGR_ADDR: + ls_key = key.lock_addr; + break; + default: + goto next; + } + + st = lock_stat_find(ls_key); if (st != NULL) { st->wait_time_total += data.total_time; if (st->wait_time_max < data.max_time) @@ -283,7 +301,7 @@ int lock_contention_read(struct lock_contention *con) } name = lock_contention_get_name(con, &key, stack_trace); - st = lock_stat_findnew(key.aggr_key, name, data.flags); + st = lock_stat_findnew(ls_key, name, data.flags); if (st == NULL) break; @@ -295,8 +313,6 @@ int lock_contention_read(struct lock_contention *con) if (data.count) st->avg_wait_time = data.total_time / data.count; - st->flags = data.flags; - if (con->save_callstack) { st->callstack = memdup(stack_trace, stack_size); if (st->callstack == NULL) diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index ad0ca5d50557..7ce276ed987e 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -82,6 +82,7 @@ int has_cpu; int has_task; int has_type; int has_addr; +int needs_callstack; int stack_skip; /* determine the key of lock stat */ @@ -173,7 +174,7 @@ int contention_begin(u64 *ctx) pelem->lock = (__u64)ctx[0]; pelem->flags = (__u32)ctx[1]; - if (aggr_mode == LOCK_AGGR_CALLER) { + if (needs_callstack) { pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) @@ -188,7 +189,7 @@ int contention_end(u64 *ctx) { __u32 pid; struct tstamp_data *pelem; - struct contention_key key; + struct contention_key key = {}; struct contention_data *data; __u64 duration; @@ -204,14 +205,18 @@ int contention_end(u64 *ctx) switch (aggr_mode) { case LOCK_AGGR_CALLER: - key.aggr_key = pelem->stack_id; + key.stack_id = pelem->stack_id; break; case LOCK_AGGR_TASK: - key.aggr_key = pid; + key.pid = pid; update_task_data(pid); + if (needs_callstack) + key.stack_id = pelem->stack_id; break; case LOCK_AGGR_ADDR: - key.aggr_key = pelem->lock; + key.lock_addr = pelem->lock; + if (needs_callstack) + key.stack_id = pelem->stack_id; break; default: /* should not happen */ diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h index ce71cf1a7e1e..3d35fd4407ac 100644 --- a/tools/perf/util/bpf_skel/lock_data.h +++ b/tools/perf/util/bpf_skel/lock_data.h @@ -4,7 +4,9 @@ #define UTIL_BPF_SKEL_LOCK_DATA_H struct contention_key { - u64 aggr_key; /* can be stack_id, pid or lock addr */ + u32 stack_id; + u32 pid; + u64 lock_addr; }; #define TASK_COMM_LEN 16 diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index 39d5bfc77f4e..e5fc036108ec 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -68,6 +68,8 @@ struct lock_stat { struct lock_stat *lock_stat_find(u64 addr); struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); +bool match_callstack_filter(struct machine *machine, u64 *callstack); + /* * struct lock_seq_stat: * Place to put on state of one lock sequence