tools/bpftool: add perf subcommand

The new command "bpftool perf [show | list]" will traverse
all processes under /proc, and if any fd is associated
with a perf event, it will print out related perf event
information. Documentation is also added.

Below is an example to show the results using bcc commands.
Running the following 4 bcc commands:
  kprobe:     trace.py '__x64_sys_nanosleep'
  kretprobe:  trace.py 'r::__x64_sys_nanosleep'
  tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
  uprobe:     trace.py 'p:/home/yhs/a.out:main'

The bpftool command line and result:

  $ bpftool perf
  pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
  pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
  pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
  pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159

  $ bpftool -j perf
  [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
   {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
   {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
   {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]

  $ bpftool prog
  5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
	  loaded_at 2018-05-15T04:46:37-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 4
  7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
	  loaded_at 2018-05-15T04:48:32-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 7
  8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
	  loaded_at 2018-05-15T04:48:48-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 8
  9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
	  loaded_at 2018-05-15T04:49:52-0700  uid 0
	  xlated 200B  not jited  memlock 4096B  map_ids 9

  $ ps ax | grep "python ./trace.py"
  21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
  21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
  21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
  21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
  22374 pts/1    S+     0:00 grep --color=auto python ./trace.py

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Yonghong Song 2018-05-24 11:21:58 -07:00 committed by Alexei Starovoitov
parent f699cf7aa4
commit b04df400c3
6 changed files with 343 additions and 2 deletions

View File

@ -0,0 +1,81 @@
================
bpftool-perf
================
-------------------------------------------------------------------------------
tool for inspection of perf related bpf prog attachments
-------------------------------------------------------------------------------
:Manual section: 8
SYNOPSIS
========
**bpftool** [*OPTIONS*] **perf** *COMMAND*
*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
*COMMANDS* :=
{ **show** | **list** | **help** }
PERF COMMANDS
=============
| **bpftool** **perf { show | list }**
| **bpftool** **perf help**
DESCRIPTION
===========
**bpftool perf { show | list }**
List all raw_tracepoint, tracepoint, kprobe attachment in the system.
Output will start with process id and file descriptor in that process,
followed by bpf program id, attachment information, and attachment point.
The attachment point for raw_tracepoint/tracepoint is the trace probe name.
The attachment point for k[ret]probe is either symbol name and offset,
or a kernel virtual address.
The attachment point for u[ret]probe is the file name and the file offset.
**bpftool perf help**
Print short help message.
OPTIONS
=======
-h, --help
Print short generic help message (similar to **bpftool help**).
-v, --version
Print version number (similar to **bpftool version**).
-j, --json
Generate JSON output. For commands that cannot produce JSON, this
option has no effect.
-p, --pretty
Generate human-readable JSON output. Implies **-j**.
EXAMPLES
========
| **# bpftool perf**
::
pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
|
| **# bpftool -j perf**
::
[{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
{"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
{"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
{"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
SEE ALSO
========
**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)

View File

@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version**
*OBJECT* := { **map** | **program** | **cgroup** }
*OBJECT* := { **map** | **program** | **cgroup** | **perf** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@ -30,6 +30,8 @@ SYNOPSIS
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
*PERF-COMMANDS* := { **show** | **list** | **help** }
DESCRIPTION
===========
*bpftool* allows for inspection and simple modification of BPF objects
@ -56,3 +58,4 @@ OPTIONS
SEE ALSO
========
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
**bpftool-perf**\ (8)

View File

@ -448,6 +448,15 @@ _bpftool()
;;
esac
;;
perf)
case $command in
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'help \
show list' -- "$cur" ) )
;;
esac
;;
esac
} &&
complete -F _bpftool bpftool

View File

@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
" OBJECT := { prog | map | cgroup }\n"
" OBJECT := { prog | map | cgroup | perf }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
{ "prog", do_prog },
{ "map", do_map },
{ "cgroup", do_cgroup },
{ "perf", do_perf },
{ "version", do_version },
{ 0 }
};

View File

@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
int do_perf(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);

246
tools/bpf/bpftool/perf.c Normal file
View File

@ -0,0 +1,246 @@
// SPDX-License-Identifier: GPL-2.0+
// Copyright (C) 2018 Facebook
// Author: Yonghong Song <yhs@fb.com>
#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <ftw.h>
#include <bpf.h>
#include "main.h"
/* 0: undecided, 1: supported, 2: not supported */
static int perf_query_supported;
static bool has_perf_query_support(void)
{
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
char buf[256];
int fd;
if (perf_query_supported)
goto out;
fd = open(bin_name, O_RDONLY);
if (fd < 0) {
p_err("perf_query_support: %s", strerror(errno));
goto out;
}
/* the following query will fail as no bpf attachment,
* the expected errno is ENOTSUPP
*/
errno = 0;
len = sizeof(buf);
bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
&fd_type, &probe_offset, &probe_addr);
if (errno == 524 /* ENOTSUPP */) {
perf_query_supported = 1;
goto close_fd;
}
perf_query_supported = 2;
p_err("perf_query_support: %s", strerror(errno));
fprintf(stderr,
"HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
close_fd:
close(fd);
out:
return perf_query_supported == 1;
}
static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
char *buf, __u64 probe_offset, __u64 probe_addr)
{
jsonw_start_object(json_wtr);
jsonw_int_field(json_wtr, "pid", pid);
jsonw_int_field(json_wtr, "fd", fd);
jsonw_uint_field(json_wtr, "prog_id", prog_id);
switch (fd_type) {
case BPF_FD_TYPE_RAW_TRACEPOINT:
jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
jsonw_string_field(json_wtr, "tracepoint", buf);
break;
case BPF_FD_TYPE_TRACEPOINT:
jsonw_string_field(json_wtr, "fd_type", "tracepoint");
jsonw_string_field(json_wtr, "tracepoint", buf);
break;
case BPF_FD_TYPE_KPROBE:
jsonw_string_field(json_wtr, "fd_type", "kprobe");
if (buf[0] != '\0') {
jsonw_string_field(json_wtr, "func", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
} else {
jsonw_lluint_field(json_wtr, "addr", probe_addr);
}
break;
case BPF_FD_TYPE_KRETPROBE:
jsonw_string_field(json_wtr, "fd_type", "kretprobe");
if (buf[0] != '\0') {
jsonw_string_field(json_wtr, "func", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
} else {
jsonw_lluint_field(json_wtr, "addr", probe_addr);
}
break;
case BPF_FD_TYPE_UPROBE:
jsonw_string_field(json_wtr, "fd_type", "uprobe");
jsonw_string_field(json_wtr, "filename", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
break;
case BPF_FD_TYPE_URETPROBE:
jsonw_string_field(json_wtr, "fd_type", "uretprobe");
jsonw_string_field(json_wtr, "filename", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
break;
}
jsonw_end_object(json_wtr);
}
static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
char *buf, __u64 probe_offset, __u64 probe_addr)
{
printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);
switch (fd_type) {
case BPF_FD_TYPE_RAW_TRACEPOINT:
printf("raw_tracepoint %s\n", buf);
break;
case BPF_FD_TYPE_TRACEPOINT:
printf("tracepoint %s\n", buf);
break;
case BPF_FD_TYPE_KPROBE:
if (buf[0] != '\0')
printf("kprobe func %s offset %llu\n", buf,
probe_offset);
else
printf("kprobe addr %llu\n", probe_addr);
break;
case BPF_FD_TYPE_KRETPROBE:
if (buf[0] != '\0')
printf("kretprobe func %s offset %llu\n", buf,
probe_offset);
else
printf("kretprobe addr %llu\n", probe_addr);
break;
case BPF_FD_TYPE_UPROBE:
printf("uprobe filename %s offset %llu\n", buf, probe_offset);
break;
case BPF_FD_TYPE_URETPROBE:
printf("uretprobe filename %s offset %llu\n", buf,
probe_offset);
break;
}
}
static int show_proc(const char *fpath, const struct stat *sb,
int tflag, struct FTW *ftwbuf)
{
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
int err, pid = 0, fd = 0;
const char *pch;
char buf[4096];
/* prefix always /proc */
pch = fpath + 5;
if (*pch == '\0')
return 0;
/* pid should be all numbers */
pch++;
while (isdigit(*pch)) {
pid = pid * 10 + *pch - '0';
pch++;
}
if (*pch == '\0')
return 0;
if (*pch != '/')
return FTW_SKIP_SUBTREE;
/* check /proc/<pid>/fd directory */
pch++;
if (strncmp(pch, "fd", 2))
return FTW_SKIP_SUBTREE;
pch += 2;
if (*pch == '\0')
return 0;
if (*pch != '/')
return FTW_SKIP_SUBTREE;
/* check /proc/<pid>/fd/<fd_num> */
pch++;
while (isdigit(*pch)) {
fd = fd * 10 + *pch - '0';
pch++;
}
if (*pch != '\0')
return FTW_SKIP_SUBTREE;
/* query (pid, fd) for potential perf events */
len = sizeof(buf);
err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
&probe_offset, &probe_addr);
if (err < 0)
return 0;
if (json_output)
print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
probe_addr);
else
print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
probe_addr);
return 0;
}
static int do_show(int argc, char **argv)
{
int flags = FTW_ACTIONRETVAL | FTW_PHYS;
int err = 0, nopenfd = 16;
if (!has_perf_query_support())
return -1;
if (json_output)
jsonw_start_array(json_wtr);
if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
p_err("%s", strerror(errno));
err = -1;
}
if (json_output)
jsonw_end_array(json_wtr);
return err;
}
static int do_help(int argc, char **argv)
{
fprintf(stderr,
"Usage: %s %s { show | list | help }\n"
"",
bin_name, argv[-2]);
return 0;
}
static const struct cmd cmds[] = {
{ "show", do_show },
{ "list", do_show },
{ "help", do_help },
{ 0 }
};
int do_perf(int argc, char **argv)
{
return cmd_select(cmds, argc, argv, do_help);
}