From c68677014bace6a4b6ad20f0818e1470d049618f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Mar 2017 11:19:59 -0300 Subject: [PATCH 1/9] perf tools: Remove support for command aliases This came from 'git', but isn't documented anywhere in tools/perf/Documentation/, looks like baggage we can do without, ditch it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-e7uwkn60t4hmlnwj99ba4t2s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 13 ---- tools/perf/perf.c | 97 +++--------------------------- tools/perf/util/Build | 1 - tools/perf/util/alias.c | 78 ------------------------ tools/perf/util/cache.h | 1 - tools/perf/util/help-unknown-cmd.c | 8 +-- 6 files changed, 8 insertions(+), 190 deletions(-) delete mode 100644 tools/perf/util/alias.c diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 7ae238929e95..1eec96a0fa67 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -301,12 +301,6 @@ void list_common_cmds_help(void) } } -static int is_perf_command(const char *s) -{ - return is_in_cmdlist(&main_cmds, s) || - is_in_cmdlist(&other_cmds, s); -} - static const char *cmd_to_page(const char *perf_cmd) { char *s; @@ -446,7 +440,6 @@ int cmd_help(int argc, const char **argv) "perf help [--all] [--man|--web|--info] [command]", NULL }; - const char *alias; int rc; load_command_list("perf-", &main_cmds, &other_cmds); @@ -472,12 +465,6 @@ int cmd_help(int argc, const char **argv) return 0; } - alias = alias_lookup(argv[0]); - if (alias && !is_perf_command(argv[0])) { - printf("`perf %s' is aliased to `%s'\n", argv[0], alias); - return 0; - } - switch (help_format) { case HELP_FORMAT_MAN: rc = show_man_page(argv[0]); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4b283d18e158..9217f2227f3d 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -267,71 +267,6 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) return handled; } -static int handle_alias(int *argcp, const char ***argv) -{ - int envchanged = 0, ret = 0, saved_errno = errno; - int count, option_count; - const char **new_argv; - const char *alias_command; - char *alias_string; - - alias_command = (*argv)[0]; - alias_string = alias_lookup(alias_command); - if (alias_string) { - if (alias_string[0] == '!') { - if (*argcp > 1) { - struct strbuf buf; - - if (strbuf_init(&buf, PATH_MAX) < 0 || - strbuf_addstr(&buf, alias_string) < 0 || - sq_quote_argv(&buf, (*argv) + 1, - PATH_MAX) < 0) - die("Failed to allocate memory."); - free(alias_string); - alias_string = buf.buf; - } - ret = system(alias_string + 1); - if (ret >= 0 && WIFEXITED(ret) && - WEXITSTATUS(ret) != 127) - exit(WEXITSTATUS(ret)); - die("Failed to run '%s' when expanding alias '%s'", - alias_string + 1, alias_command); - } - count = split_cmdline(alias_string, &new_argv); - if (count < 0) - die("Bad alias.%s string", alias_command); - option_count = handle_options(&new_argv, &count, &envchanged); - if (envchanged) - die("alias '%s' changes environment variables\n" - "You can use '!perf' in the alias to do this.", - alias_command); - memmove(new_argv - option_count, new_argv, - count * sizeof(char *)); - new_argv -= option_count; - - if (count < 1) - die("empty alias for %s", alias_command); - - if (!strcmp(alias_command, new_argv[0])) - die("recursive alias: %s", alias_command); - - new_argv = realloc(new_argv, sizeof(char *) * - (count + *argcp + 1)); - /* insert after command name */ - memcpy(new_argv + count, *argv + 1, sizeof(char *) * *argcp); - new_argv[count + *argcp] = NULL; - - *argv = new_argv; - *argcp += count - 1; - - ret = 1; - } - - errno = saved_errno; - - return ret; -} - #define RUN_SETUP (1<<0) #define USE_PAGER (1<<1) @@ -455,25 +390,12 @@ do_die: static int run_argv(int *argcp, const char ***argv) { - int done_alias = 0; + /* See if it's an internal command */ + handle_internal_command(*argcp, *argv); - while (1) { - /* See if it's an internal command */ - handle_internal_command(*argcp, *argv); - - /* .. then try the external ones */ - execv_dashed_external(*argv); - - /* It could be an alias -- this works around the insanity - * of overriding "perf log" with "perf show" by having - * alias.log = show - */ - if (done_alias || !handle_alias(argcp, argv)) - break; - done_alias = 1; - } - - return done_alias; + /* .. then try the external ones */ + execv_dashed_external(*argv); + return 0; } static void pthread__block_sigwinch(void) @@ -606,17 +528,12 @@ int main(int argc, const char **argv) while (1) { static int done_help; - int was_alias = run_argv(&argc, &argv); + + run_argv(&argc, &argv); if (errno != ENOENT) break; - if (was_alias) { - fprintf(stderr, "Expansion of alias '%s' failed; " - "'%s' is not a perf-command\n", - cmd, argv[0]); - goto out; - } if (!done_help) { cmd = argv[0] = help_unknown_cmd(cmd); done_help = 1; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2ae92da613dd..5c0ea11a8f0a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,3 @@ -libperf-y += alias.o libperf-y += annotate.o libperf-y += block-range.o libperf-y += build-id.o diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c deleted file mode 100644 index 6455471d9cd1..000000000000 --- a/tools/perf/util/alias.c +++ /dev/null @@ -1,78 +0,0 @@ -#include "cache.h" -#include "util.h" -#include "config.h" - -static const char *alias_key; -static char *alias_val; - -static int alias_lookup_cb(const char *k, const char *v, - void *cb __maybe_unused) -{ - if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { - if (!v) - return config_error_nonbool(k); - alias_val = strdup(v); - return 0; - } - return 0; -} - -char *alias_lookup(const char *alias) -{ - alias_key = alias; - alias_val = NULL; - perf_config(alias_lookup_cb, NULL); - return alias_val; -} - -int split_cmdline(char *cmdline, const char ***argv) -{ - int src, dst, count = 0, size = 16; - char quoted = 0; - - *argv = malloc(sizeof(char*) * size); - - /* split alias_string */ - (*argv)[count++] = cmdline; - for (src = dst = 0; cmdline[src];) { - char c = cmdline[src]; - if (!quoted && isspace(c)) { - cmdline[dst++] = 0; - while (cmdline[++src] - && isspace(cmdline[src])) - ; /* skip */ - if (count >= size) { - size += 16; - *argv = realloc(*argv, sizeof(char*) * size); - } - (*argv)[count++] = cmdline + dst; - } else if (!quoted && (c == '\'' || c == '"')) { - quoted = c; - src++; - } else if (c == quoted) { - quoted = 0; - src++; - } else { - if (c == '\\' && quoted != '\'') { - src++; - c = cmdline[src]; - if (!c) { - zfree(argv); - return error("cmdline ends with \\"); - } - } - cmdline[dst++] = c; - src++; - } - } - - cmdline[dst] = 0; - - if (quoted) { - zfree(argv); - return error("unclosed quote"); - } - - return count; -} - diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 512c0c83fbc6..0328f297a748 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -15,7 +15,6 @@ #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #define PERF_PAGER_ENVIRONMENT "PERF_PAGER" -char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); #define alloc_nr(x) (((x)+16)*3/2) diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 2821f8d77e52..34201440ac03 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -6,16 +6,12 @@ #include "levenshtein.h" static int autocorrect; -static struct cmdnames aliases; static int perf_unknown_cmd_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) autocorrect = perf_config_int(var,value); - /* Also use aliases for command lookup */ - if (!prefixcmp(var, "alias.")) - add_cmdname(&aliases, var + 6, strlen(var + 6)); return 0; } @@ -59,14 +55,12 @@ const char *help_unknown_cmd(const char *cmd) memset(&main_cmds, 0, sizeof(main_cmds)); memset(&other_cmds, 0, sizeof(main_cmds)); - memset(&aliases, 0, sizeof(aliases)); perf_config(perf_unknown_cmd_config, NULL); load_command_list("perf-", &main_cmds, &other_cmds); - if (add_cmd_list(&main_cmds, &aliases) < 0 || - add_cmd_list(&main_cmds, &other_cmds) < 0) { + if (add_cmd_list(&main_cmds, &other_cmds) < 0) { fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n"); goto end; } From 2d01ecc580405169ecd6e3880617bc61cf482fdd Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 28 Mar 2017 15:17:52 +0530 Subject: [PATCH 2/9] perf/sdt/x86: Add renaming logic for (missing) 8 bit registers I found couple of events using al, bl, cl and dl registers for argument. These are not directly accepted by uprobe_events and thus needs to be mapped to ax, bx, cx and dx respectively. Few ex, /usr/bin/qemu-system-s390x css_adapter_interrupt: 1@%bl css_chpid_add: 1@%cl 1@%sil 1@%dl dma_bdrv_io: 8@%rbx 8@%rbp -8@%r14 1@%al /usr/bin/postgres buffer__read__done: ... -1@-bash -1@%al buffer__read__start: ... -1@%al I don't find any sdt events using ah, bh,... registers. But I also don't see any reason to not use them, so there might be rare events using these registers, and if so, perf should have a renaming logic for them too. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Hemant Kumar Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170328094754.3156-2-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index d8a8dcf761f7..fa1fd196837d 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -40,12 +40,20 @@ struct sdt_name_reg { static const struct sdt_name_reg sdt_reg_renamings[] = { SDT_NAME_REG(eax, ax), SDT_NAME_REG(rax, ax), + SDT_NAME_REG(al, ax), + SDT_NAME_REG(ah, ax), SDT_NAME_REG(ebx, bx), SDT_NAME_REG(rbx, bx), + SDT_NAME_REG(bl, bx), + SDT_NAME_REG(bh, bx), SDT_NAME_REG(ecx, cx), SDT_NAME_REG(rcx, cx), + SDT_NAME_REG(cl, cx), + SDT_NAME_REG(ch, cx), SDT_NAME_REG(edx, dx), SDT_NAME_REG(rdx, dx), + SDT_NAME_REG(dl, dx), + SDT_NAME_REG(dh, dx), SDT_NAME_REG(esi, si), SDT_NAME_REG(rsi, si), SDT_NAME_REG(sil, si), From d451a205da29c5485ca634367154e83997571aa0 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 28 Mar 2017 15:17:53 +0530 Subject: [PATCH 3/9] perf/sdt/x86: Move OP parser to tools/perf/arch/x86/ SDT marker argument is in N@OP format. N is the size of argument and OP is the actual assembly operand. OP is arch dependent component and hence it's parsing logic also should be placed under tools/perf/arch/. Signed-off-by: Ravi Bangoria Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: Hemant Kumar Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170328094754.3156-3-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 187 +++++++++++++++++++++------ tools/perf/util/perf_regs.c | 6 +- tools/perf/util/perf_regs.h | 11 +- tools/perf/util/probe-file.c | 142 ++++++-------------- 4 files changed, 203 insertions(+), 143 deletions(-) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index fa1fd196837d..3bf3548c5e2d 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -1,8 +1,10 @@ #include +#include #include "../../perf.h" #include "../../util/util.h" #include "../../util/perf_regs.h" +#include "../../util/debug.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), @@ -37,7 +39,7 @@ struct sdt_name_reg { #define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m} #define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL} -static const struct sdt_name_reg sdt_reg_renamings[] = { +static const struct sdt_name_reg sdt_reg_tbl[] = { SDT_NAME_REG(eax, ax), SDT_NAME_REG(rax, ax), SDT_NAME_REG(al, ax), @@ -95,45 +97,158 @@ static const struct sdt_name_reg sdt_reg_renamings[] = { SDT_NAME_REG_END, }; -int sdt_rename_register(char **pdesc, char *old_name) +/* + * Perf only supports OP which is in +/-NUM(REG) form. + * Here plus-minus sign, NUM and parenthesis are optional, + * only REG is mandatory. + * + * SDT events also supports indirect addressing mode with a + * symbol as offset, scaled mode and constants in OP. But + * perf does not support them yet. Below are few examples. + * + * OP with scaled mode: + * (%rax,%rsi,8) + * 10(%ras,%rsi,8) + * + * OP with indirect addressing mode: + * check_action(%rip) + * mp_+52(%rip) + * 44+mp_(%rip) + * + * OP with constant values: + * $0 + * $123 + * $-1 + */ +#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$" + +static regex_t sdt_op_regex; + +static int sdt_init_op_regex(void) { - const struct sdt_name_reg *rnames = sdt_reg_renamings; - char *new_desc, *old_desc = *pdesc; - size_t prefix_len, sdt_len, uprobe_len, old_desc_len, offset; - int ret = -1; + static int initialized; + int ret = 0; - while (ret != 0 && rnames->sdt_name != NULL) { - sdt_len = strlen(rnames->sdt_name); - ret = strncmp(old_name, rnames->sdt_name, sdt_len); - rnames += !!ret; - } - - if (rnames->sdt_name == NULL) + if (initialized) return 0; - sdt_len = strlen(rnames->sdt_name); - uprobe_len = strlen(rnames->uprobe_name); - old_desc_len = strlen(old_desc) + 1; - - new_desc = zalloc(old_desc_len + uprobe_len - sdt_len); - if (new_desc == NULL) - return -1; - - /* Copy the chars before the register name (at least '%') */ - prefix_len = old_name - old_desc; - memcpy(new_desc, old_desc, prefix_len); - - /* Copy the new register name */ - memcpy(new_desc + prefix_len, rnames->uprobe_name, uprobe_len); - - /* Copy the chars after the register name (if need be) */ - offset = prefix_len + sdt_len; - if (offset < old_desc_len) - memcpy(new_desc + prefix_len + uprobe_len, - old_desc + offset, old_desc_len - offset); - - free(old_desc); - *pdesc = new_desc; + ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED); + if (ret < 0) { + pr_debug4("Regex compilation error.\n"); + return ret; + } + initialized = 1; return 0; } + +/* + * Max x86 register name length is 5(ex: %r15d). So, 6th char + * should always contain NULL. This helps to find register name + * length using strlen, insted of maintaing one more variable. + */ +#define SDT_REG_NAME_SIZE 6 + +/* + * The uprobe parser does not support all gas register names; + * so, we have to replace them (ex. for x86_64: %rax -> %ax). + * Note: If register does not require renaming, just copy + * paste as it is, but don't leave it empty. + */ +static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg) +{ + int i = 0; + + for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) { + if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) { + strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name); + return; + } + } + + strncpy(uprobe_reg, sdt_reg, sdt_len); +} + +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + char new_reg[SDT_REG_NAME_SIZE] = {0}; + int new_len = 0, ret; + /* + * rm[0]: +/-NUM(REG) + * rm[1]: +/- + * rm[2]: NUM + * rm[3]: ( + * rm[4]: REG + * rm[5]: ) + */ + regmatch_t rm[6]; + /* + * Max prefix length is 2 as it may contains sign(+/-) + * and displacement 0 (Both sign and displacement 0 are + * optional so it may be empty). Use one more character + * to hold last NULL so that strlen can be used to find + * prefix length, instead of maintaing one more variable. + */ + char prefix[3] = {0}; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + /* + * If unsupported OR does not match with regex OR + * register name too long, skip it. + */ + if (strchr(old_op, ',') || strchr(old_op, '$') || + regexec(&sdt_op_regex, old_op, 6, rm, 0) || + rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + /* + * Prepare prefix. + * If SDT OP has parenthesis but does not provide + * displacement, add 0 for displacement. + * SDT Uprobe Prefix + * ----------------------------- + * +24(%rdi) +24(%di) + + * 24(%rdi) +24(%di) + + * %rdi %di + * (%rdi) +0(%di) +0 + * -80(%rbx) -80(%bx) - + */ + if (rm[3].rm_so != rm[3].rm_eo) { + if (rm[1].rm_so != rm[1].rm_eo) + prefix[0] = *(old_op + rm[1].rm_so); + else if (rm[2].rm_so != rm[2].rm_eo) + prefix[0] = '+'; + else + strncpy(prefix, "+0", 2); + } + + /* Rename register */ + sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so, + new_reg); + + /* Prepare final OP which should be valid for uprobe_events */ + new_len = strlen(prefix) + + (rm[2].rm_eo - rm[2].rm_so) + + (rm[3].rm_eo - rm[3].rm_so) + + strlen(new_reg) + + (rm[5].rm_eo - rm[5].rm_so) + + 1; /* NULL */ + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s", + strlen(prefix), prefix, + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so, + (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so, + strlen(new_reg), new_reg, + (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so); + + return SDT_ARG_VALID; +} diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a37e5934aa2a..b2ae039eff85 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -6,10 +6,10 @@ const struct sample_reg __weak sample_reg_masks[] = { SMPL_REG_END }; -int __weak sdt_rename_register(char **pdesc __maybe_unused, - char *old_name __maybe_unused) +int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, + char **new_op __maybe_unused) { - return 0; + return SDT_ARG_SKIP; } #ifdef HAVE_PERF_REGS_SUPPORT diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 7544a157e159..32b37d19dcc3 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,11 +15,12 @@ struct sample_reg { extern const struct sample_reg sample_reg_masks[]; -/* - * The table sdt_reg_renamings is used for adjusting gcc/gas-generated - * registers before filling the uprobe tracer interface. - */ -int sdt_rename_register(char **pdesc, char *old_name); +enum { + SDT_ARG_VALID = 0, + SDT_ARG_SKIP, +}; + +int arch_sdt_arg_parse_op(char *old_op, char **new_op); #ifdef HAVE_PERF_REGS_SUPPORT #include diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index d741634cbfc0..88714dec8912 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -694,10 +694,29 @@ static const char * const type_to_suffix[] = { "", ":u8", ":u16", "", ":u32", "", "", "", ":u64" }; +/* + * Isolate the string number and convert it into a decimal value; + * this will be an index to get suffix of the uprobe name (defining + * the type) + */ +static int sdt_arg_parse_size(char *n_ptr, const char **suffix) +{ + long type_idx; + + type_idx = strtol(n_ptr, NULL, 10); + if (type_idx < -8 || type_idx > 8) { + pr_debug4("Failed to get a valid sdt type\n"); + return -1; + } + + *suffix = type_to_suffix[type_idx + 8]; + return 0; +} + static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg) { - char *tmp, *desc = strdup(arg); - const char *prefix = "", *suffix = ""; + char *op, *desc = strdup(arg), *new_op = NULL; + const char *suffix = ""; int ret = -1; if (desc == NULL) { @@ -705,112 +724,37 @@ static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg) return ret; } - tmp = strchr(desc, '@'); - if (tmp) { - long type_idx; - /* - * Isolate the string number and convert it into a - * binary value; this will be an index to get suffix - * of the uprobe name (defining the type) - */ - tmp[0] = '\0'; - type_idx = strtol(desc, NULL, 10); - /* Check that the conversion went OK */ - if (type_idx == LONG_MIN || type_idx == LONG_MAX) { - pr_debug4("Failed to parse sdt type\n"); + /* + * Argument is in N@OP format. N is size of the argument and OP is + * the actual assembly operand. N can be omitted; in that case + * argument is just OP(without @). + */ + op = strchr(desc, '@'); + if (op) { + op[0] = '\0'; + op++; + + if (sdt_arg_parse_size(desc, &suffix)) goto error; - } - /* Check that the converted value is OK */ - if (type_idx < -8 || type_idx > 8) { - pr_debug4("Failed to get a valid sdt type\n"); - goto error; - } - suffix = type_to_suffix[type_idx + 8]; - /* Get rid of the sdt prefix which is now useless */ - tmp++; - memmove(desc, tmp, strlen(tmp) + 1); + } else { + op = desc; } - /* - * The uprobe tracer format does not support all the - * addressing modes (notably: in x86 the scaled mode); so, we - * detect ',' characters, if there is just one, there is no - * use converting the sdt arg into a uprobe one. - */ - if (strchr(desc, ',')) { - pr_debug4("Skipping unsupported SDT argument; %s\n", desc); - goto out; - } + ret = arch_sdt_arg_parse_op(op, &new_op); - /* - * If the argument addressing mode is indirect, we must check - * a few things... - */ - tmp = strchr(desc, '('); - if (tmp) { - int j; - - /* - * ...if the addressing mode is indirect with a - * positive offset (ex.: "1608(%ax)"), we need to add - * a '+' prefix so as to be compliant with uprobe - * format. - */ - if (desc[0] != '+' && desc[0] != '-') - prefix = "+"; - - /* - * ...or if the addressing mode is indirect with a symbol - * as offset, the argument will not be supported by - * the uprobe tracer format; so, let's skip this one. - */ - for (j = 0; j < tmp - desc; j++) { - if (desc[j] != '+' && desc[j] != '-' && - !isdigit(desc[j])) { - pr_debug4("Skipping unsupported SDT argument; " - "%s\n", desc); - goto out; - } - } - } - - /* - * The uprobe tracer format does not support constants; if we - * find one in the current argument, let's skip the argument. - */ - if (strchr(desc, '$')) { - pr_debug4("Skipping unsupported SDT argument; %s\n", desc); - goto out; - } - - /* - * The uprobe parser does not support all gas register names; - * so, we have to replace them (ex. for x86_64: %rax -> %ax); - * the loop below looks for the register names (starting with - * a '%' and tries to perform the needed renamings. - */ - tmp = strchr(desc, '%'); - while (tmp) { - size_t offset = tmp - desc; - - ret = sdt_rename_register(&desc, desc + offset); - if (ret < 0) - goto error; - - /* - * The desc pointer might have changed; so, let's not - * try to reuse tmp for next lookup - */ - tmp = strchr(desc + offset + 1, '%'); - } - - if (strbuf_addf(buf, " arg%d=%s%s%s", i + 1, prefix, desc, suffix) < 0) + if (ret < 0) goto error; -out: + if (ret == SDT_ARG_VALID) { + ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix); + if (ret < 0) + goto error; + } + ret = 0; error: free(desc); + free(new_op); return ret; } From c1dfcfad5879df7f41c436d887aea509dadd516d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 9 Mar 2017 16:06:26 +0800 Subject: [PATCH 4/9] perf report: Drop cycles 0 for LBR print For some platforms, for example Broadwell, it doesn't support cycles for LBR. But the perf always prints cycles:0, it's not necessary. The patch refactors the LBR info print code and drops the cycles:0. For example: perf report --branch-history --no-children --stdio On Broadwell: --0.91%--__random_r random_r.c:394 (iterations:2) __random_r random_r.c:360 (predicted:0.0%) __random_r random_r.c:380 (predicted:0.0%) __random_r random_r.c:357 On Skylake: --1.07%--main div.c:39 (predicted:52.4% cycles:1 iterations:17) main div.c:44 (predicted:52.4% cycles:1) main div.c:42 (cycles:2) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (cycles:1) rand rand.c:28 (cycles:1) rand rand.c:28 (cycles:1) __random random.c:298 (cycles:1) __random random.c:297 (cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (cycles:1) Signed-off-by: Yao Jin Reviewed-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1489046786-10061-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 133 +++++++++++++++++++++++------------- 1 file changed, 85 insertions(+), 48 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index d78776a20e80..3cea1fb5404b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1105,63 +1105,100 @@ int callchain_branch_counts(struct callchain_root *root, cycles_count); } +static int counts_str_build(char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) +{ + double predicted_percent = 0.0; + const char *null_str = ""; + char iter_str[32]; + char cycle_str[32]; + char *istr, *cstr; + u64 cycles; + + if (branch_count == 0) + return scnprintf(bf, bfsize, " (calltrace)"); + + cycles = cycles_count / branch_count; + + if (iter_count && samples_count) { + if (cycles > 0) + scnprintf(iter_str, sizeof(iter_str), + " iterations:%" PRId64 "", + iter_count / samples_count); + else + scnprintf(iter_str, sizeof(iter_str), + "iterations:%" PRId64 "", + iter_count / samples_count); + istr = iter_str; + } else + istr = (char *)null_str; + + if (cycles > 0) { + scnprintf(cycle_str, sizeof(cycle_str), + "cycles:%" PRId64 "", cycles); + cstr = cycle_str; + } else + cstr = (char *)null_str; + + predicted_percent = predicted_count * 100.0 / branch_count; + + if ((predicted_count == branch_count) && (abort_count == 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, " (%s%s)", cstr, istr); + else + return scnprintf(bf, bfsize, "%s", (char *)null_str); + } + + if ((predicted_count < branch_count) && (abort_count == 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, + " (predicted:%.1f%% %s%s)", + predicted_percent, cstr, istr); + else { + return scnprintf(bf, bfsize, + " (predicted:%.1f%%)", + predicted_percent); + } + } + + if ((predicted_count == branch_count) && (abort_count > 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, + " (abort:%" PRId64 " %s%s)", + abort_count, cstr, istr); + else + return scnprintf(bf, bfsize, + " (abort:%" PRId64 ")", + abort_count); + } + + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, + " (predicted:%.1f%% abort:%" PRId64 " %s%s)", + predicted_percent, abort_count, cstr, istr); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%% abort:%" PRId64 ")", + predicted_percent, abort_count); +} + static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, u64 branch_count, u64 predicted_count, u64 abort_count, u64 cycles_count, u64 iter_count, u64 samples_count) { - double predicted_percent = 0.0; - const char *null_str = ""; - char iter_str[32]; - char *str; - u64 cycles = 0; + char str[128]; - if (branch_count == 0) { - if (fp) - return fprintf(fp, " (calltrace)"); - - return scnprintf(bf, bfsize, " (calltrace)"); - } - - if (iter_count && samples_count) { - scnprintf(iter_str, sizeof(iter_str), - ", iterations:%" PRId64 "", - iter_count / samples_count); - str = iter_str; - } else - str = (char *)null_str; - - predicted_percent = predicted_count * 100.0 / branch_count; - cycles = cycles_count / branch_count; - - if ((predicted_percent >= 100.0) && (abort_count == 0)) { - if (fp) - return fprintf(fp, " (cycles:%" PRId64 "%s)", - cycles, str); - - return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)", - cycles, str); - } - - if ((predicted_percent < 100.0) && (abort_count == 0)) { - if (fp) - return fprintf(fp, - " (predicted:%.1f%%, cycles:%" PRId64 "%s)", - predicted_percent, cycles, str); - - return scnprintf(bf, bfsize, - " (predicted:%.1f%%, cycles:%" PRId64 "%s)", - predicted_percent, cycles, str); - } + counts_str_build(str, sizeof(str), branch_count, + predicted_count, abort_count, cycles_count, + iter_count, samples_count); if (fp) - return fprintf(fp, - " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", - predicted_percent, abort_count, cycles, str); + return fprintf(fp, "%s", str); - return scnprintf(bf, bfsize, - " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", - predicted_percent, abort_count, cycles, str); + return scnprintf(bf, bfsize, "%s", str); } int callchain_list_counts__printf_value(struct callchain_node *node, From fd2b2975149f5f7099693027cece81b16842964a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Mar 2017 16:37:51 -0300 Subject: [PATCH 5/9] perf trace: Handle unpaired raw_syscalls:sys_exit event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Which may happen when we start a tracing session and a thread is waiting for something like "poll" to return, in which case we better print "?" both for the syscall entry timestamp and for the duration. E.g.: Tracing existing mutt session: # perf trace -p `pidof mutt` ? ( ? ): mutt/17135 ... [continued]: poll()) = 1 0.027 ( 0.013 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 0.047 ( 0.008 ms): mutt/17135 poll(ufds: 0x7ffcb3c42c50, nfds: 1, timeout_msecs: 1000) = 1 0.059 ( 0.008 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 Before it would print a large number because we'd do: ttrace->entry_time - trace->base_time And entry_time would be 0, while base_time would be the timestamp for the first event 'perf trace' reads, oops. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Luis Claudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wbcb93ofva2qdjd5ltn5eeqq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 43 ++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c88f9f215e6f..7379792a6504 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -821,12 +821,21 @@ struct syscall { void **arg_parm; }; -static size_t fprintf_duration(unsigned long t, FILE *fp) +/* + * We need to have this 'calculated' boolean because in some cases we really + * don't know what is the duration of a syscall, for instance, when we start + * a session and some threads are waiting for a syscall to finish, say 'poll', + * in which case all we can do is to print "( ? ) for duration and for the + * start timestamp. + */ +static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) { double duration = (double)t / NSEC_PER_MSEC; size_t printed = fprintf(fp, "("); - if (duration >= 1.0) + if (!calculated) + printed += fprintf(fp, " ? "); + else if (duration >= 1.0) printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); else if (duration >= 0.01) printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); @@ -1028,13 +1037,27 @@ static bool trace__filter_duration(struct trace *trace, double t) return t < (trace->duration_filter * NSEC_PER_MSEC); } -static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) { double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; return fprintf(fp, "%10.3f ", ts); } +/* + * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are + * using ttrace->entry_time for a thread that receives a sys_exit without + * first having received a sys_enter ("poll" issued before tracing session + * starts, lost sys_enter exit due to ring buffer overflow). + */ +static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +{ + if (tstamp > 0) + return __trace__fprintf_tstamp(trace, tstamp, fp); + + return fprintf(fp, " ? "); +} + static bool done = false; static bool interrupted = false; @@ -1045,10 +1068,10 @@ static void sig_handler(int sig) } static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, - u64 duration, u64 tstamp, FILE *fp) + u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) { size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); - printed += fprintf_duration(duration, fp); + printed += fprintf_duration(duration, duration_calculated, fp); if (trace->multiple_threads) { if (trace->show_comm) @@ -1450,7 +1473,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp duration = sample->time - ttrace->entry_time; - printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1497,7 +1520,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { - trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { @@ -1545,6 +1568,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, { long ret; u64 duration = 0; + bool duration_calculated = false; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; struct syscall *sc = trace__syscall_info(trace, evsel, id); @@ -1573,6 +1597,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, duration = sample->time - ttrace->entry_time; if (trace__filter_duration(trace, duration)) goto out; + duration_calculated = true; } else if (trace->duration_filter) goto out; @@ -1588,7 +1613,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (trace->summary_only) goto out; - trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); if (ttrace->entry_pending) { fprintf(trace->output, "%-70s", ttrace->entry_str); @@ -1855,7 +1880,7 @@ static int trace__pgfault(struct trace *trace, thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); fprintf(trace->output, "%sfault [", evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? From a596a877fde0b34e622dbf123f361dacd086cd6e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 30 Mar 2017 10:54:40 +0100 Subject: [PATCH 6/9] perf utils: Fix spelling mistake: "Invalud" -> "Invalid" Trivial fix to spelling mistake in pr_debug message. Signed-off-by: Colin King Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Krister Johansen Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20170330095440.19444-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3c4d4d00cb2c..61bf304206fd 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2459,7 +2459,7 @@ int parse_filter_percentage(const struct option *opt __maybe_unused, else if (!strcmp(arg, "absolute")) symbol_conf.filter_relative = false; else { - pr_debug("Invalud percentage: %s\n", arg); + pr_debug("Invalid percentage: %s\n", arg); return -1; } From 67ef28794d7e30f33936d655f2951e8dcae7cd5a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Mar 2017 11:16:59 -0300 Subject: [PATCH 7/9] tools include uapi: Grab copies of stat.h and fcntl.h We will need it to build tools/perf/trace/beauty/statx.h. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-nin41ve2fa63lrfbdr6x57yr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/types.h | 1 + tools/include/uapi/linux/fcntl.h | 72 +++++++++++++ tools/include/uapi/linux/stat.h | 176 +++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 2 + tools/perf/check-headers.sh | 2 + 5 files changed, 253 insertions(+) create mode 100644 tools/include/uapi/linux/fcntl.h create mode 100644 tools/include/uapi/linux/stat.h diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index c24b3e3ae296..77a28a26a670 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -7,6 +7,7 @@ #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ #include +#include struct page; struct kmem_cache; diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h new file mode 100644 index 000000000000..813afd6eee71 --- /dev/null +++ b/tools/include/uapi/linux/fcntl.h @@ -0,0 +1,72 @@ +#ifndef _UAPI_LINUX_FCNTL_H +#define _UAPI_LINUX_FCNTL_H + +#include + +#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) +#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) + +/* + * Cancel a blocking posix lock; internal use only until we expose an + * asynchronous lock api to userspace: + */ +#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5) + +/* Create a file descriptor with FD_CLOEXEC set. */ +#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6) + +/* + * Request nofications on a directory. + * See below for events that may be notified. + */ +#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2) + +/* + * Set and get of pipe page size array + */ +#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) + +/* + * Set/Get seals + */ +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +/* + * Types of seals + */ +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* (1U << 31) is reserved for signed error codes */ + +/* + * Types of directory notifications that may be requested. + */ +#define DN_ACCESS 0x00000001 /* File accessed */ +#define DN_MODIFY 0x00000002 /* File modified */ +#define DN_CREATE 0x00000004 /* File created */ +#define DN_DELETE 0x00000008 /* File removed */ +#define DN_RENAME 0x00000010 /* File renamed */ +#define DN_ATTRIB 0x00000020 /* File changed attibutes */ +#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ + +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ + +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + + +#endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h new file mode 100644 index 000000000000..51a6b86e3700 --- /dev/null +++ b/tools/include/uapi/linux/stat.h @@ -0,0 +1,176 @@ +#ifndef _UAPI_LINUX_STAT_H +#define _UAPI_LINUX_STAT_H + +#include + +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) + +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 + +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) + +#define S_IRWXU 00700 +#define S_IRUSR 00400 +#define S_IWUSR 00200 +#define S_IXUSR 00100 + +#define S_IRWXG 00070 +#define S_IRGRP 00040 +#define S_IWGRP 00020 +#define S_IXGRP 00010 + +#define S_IRWXO 00007 +#define S_IROTH 00004 +#define S_IWOTH 00002 +#define S_IXOTH 00001 + +#endif + +/* + * Timestamp structure for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is + * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. + * + * Note that if both tv_sec and tv_nsec are non-zero, then the two values must + * either be both positive or both negative. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __s32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 __spare1[1]; + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). + */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ + +/* + * Attributes to be found in stx_attributes + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + + +#endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 28648c09dcd6..89018c7311a4 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -73,9 +73,11 @@ tools/include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman.h tools/include/uapi/linux/bpf.h tools/include/uapi/linux/bpf_common.h +tools/include/uapi/linux/fcntl.h tools/include/uapi/linux/hw_breakpoint.h tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h +tools/include/uapi/linux/stat.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c747bfd7f14d..83fe2202382e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -1,7 +1,9 @@ #!/bin/sh HEADERS=' +include/uapi/linux/fcntl.h include/uapi/linux/perf_event.h +include/uapi/linux/stat.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h From 3e00cbe8891a655520ca2cfe9b6d509d0a845f07 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 30 Mar 2017 16:46:37 +0200 Subject: [PATCH 8/9] perf tools: Do not fail in case of empty HOME env variable Currently we fail in the following case: $ unset HOME $ ./perf record ls $ echo $? 255 It's because the config code init fails due to a missing HOME variable value. Fix this by skipping the user config init if there's no HOME variable value. Reported-by: Jan Stancek Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170330144637.7468-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 60 +++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 0c7d5a4975cd..7b01d59076d3 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -627,6 +627,8 @@ static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; const char *home = NULL; + char *user_config; + struct stat st; /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ if (config_exclusive_filename) @@ -637,35 +639,41 @@ static int perf_config_set__init(struct perf_config_set *set) } home = getenv("HOME"); - if (perf_config_global() && home) { - char *user_config = strdup(mkpath("%s/.perfconfig", home)); - struct stat st; - if (user_config == NULL) { - warning("Not enough memory to process %s/.perfconfig, " - "ignoring it.", home); - goto out; - } + /* + * Skip reading user config if: + * - there is no place to read it from (HOME) + * - we are asked not to (PERF_CONFIG_NOGLOBAL=1) + */ + if (!home || !*home || !perf_config_global()) + return 0; - if (stat(user_config, &st) < 0) { - if (errno == ENOENT) - ret = 0; - goto out_free; - } - - ret = 0; - - if (st.st_uid && (st.st_uid != geteuid())) { - warning("File %s not owned by current user or root, " - "ignoring it.", user_config); - goto out_free; - } - - if (st.st_size) - ret = perf_config_from_file(collect_config, user_config, set); -out_free: - free(user_config); + user_config = strdup(mkpath("%s/.perfconfig", home)); + if (user_config == NULL) { + warning("Not enough memory to process %s/.perfconfig, " + "ignoring it.", home); + goto out; } + + if (stat(user_config, &st) < 0) { + if (errno == ENOENT) + ret = 0; + goto out_free; + } + + ret = 0; + + if (st.st_uid && (st.st_uid != geteuid())) { + warning("File %s not owned by current user or root, " + "ignoring it.", user_config); + goto out_free; + } + + if (st.st_size) + ret = perf_config_from_file(collect_config, user_config, set); + +out_free: + free(user_config); out: return ret; } From fd5cead23f54697310bd565aa2a23ae5128080a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Mar 2017 16:19:30 -0300 Subject: [PATCH 9/9] perf trace: Beautify statx syscall 'flag' and 'mask' arguments To test it, build samples/statx/test_statx, which I did as: $ make headers_install $ cc -I ~/git/linux/usr/include samples/statx/test-statx.c -o /tmp/statx And then use perf trace on it: # perf trace -e statx /tmp/statx /etc/passwd statx(/etc/passwd) = 0 results=7ff Size: 3496 Blocks: 8 IO Block: 4096 regular file Device: fd:00 Inode: 280156 Links: 1 Access: (0644/-rw-r--r--) Uid: 0 Gid: 0 Access: 2017-03-29 16:01:01.650073438-0300 Modify: 2017-03-10 16:25:14.156479354-0300 Change: 2017-03-10 16:25:14.171479328-0300 0.000 ( 0.007 ms): statx/30648 statx(dfd: CWD, filename: 0x7ef503f4, flags: SYMLINK_NOFOLLOW, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7fff7ef4eb10) = 0 # Using the test-stat.c options to change the mask: # perf trace -e statx /tmp/statx -O /etc/passwd > /dev/null 0.000 ( 0.008 ms): statx/30745 statx(dfd: CWD, filename: 0x3a0753f4, flags: SYMLINK_NOFOLLOW, mask: BTIME, buffer: 0x7ffd3a0735c0) = 0 # # perf trace -e statx /tmp/statx -A /etc/passwd > /dev/null 0.000 ( 0.010 ms): statx/30757 statx(dfd: CWD, filename: 0xa94e63f4, flags: SYMLINK_NOFOLLOW|NO_AUTOMOUNT, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7ffea94e49d0) = 0 # # trace --no-inherit -e statx /tmp/statx -F /etc/passwd > /dev/null 0.000 ( 0.011 ms): statx(dfd: CWD, filename: 0x3b02d3f3, flags: SYMLINK_NOFOLLOW|STATX_FORCE_SYNC, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7ffd3b02c850) = 0 # # trace --no-inherit -e statx /tmp/statx -F -L /etc/passwd > /dev/null 0.000 ( 0.008 ms): statx(dfd: CWD, filename: 0x15cff3f3, flags: STATX_FORCE_SYNC, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7fff15cfdda0) = 0 # # trace --no-inherit -e statx /tmp/statx -D -O /etc/passwd > /dev/null 0.000 ( 0.009 ms): statx(dfd: CWD, filename: 0xfa37f3f3, flags: SYMLINK_NOFOLLOW|STATX_DONT_SYNC, mask: BTIME, buffer: 0x7ffffa37da20) = 0 # Adding a probe to get the filename collected as well: # perf probe 'vfs_getname=getname_flags:72 pathname=result->name:string' Added new event: probe:vfs_getname (on getname_flags:72 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 # trace --no-inherit -e statx /tmp/statx -D -O /etc/passwd > /dev/null 0.169 ( 0.007 ms): statx(dfd: CWD, filename: /etc/passwd, flags: SYMLINK_NOFOLLOW|STATX_DONT_SYNC, mask: BTIME, buffer: 0x7ffda9bf50f0) = 0 # Same technique could be used to collect and beautify the result put in the 'buffer' argument. Finally do a system wide 'perf trace' session looking for any use of statx, then run the test proggie with various flags: # trace -e statx 16612.967 ( 0.028 ms): statx/4562 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7ffef195d660) = 0 33064.447 ( 0.011 ms): statx/4569 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW|STATX_FORCE_SYNC, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7ffc5484c790) = 0 36050.891 ( 0.023 ms): statx/4576 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW, mask: BTIME, buffer: 0x7ffeb18b66e0) = 0 38039.889 ( 0.023 ms): statx/4584 statx(dfd: CWD, filename: /tmp/statx, flags: SYMLINK_NOFOLLOW, mask: TYPE|MODE|NLINK|UID|GID|ATIME|MTIME|CTIME|INO|SIZE|BLOCKS|BTIME, buffer: 0x7fff1db0ea90) = 0 ^C# This one also starts moving the beautifiers from files directly included in builtin-trace.c to separate objects + a beauty.h header with prototypes, so that we can add test cases in tools/perf/tests/ to fire syscalls with various arguments and then get them intercepted as syscalls:sys_enter_foo or raw_syscalls:sys_enter + sys_exit to then format and check that the formatted output is the one we expect. Cc: Adrian Hunter Cc: Al Viro Cc: David Ahern Cc: David Howells Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xvzw8eynffvez5czyzidhrno@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 1 + .../arch/x86/entry/syscalls/syscall_64.tbl | 1 + tools/perf/builtin-trace.c | 14 ++-- tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 24 +++++++ tools/perf/trace/beauty/statx.c | 72 +++++++++++++++++++ 6 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 tools/perf/trace/beauty/Build create mode 100644 tools/perf/trace/beauty/beauty.h create mode 100644 tools/perf/trace/beauty/statx.c diff --git a/tools/perf/Build b/tools/perf/Build index 9b79f8d7db50..bd8eeb60533c 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -50,5 +50,6 @@ libperf-y += util/ libperf-y += arch/ libperf-y += ui/ libperf-y += scripts/ +libperf-y += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index e93ef0b38db8..5aef183e2f85 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,6 +338,7 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free +332 common statx sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7379792a6504..fce278d5fada 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -31,6 +31,7 @@ #include "util/intlist.h" #include "util/thread_map.h" #include "util/stat.h" +#include "trace/beauty/beauty.h" #include "trace-event.h" #include "util/parse-events.h" #include "util/bpf-loader.h" @@ -267,15 +268,6 @@ out_delete: ({ struct syscall_tp *fields = evsel->priv; \ fields->name.pointer(&fields->name, sample); }) -struct syscall_arg { - unsigned long val; - struct thread *thread; - struct trace *trace; - void *parm; - u8 idx; - u8 mask; -}; - struct strarray { int offset; int nr_entries; @@ -771,6 +763,10 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "statfs", .errmsg = true, }, + { .name = "statx", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* flags */ + [2] = SCA_STATX_FLAGS, /* flags */ + [3] = SCA_STATX_MASK, /* mask */ }, }, { .name = "swapoff", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, { .name = "swapon", .errmsg = true, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build new file mode 100644 index 000000000000..be95ac6ce845 --- /dev/null +++ b/tools/perf/trace/beauty/Build @@ -0,0 +1 @@ +libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h new file mode 100644 index 000000000000..cf50be3f17a4 --- /dev/null +++ b/tools/perf/trace/beauty/beauty.h @@ -0,0 +1,24 @@ +#ifndef _PERF_TRACE_BEAUTY_H +#define _PERF_TRACE_BEAUTY_H + +#include + +struct trace; +struct thread; + +struct syscall_arg { + unsigned long val; + struct thread *thread; + struct trace *trace; + void *parm; + u8 idx; + u8 mask; +}; + +size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags + +size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask + +#endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c new file mode 100644 index 000000000000..5643b692af4c --- /dev/null +++ b/tools/perf/trace/beauty/statx.c @@ -0,0 +1,72 @@ +/* + * trace/beauty/statx.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include +#include +#include + +size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "SYNC_AS_STAT"); +#define P_FLAG(n) \ + if (flags & AT_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~AT_##n; \ + } + + P_FLAG(SYMLINK_NOFOLLOW); + P_FLAG(REMOVEDIR); + P_FLAG(SYMLINK_FOLLOW); + P_FLAG(NO_AUTOMOUNT); + P_FLAG(EMPTY_PATH); + P_FLAG(STATX_FORCE_SYNC); + P_FLAG(STATX_DONT_SYNC); + +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & STATX_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~STATX_##n; \ + } + + P_FLAG(TYPE); + P_FLAG(MODE); + P_FLAG(NLINK); + P_FLAG(UID); + P_FLAG(GID); + P_FLAG(ATIME); + P_FLAG(MTIME); + P_FLAG(CTIME); + P_FLAG(INO); + P_FLAG(SIZE); + P_FLAG(BLOCKS); + P_FLAG(BTIME); + +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +}