601366678c
When perf data is in a pipe, it reads each event separately using read(2) syscall. This is a huge performance bottleneck when processing large data like in perf inject. Also perf inject needs to use write(2) syscall for the output. So convert it to use buffer I/O functions in stdio library for pipe data. This makes inject-build-id bench time drops from 20ms to 8ms. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.074 msec (+- 0.013 msec) Average time per event: 0.792 usec (+- 0.001 usec) Average memory usage: 8328 KB (+- 0 KB) Average build-id-all injection took: 5.490 msec (+- 0.008 msec) Average time per event: 0.538 usec (+- 0.001 usec) Average memory usage: 7563 KB (+- 0 KB) This patch enables it just for perf inject when used with pipe (it's a default behavior). Maybe we could do it for perf record and/or report later.. Committer testing: Before: $ perf stat -r 5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.605 msec (+- 0.064 msec) Average time per event: 1.334 usec (+- 0.006 usec) Average memory usage: 12220 KB (+- 7 KB) Average build-id-all injection took: 11.458 msec (+- 0.058 msec) Average time per event: 1.123 usec (+- 0.006 usec) Average memory usage: 11546 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.673 msec (+- 0.057 msec) Average time per event: 1.341 usec (+- 0.006 usec) Average memory usage: 12508 KB (+- 8 KB) Average build-id-all injection took: 11.437 msec (+- 0.046 msec) Average time per event: 1.121 usec (+- 0.004 usec) Average memory usage: 11812 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.641 msec (+- 0.069 msec) Average time per event: 1.337 usec (+- 0.007 usec) Average memory usage: 12302 KB (+- 8 KB) Average build-id-all injection took: 10.820 msec (+- 0.106 msec) Average time per event: 1.061 usec (+- 0.010 usec) Average memory usage: 11616 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.379 msec (+- 0.074 msec) Average time per event: 1.312 usec (+- 0.007 usec) Average memory usage: 12334 KB (+- 8 KB) Average build-id-all injection took: 11.288 msec (+- 0.071 msec) Average time per event: 1.107 usec (+- 0.007 usec) Average memory usage: 11657 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 13.534 msec (+- 0.058 msec) Average time per event: 1.327 usec (+- 0.006 usec) Average memory usage: 12264 KB (+- 8 KB) Average build-id-all injection took: 11.557 msec (+- 0.076 msec) Average time per event: 1.133 usec (+- 0.007 usec) Average memory usage: 11593 KB (+- 8 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 4,060.05 msec task-clock:u # 1.566 CPUs utilized ( +- 0.65% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 101,888 page-faults:u # 0.025 M/sec ( +- 0.12% ) 3,745,833,163 cycles:u # 0.923 GHz ( +- 0.10% ) (83.22%) 194,346,613 stalled-cycles-frontend:u # 5.19% frontend cycles idle ( +- 0.57% ) (83.30%) 708,495,034 stalled-cycles-backend:u # 18.91% backend cycles idle ( +- 0.48% ) (83.48%) 5,629,328,628 instructions:u # 1.50 insn per cycle # 0.13 stalled cycles per insn ( +- 0.21% ) (83.57%) 1,236,697,927 branches:u # 304.602 M/sec ( +- 0.16% ) (83.44%) 17,564,877 branch-misses:u # 1.42% of all branches ( +- 0.23% ) (82.99%) 2.5934 +- 0.0128 seconds time elapsed ( +- 0.49% ) $ After: $ perf stat -r 5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.560 msec (+- 0.125 msec) Average time per event: 0.839 usec (+- 0.012 usec) Average memory usage: 12520 KB (+- 8 KB) Average build-id-all injection took: 5.789 msec (+- 0.054 msec) Average time per event: 0.568 usec (+- 0.005 usec) Average memory usage: 11919 KB (+- 9 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.639 msec (+- 0.111 msec) Average time per event: 0.847 usec (+- 0.011 usec) Average memory usage: 12732 KB (+- 8 KB) Average build-id-all injection took: 5.647 msec (+- 0.069 msec) Average time per event: 0.554 usec (+- 0.007 usec) Average memory usage: 12093 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.551 msec (+- 0.096 msec) Average time per event: 0.838 usec (+- 0.009 usec) Average memory usage: 12739 KB (+- 8 KB) Average build-id-all injection took: 5.617 msec (+- 0.061 msec) Average time per event: 0.551 usec (+- 0.006 usec) Average memory usage: 12105 KB (+- 7 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.403 msec (+- 0.097 msec) Average time per event: 0.824 usec (+- 0.010 usec) Average memory usage: 12770 KB (+- 8 KB) Average build-id-all injection took: 5.611 msec (+- 0.085 msec) Average time per event: 0.550 usec (+- 0.008 usec) Average memory usage: 12134 KB (+- 8 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 8.518 msec (+- 0.102 msec) Average time per event: 0.835 usec (+- 0.010 usec) Average memory usage: 12518 KB (+- 10 KB) Average build-id-all injection took: 5.503 msec (+- 0.073 msec) Average time per event: 0.540 usec (+- 0.007 usec) Average memory usage: 11882 KB (+- 8 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 2,394.88 msec task-clock:u # 1.577 CPUs utilized ( +- 0.83% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 103,181 page-faults:u # 0.043 M/sec ( +- 0.11% ) 3,548,172,030 cycles:u # 1.482 GHz ( +- 0.30% ) (83.26%) 81,537,700 stalled-cycles-frontend:u # 2.30% frontend cycles idle ( +- 1.54% ) (83.24%) 876,631,544 stalled-cycles-backend:u # 24.71% backend cycles idle ( +- 1.14% ) (83.45%) 5,960,361,707 instructions:u # 1.68 insn per cycle # 0.15 stalled cycles per insn ( +- 0.27% ) (83.26%) 1,269,413,491 branches:u # 530.054 M/sec ( +- 0.10% ) (83.48%) 11,372,453 branch-misses:u # 0.90% of all branches ( +- 0.52% ) (83.31%) 1.51874 +- 0.00642 seconds time elapsed ( +- 0.42% ) $ Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Jiri Olsa <jolsa@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lore.kernel.org/lkml/20201030054742.87740-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
102 lines
2.3 KiB
C
102 lines
2.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __PERF_DATA_H
|
|
#define __PERF_DATA_H
|
|
|
|
#include <stdio.h>
|
|
#include <stdbool.h>
|
|
|
|
enum perf_data_mode {
|
|
PERF_DATA_MODE_WRITE,
|
|
PERF_DATA_MODE_READ,
|
|
};
|
|
|
|
enum perf_dir_version {
|
|
PERF_DIR_SINGLE_FILE = 0,
|
|
PERF_DIR_VERSION = 1,
|
|
};
|
|
|
|
struct perf_data_file {
|
|
char *path;
|
|
union {
|
|
int fd;
|
|
FILE *fptr;
|
|
};
|
|
unsigned long size;
|
|
};
|
|
|
|
struct perf_data {
|
|
const char *path;
|
|
struct perf_data_file file;
|
|
bool is_pipe;
|
|
bool is_dir;
|
|
bool force;
|
|
bool use_stdio;
|
|
enum perf_data_mode mode;
|
|
|
|
struct {
|
|
u64 version;
|
|
struct perf_data_file *files;
|
|
int nr;
|
|
} dir;
|
|
};
|
|
|
|
static inline bool perf_data__is_read(struct perf_data *data)
|
|
{
|
|
return data->mode == PERF_DATA_MODE_READ;
|
|
}
|
|
|
|
static inline bool perf_data__is_write(struct perf_data *data)
|
|
{
|
|
return data->mode == PERF_DATA_MODE_WRITE;
|
|
}
|
|
|
|
static inline int perf_data__is_pipe(struct perf_data *data)
|
|
{
|
|
return data->is_pipe;
|
|
}
|
|
|
|
static inline bool perf_data__is_dir(struct perf_data *data)
|
|
{
|
|
return data->is_dir;
|
|
}
|
|
|
|
static inline bool perf_data__is_single_file(struct perf_data *data)
|
|
{
|
|
return data->dir.version == PERF_DIR_SINGLE_FILE;
|
|
}
|
|
|
|
static inline int perf_data__fd(struct perf_data *data)
|
|
{
|
|
if (data->use_stdio)
|
|
return fileno(data->file.fptr);
|
|
|
|
return data->file.fd;
|
|
}
|
|
|
|
int perf_data__open(struct perf_data *data);
|
|
void perf_data__close(struct perf_data *data);
|
|
ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
|
|
ssize_t perf_data__write(struct perf_data *data,
|
|
void *buf, size_t size);
|
|
ssize_t perf_data_file__write(struct perf_data_file *file,
|
|
void *buf, size_t size);
|
|
/*
|
|
* If at_exit is set, only rename current perf.data to
|
|
* perf.data.<postfix>, continue write on original data.
|
|
* Set at_exit when flushing the last output.
|
|
*
|
|
* Return value is fd of new output.
|
|
*/
|
|
int perf_data__switch(struct perf_data *data,
|
|
const char *postfix,
|
|
size_t pos, bool at_exit, char **new_filepath);
|
|
|
|
int perf_data__create_dir(struct perf_data *data, int nr);
|
|
int perf_data__open_dir(struct perf_data *data);
|
|
void perf_data__close_dir(struct perf_data *data);
|
|
int perf_data__update_dir(struct perf_data *data);
|
|
unsigned long perf_data__size(struct perf_data *data);
|
|
int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
|
|
char *perf_data__kallsyms_name(struct perf_data *data);
|
|
#endif /* __PERF_DATA_H */
|