2009-11-05 03:31:34 +03:00
/*
* builtin-bench.c
*
2013-10-23 16:37:56 +04:00
* General benchmarking collections provided by perf
2009-11-05 03:31:34 +03:00
*
* Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*/
/*
2013-10-23 16:37:56 +04:00
* Available benchmark collection list:
2009-11-05 03:31:34 +03:00
*
2013-10-23 16:37:56 +04:00
* sched ... scheduler and IPC performance
2009-11-17 18:20:09 +03:00
* mem   ... memory access performance
2013-10-23 16:37:56 +04:00
* numa  ... NUMA scheduling and MM performance
2013-12-15 08:31:55 +04:00
* futex ... Futex performance
2009-11-05 03:31:34 +03:00
*/
# include "perf.h"
# include "util/util.h"
2015-12-15 18:39:39 +03:00
# include <subcmd/parse-options.h>
2009-11-05 03:31:34 +03:00
# include "builtin.h"
# include "bench/bench.h"
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
2013-10-23 16:37:56 +04:00
# include <sys/prctl.h>
2009-11-05 03:31:34 +03:00
2013-10-23 16:37:56 +04:00
/* Signature shared by every benchmark entry point referenced from the tables below. */
typedef int (*bench_fn_t)(int argc, const char **argv, const char *prefix);

/* One runnable benchmark inside a collection. */
struct bench {
	const char	*name;		/* name compared against the command line */
	const char	*summary;	/* one-line description printed in listings */
	bench_fn_t	fn;		/* implementation; NULL for the "all" pseudo-entries */
};
2013-09-30 14:07:11 +04:00
# ifdef HAVE_LIBNUMA_SUPPORT
2013-10-23 16:37:56 +04:00
/*
 * NUMA benchmarks (this table is only built under HAVE_LIBNUMA_SUPPORT).
 * NOTE(review): the padding spaces inside the literals are kept exactly as
 * found; they look like an extraction artifact - confirm before relying on them.
 */
static struct bench numa_benchmarks[] = {
	{ .name = " mem ", .summary = " Benchmark for NUMA workloads ", .fn = bench_numa },
	{ .name = " all ", .summary = " Run all NUMA benchmarks ",      .fn = NULL },
	{ .name = NULL }	/* sentinel: for_each_bench() stops at a NULL name */
};
2013-01-28 05:51:22 +04:00
# endif
perf: Add 'perf bench numa mem' NUMA performance measurement suite
Add a suite of NUMA performance benchmarks.
The goal was to simulate the behavior and access patterns of real NUMA
workloads, via a wide range of parameters, so this tool goes well
beyond the simple bzero() measurements that most NUMA micro-benchmarks use:
- It processes the data and creates a chain of data dependencies,
like a real workload would. Neither the compiler, nor the
kernel (via KSM and other optimizations) nor the CPU can
eliminate parts of the workload.
- It randomizes the initial state and also randomizes the target
addresses of the processing - it's not a simple forward scan
of addresses.
- It provides flexible options to set process, thread and memory
relationship information: -G sets "global" memory shared between
all test processes, -P sets "process" memory shared by all
threads of a process and -T sets "thread" private memory.
- There's a NUMA convergence monitoring and convergence latency
measurement option via -c and -m.
- Micro-sleeps and synchronization can be injected to provoke lock
contention and scheduling, via the -u and -S options. This simulates
IO and contention.
- The -x option instructs the workload to 'perturb' itself artificially
every N seconds, by moving to the first and last CPU of the system
periodically. This way the stability of convergence equilibrium and
the number of steps taken for the scheduler to reach equilibrium again
can be measured.
- The amount of work can be specified via the -l loop count, and/or
via a -s seconds-timeout value.
- CPU and node memory binding options, to test hard binding scenarios.
THP can be turned on and off via madvise() calls.
- Live reporting of convergence progress in an 'at a glance' output format.
Printing of convergence and deconvergence events.
The 'perf bench numa mem -a' option will start an array of about 30
individual tests that will each output such measurements:
# Running 5x5-bw-thread, "perf bench numa mem -p 5 -t 5 -P 512 -s 20 -zZ0q --thp 1"
5x5-bw-thread, 20.276, secs, runtime-max/thread
5x5-bw-thread, 20.004, secs, runtime-min/thread
5x5-bw-thread, 20.155, secs, runtime-avg/thread
5x5-bw-thread, 0.671, %, spread-runtime/thread
5x5-bw-thread, 21.153, GB, data/thread
5x5-bw-thread, 528.818, GB, data-total
5x5-bw-thread, 0.959, nsecs, runtime/byte/thread
5x5-bw-thread, 1.043, GB/sec, thread-speed
5x5-bw-thread, 26.081, GB/sec, total-speed
See the help text and the code for more details.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2012-12-06 16:51:59 +04:00
2013-10-23 16:37:56 +04:00
/* Scheduler and IPC benchmarks; the "all" entry has no function of its own. */
static struct bench sched_benchmarks[] = {
	{
		.name    = " messaging ",
		.summary = " Benchmark for scheduling and IPC ",
		.fn      = bench_sched_messaging,
	},
	{
		.name    = " pipe ",
		.summary = " Benchmark for pipe() between two processes ",
		.fn      = bench_sched_pipe,
	},
	{
		.name    = " all ",
		.summary = " Run all scheduler benchmarks ",
		.fn      = NULL,
	},
	{ .name = NULL }	/* sentinel: for_each_bench() stops at a NULL name */
};
2013-10-23 16:37:56 +04:00
static struct bench mem_benchmarks [ ] = {
2015-10-19 11:04:26 +03:00
{ " memcpy " , " Benchmark for memcpy() functions " , bench_mem_memcpy } ,
{ " memset " , " Benchmark for memset() functions " , bench_mem_memset } ,
2015-10-19 11:04:30 +03:00
{ " all " , " Run all memory access benchmarks " , NULL } ,
2013-10-23 16:37:56 +04:00
{ NULL , NULL , NULL }
2009-11-17 18:20:09 +03:00
} ;
2013-12-15 08:31:55 +04:00
static struct bench futex_benchmarks [ ] = {
{ " hash " , " Benchmark for futex hash table " , bench_futex_hash } ,
2013-12-15 08:31:56 +04:00
{ " wake " , " Benchmark for futex wake calls " , bench_futex_wake } ,
2015-05-08 21:37:59 +03:00
{ " wake-parallel " , " Benchmark for parallel futex wake calls " , bench_futex_wake_parallel } ,
2013-12-15 08:31:57 +04:00
{ " requeue " , " Benchmark for futex requeue calls " , bench_futex_requeue } ,
2015-07-07 11:55:53 +03:00
/* pi-futexes */
{ " lock-pi " , " Benchmark for futex lock_pi calls " , bench_futex_lock_pi } ,
2015-10-19 11:04:30 +03:00
{ " all " , " Run all futex benchmarks " , NULL } ,
2013-12-15 08:31:55 +04:00
{ NULL , NULL , NULL }
} ;
2013-10-23 16:37:56 +04:00
/* A named group of benchmarks selectable as the first positional argument. */
struct collection {
	const char	*name;		/* name compared against the command line */
	const char	*summary;	/* one-line description printed by print_usage() */
	struct bench	*benchmarks;	/* NULL-name-terminated table; NULL for the "all" entry */
};
2013-10-23 16:37:56 +04:00
static struct collection collections [ ] = {
2013-12-15 08:31:55 +04:00
{ " sched " , " Scheduler and IPC benchmarks " , sched_benchmarks } ,
2013-10-23 16:37:56 +04:00
{ " mem " , " Memory access benchmarks " , mem_benchmarks } ,
2013-09-30 14:07:11 +04:00
# ifdef HAVE_LIBNUMA_SUPPORT
2013-10-23 16:37:56 +04:00
{ " numa " , " NUMA scheduling and MM benchmarks " , numa_benchmarks } ,
2013-01-28 05:51:22 +04:00
# endif
2013-12-15 08:31:55 +04:00
{ " futex " , " Futex stressing benchmarks " , futex_benchmarks } ,
2013-10-23 16:37:56 +04:00
{ " all " , " All benchmarks " , NULL } ,
{ NULL , NULL , NULL }
2009-11-05 03:31:34 +03:00
} ;
2013-10-23 16:37:56 +04:00
/* Iterate over all benchmark collections: */
#define for_each_collection(coll) \
	for ((coll) = collections; (coll)->name; (coll)++)

/*
 * Iterate over all benchmarks within a collection. The "(bench) &&" test makes
 * the loop a no-op when the collection's ->benchmarks pointer is NULL.
 * Arguments are parenthesized so that expressions such as &some_collection
 * can be passed safely.
 */
#define for_each_bench(coll, bench) \
	for ((bench) = (coll)->benchmarks; (bench) && (bench)->name; (bench)++)
2013-10-23 16:37:56 +04:00
static void dump_benchmarks ( struct collection * coll )
2009-11-05 03:31:34 +03:00
{
2013-10-23 16:37:56 +04:00
struct bench * bench ;
2009-11-05 03:31:34 +03:00
2013-10-23 16:37:56 +04:00
printf ( " \n # List of available benchmarks for collection '%s': \n \n " , coll - > name ) ;
2009-11-05 03:31:34 +03:00
2013-10-23 16:37:56 +04:00
for_each_bench ( coll , bench )
printf ( " %14s: %s \n " , bench - > name , bench - > summary ) ;
2009-11-05 03:31:34 +03:00
printf ( " \n " ) ;
}
2010-05-17 23:22:41 +04:00
static const char * bench_format_str ;
2013-10-23 16:37:56 +04:00
/* Output/formatting style, exported to benchmark modules: */
2009-11-10 02:20:00 +03:00
int bench_format = BENCH_FORMAT_DEFAULT ;
2014-06-16 22:14:19 +04:00
unsigned int bench_repeat = 10 ; /* default number of times to repeat the run */
2009-11-10 02:20:00 +03:00
static const struct option bench_options [ ] = {
2015-10-19 11:04:22 +03:00
OPT_STRING ( ' f ' , " format " , & bench_format_str , " default|simple " , " Specify the output formatting style " ) ,
2014-06-16 22:14:19 +04:00
OPT_UINTEGER ( ' r ' , " repeat " , & bench_repeat , " Specify amount of times to repeat the run " ) ,
2009-11-10 02:20:00 +03:00
OPT_END ( )
} ;
/* NULL-terminated usage lines, printed by print_usage() and handed to parse_options(). */
static const char * const bench_usage[] = {
	" perf bench [<common options>] <collection> <benchmark> [<options>] ",
	NULL
};
static void print_usage ( void )
{
2013-10-23 16:37:56 +04:00
struct collection * coll ;
2009-11-10 02:20:00 +03:00
int i ;
printf ( " Usage: \n " ) ;
for ( i = 0 ; bench_usage [ i ] ; i + + )
printf ( " \t %s \n " , bench_usage [ i ] ) ;
printf ( " \n " ) ;
2013-10-23 16:37:56 +04:00
printf ( " # List of all available benchmark collections: \n \n " ) ;
2009-11-10 02:20:00 +03:00
2013-10-23 16:37:56 +04:00
for_each_collection ( coll )
printf ( " %14s: %s \n " , coll - > name , coll - > summary ) ;
2009-11-10 02:20:00 +03:00
printf ( " \n " ) ;
}
2010-05-17 23:22:41 +04:00
static int bench_str2int ( const char * str )
2009-11-10 02:20:00 +03:00
{
if ( ! str )
return BENCH_FORMAT_DEFAULT ;
if ( ! strcmp ( str , BENCH_FORMAT_DEFAULT_STR ) )
return BENCH_FORMAT_DEFAULT ;
else if ( ! strcmp ( str , BENCH_FORMAT_SIMPLE_STR ) )
return BENCH_FORMAT_SIMPLE ;
return BENCH_FORMAT_UNKNOWN ;
}
2013-10-23 16:37:56 +04:00
/*
 * Run a specific benchmark but first rename the running task's ->comm[]
 * to something meaningful:
 *
 * The name is built from @coll_name and @bench_name, set via
 * prctl(PR_SET_NAME) and also passed to @fn as argv[0]. The buffer is freed
 * before returning, so argv[0] is reset to NULL rather than left pointing at
 * freed memory.
 *
 * Returns whatever @fn returns.
 */
static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn,
		     int argc, const char **argv, const char *prefix)
{
	size_t size;
	char *name;
	int ret;

	/* strlen() of both parts, plus one separator and the terminating NUL. */
	size = strlen(coll_name) + 1 + strlen(bench_name) + 1;

	name = zalloc(size);
	BUG_ON(!name);

	/*
	 * NOTE(review): the format literal is kept exactly as found; its padding
	 * spaces look like an extraction artifact and are not accounted for in
	 * 'size' - confirm.
	 */
	scnprintf(name, size, " %s-%s ", coll_name, bench_name);

	prctl(PR_SET_NAME, name);
	argv[0] = name;

	ret = fn(argc, argv, prefix);

	/* Do not leave the caller's argv[0] dangling once the name is freed. */
	argv[0] = NULL;
	free(name);

	return ret;
}
static void run_collection ( struct collection * coll )
{
struct bench * bench ;
2009-12-13 11:01:59 +03:00
const char * argv [ 2 ] ;
argv [ 1 ] = NULL ;
/*
* TODO :
2013-10-23 16:37:56 +04:00
*
* Preparing preset parameters for
2009-12-13 11:01:59 +03:00
* embedded , ordinary PC , HPC , etc . . .
2013-10-23 16:37:56 +04:00
* would be helpful .
2009-12-13 11:01:59 +03:00
*/
2013-10-23 16:37:56 +04:00
for_each_bench ( coll , bench ) {
if ( ! bench - > fn )
break ;
printf ( " # Running %s/%s benchmark... \n " , coll - > name , bench - > name ) ;
2013-01-08 13:39:26 +04:00
fflush ( stdout ) ;
2009-12-13 11:01:59 +03:00
2013-10-23 16:37:56 +04:00
argv [ 1 ] = bench - > name ;
run_bench ( coll - > name , bench - > name , bench - > fn , 1 , argv , NULL ) ;
2009-12-13 11:01:59 +03:00
printf ( " \n " ) ;
}
}
2013-10-23 16:37:56 +04:00
static void run_all_collections ( void )
2009-12-13 11:01:59 +03:00
{
2013-10-23 16:37:56 +04:00
struct collection * coll ;
for_each_collection ( coll )
run_collection ( coll ) ;
2009-12-13 11:01:59 +03:00
}
2012-09-11 02:15:03 +04:00
/*
 * Entry point of 'perf bench'.
 *
 * Parses the common options, then dispatches on the <collection> and
 * <benchmark> positional arguments.
 *
 * Returns 0 when usage or a listing was printed or an "all" run was started,
 * 1 when the format, repeat count, collection or benchmark is rejected, and
 * otherwise the return value of the selected benchmark.
 *
 * NOTE(review): the string literals below are kept exactly as found; their
 * padding spaces look like an extraction artifact - confirm.
 */
int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
{
	struct collection *coll;
	int ret = 0;

	if (argc < 2) {
		/* No collection specified. */
		print_usage();
		goto end;
	}

	argc = parse_options(argc, argv, bench_options, bench_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	bench_format = bench_str2int(bench_format_str);
	if (bench_format == BENCH_FORMAT_UNKNOWN) {
		printf(" Unknown format descriptor: '%s' \n ", bench_format_str);
		/* Rejected input is an error, like an unknown collection below. */
		ret = 1;
		goto end;
	}

	if (bench_repeat == 0) {
		printf(" Invalid repeat option: Must specify a positive value \n ");
		ret = 1;
		goto end;
	}

	if (argc < 1) {
		print_usage();
		goto end;
	}

	if (!strcmp(argv[0], " all ")) {
		run_all_collections();
		goto end;
	}

	for_each_collection(coll) {
		struct bench *bench;

		if (strcmp(coll->name, argv[0]))
			continue;

		if (argc < 2) {
			/* No bench specified. */
			dump_benchmarks(coll);
			goto end;
		}

		/* Checked before the table walk, so NULL-fn "all" entries never reach run_bench(). */
		if (!strcmp(argv[1], " all ")) {
			run_collection(coll);
			goto end;
		}

		for_each_bench(coll, bench) {
			if (strcmp(bench->name, argv[1]))
				continue;

			if (bench_format == BENCH_FORMAT_DEFAULT)
				printf(" # Running '%s/%s' benchmark: \n ", coll->name, bench->name);
			fflush(stdout);
			ret = run_bench(coll->name, bench->name, bench->fn, argc - 1, argv + 1, prefix);
			goto end;
		}

		if (!strcmp(argv[1], " -h ") || !strcmp(argv[1], " --help ")) {
			dump_benchmarks(coll);
			goto end;
		}

		printf(" Unknown benchmark: '%s' for collection '%s' \n ", argv[1], argv[0]);
		ret = 1;
		goto end;
	}

	printf(" Unknown collection: '%s' \n ", argv[0]);
	ret = 1;

end:
	return ret;
}