perf bench mem: Always memset source before memcpy
[ Upstream commit 1beaef29c34154ccdcb3f1ae557f6883eda18840 ] For memcpy, the source pages are memset to zero only when --cycles is used. This leads to wildly different results with or without --cycles, since all source pages are likely to be mapped to the same zero page without explicit writes. Before this fix: $ export cmd="./perf stat -e LLC-loads -- ./perf bench \ mem memcpy -s 1024MB -l 100 -f default" $ $cmd 2,935,826 LLC-loads 3.821677452 seconds time elapsed $ $cmd --cycles 217,533,436 LLC-loads 8.616725985 seconds time elapsed After this fix: $ $cmd 214,459,686 LLC-loads 8.674301124 seconds time elapsed $ $cmd --cycles 214,758,651 LLC-loads 8.644480006 seconds time elapsed Fixes: 47b5757bac03c338 ("perf bench mem: Move boilerplate memory allocation to the infrastructure") Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: kernel@axis.com Link: http://lore.kernel.org/lkml/20200810133404.30829-1-vincent.whitchurch@axis.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
306df54b5b
commit
45989801d7
@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
|
||||
static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
|
||||
{
|
||||
u64 cycle_start = 0ULL, cycle_end = 0ULL;
|
||||
memcpy_t fn = r->fn.memcpy;
|
||||
int i;
|
||||
|
||||
/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
|
||||
memset(src, 0, size);
|
||||
|
||||
@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
|
||||
* to not measure page fault overhead:
|
||||
*/
|
||||
fn(dst, src, size);
|
||||
}
|
||||
|
||||
static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
|
||||
{
|
||||
u64 cycle_start = 0ULL, cycle_end = 0ULL;
|
||||
memcpy_t fn = r->fn.memcpy;
|
||||
int i;
|
||||
|
||||
memcpy_prefault(fn, size, src, dst);
|
||||
|
||||
cycle_start = get_cycles();
|
||||
for (i = 0; i < nr_loops; ++i)
|
||||
@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
|
||||
memcpy_t fn = r->fn.memcpy;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* We prefault the freshly allocated memory range here,
|
||||
* to not measure page fault overhead:
|
||||
*/
|
||||
fn(dst, src, size);
|
||||
memcpy_prefault(fn, size, src, dst);
|
||||
|
||||
BUG_ON(gettimeofday(&tv_start, NULL));
|
||||
for (i = 0; i < nr_loops; ++i)
|
||||
|
Loading…
x
Reference in New Issue
Block a user