| From 1beaef29c34154ccdcb3f1ae557f6883eda18840 Mon Sep 17 00:00:00 2001 |
| From: Vincent Whitchurch <vincent.whitchurch@axis.com> |
| Date: Mon, 10 Aug 2020 15:34:04 +0200 |
| Subject: [PATCH] perf bench mem: Always memset source before memcpy |
| |
| commit 1beaef29c34154ccdcb3f1ae557f6883eda18840 upstream. |
| |
| For memcpy, the source pages are memset to zero only when --cycles is |
| used. This leads to wildly different results with or without --cycles, |
| since all source pages are likely to be mapped to the same zero page |
| without explicit writes. |
| |
| Before this fix: |
| |
| $ export cmd="./perf stat -e LLC-loads -- ./perf bench \ |
| mem memcpy -s 1024MB -l 100 -f default" |
| $ $cmd |
| |
| 2,935,826 LLC-loads |
| 3.821677452 seconds time elapsed |
| |
| $ $cmd --cycles |
| |
| 217,533,436 LLC-loads |
| 8.616725985 seconds time elapsed |
| |
| After this fix: |
| |
| $ $cmd |
| |
| 214,459,686 LLC-loads |
| 8.674301124 seconds time elapsed |
| |
| $ $cmd --cycles |
| |
| 214,758,651 LLC-loads |
| 8.644480006 seconds time elapsed |
| |
| Fixes: 47b5757bac03c338 ("perf bench mem: Move boilerplate memory allocation to the infrastructure") |
| Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> |
| Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> |
| Cc: Jiri Olsa <jolsa@redhat.com> |
| Cc: Mark Rutland <mark.rutland@arm.com> |
| Cc: Namhyung Kim <namhyung@kernel.org> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: kernel@axis.com |
| Link: http://lore.kernel.org/lkml/20200810133404.30829-1-vincent.whitchurch@axis.com |
| Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> |
| |
| diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c |
| index 9235b76501be..19d45c377ac1 100644 |
| --- a/tools/perf/bench/mem-functions.c |
| +++ b/tools/perf/bench/mem-functions.c |
| @@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * |
| return 0; |
| } |
| |
| -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) |
| +static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) |
| { |
| - u64 cycle_start = 0ULL, cycle_end = 0ULL; |
| - memcpy_t fn = r->fn.memcpy; |
| - int i; |
| - |
| /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ |
| memset(src, 0, size); |
| |
| @@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo |
| * to not measure page fault overhead: |
| */ |
| fn(dst, src, size); |
| +} |
| + |
| +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) |
| +{ |
| + u64 cycle_start = 0ULL, cycle_end = 0ULL; |
| + memcpy_t fn = r->fn.memcpy; |
| + int i; |
| + |
| + memcpy_prefault(fn, size, src, dst); |
| |
| cycle_start = get_cycles(); |
| for (i = 0; i < nr_loops; ++i) |
| @@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void |
| memcpy_t fn = r->fn.memcpy; |
| int i; |
| |
| - /* |
| - * We prefault the freshly allocated memory range here, |
| - * to not measure page fault overhead: |
| - */ |
| - fn(dst, src, size); |
| + memcpy_prefault(fn, size, src, dst); |
| |
| BUG_ON(gettimeofday(&tv_start, NULL)); |
| for (i = 0; i < nr_loops; ++i) |
| -- |
| 2.27.0 |
| |