perf, x86: Update/fix Intel Nehalem cache events
Change the Nehalem cache events to use retired memory instruction counters (similar to Westmere), this greatly improves the provided stats. Using: main () { int i; for (i = 0; i < 1000000000; i++) { asm("mov (%%rsp), %%rbx;" "mov %%rbx, (%%rsp);" : : : "rbx"); } } We find: $ perf stat --repeat 10 -e instructions:u -e l1-dcache-loads:u -e l1-dcache-stores:u ./loop_1b_loads+stores Performance counter stats for './loop_1b_loads+stores' (10 runs): 4,000,081,056 instructions:u # 0.000 IPC ( +- 0.000% ) 4,999,502,846 l1-dcache-loads:u ( +- 0.008% ) 1,000,034,832 l1-dcache-stores:u ( +- 0.000% ) 1.565184942 seconds time elapsed ( +- 0.005% ) The 5b is surprising - we'd expect 1b: $ perf stat --repeat 10 -e instructions:u -e r10b:u -e l1-dcache-stores:u ./loop_1b_loads+stores Performance counter stats for './loop_1b_loads+stores' (10 runs): 4,000,081,054 instructions:u # 0.000 IPC ( +- 0.000% ) 1,000,021,961 r10b:u ( +- 0.000% ) 1,000,030,951 l1-dcache-stores:u ( +- 0.000% ) 1.565055422 seconds time elapsed ( +- 0.003% ) Which this patch thus fixes. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Stephane Eranian <eranian@google.com> Cc: Lin Ming <ming.m.lin@intel.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Link: http://lkml.kernel.org/n/tip-q9rtru7b7840tws75xzboapv@git.kernel.org Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
1ea5a6afd9
commit
f4929bd372
@ -391,12 +391,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids
|
||||
{
|
||||
[ C(L1D) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
|
||||
[ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
|
||||
[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
|
||||
[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
|
||||
[ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
|
||||
[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
|
||||
[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
|
||||
|
Loading…
Reference in New Issue
Block a user