| From 87e24f4b67e68d9fd8df16e0bf9c66d1ad2a2533 Mon Sep 17 00:00:00 2001 |
| From: Peter Zijlstra <a.p.zijlstra@chello.nl> |
| Date: Mon, 5 Mar 2012 23:59:25 +0100 |
| Subject: perf/x86: Fix local vs remote memory events for NHM/WSM |
| |
| From: Peter Zijlstra <a.p.zijlstra@chello.nl> |
| |
| commit 87e24f4b67e68d9fd8df16e0bf9c66d1ad2a2533 upstream. |
| |
| Verified using the below proglet.. before: |
| |
| [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0 |
| remote write |
| |
| Performance counter stats for './numa 0': |
| |
| 2,101,554 node-stores |
| 2,096,931 node-store-misses |
| |
| 5.021546079 seconds time elapsed |
| |
| [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1 |
| local write |
| |
| Performance counter stats for './numa 1': |
| |
| 501,137 node-stores |
| 199 node-store-misses |
| |
| 5.124451068 seconds time elapsed |
| |
| After: |
| |
| [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0 |
| remote write |
| |
| Performance counter stats for './numa 0': |
| |
| 2,107,516 node-stores |
| 2,097,187 node-store-misses |
| |
| 5.012755149 seconds time elapsed |
| |
| [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1 |
| local write |
| |
| Performance counter stats for './numa 1': |
| |
| 2,063,355 node-stores |
| 165 node-store-misses |
| |
| 5.082091494 seconds time elapsed |
| |
| #define _GNU_SOURCE |
| |
| #include <sched.h> |
| #include <stdio.h> |
| #include <errno.h> |
| #include <sys/mman.h> |
| #include <sys/types.h> |
| #include <dirent.h> |
| #include <signal.h> |
| #include <unistd.h> |
| #include <numaif.h> |
| #include <stdlib.h> |
| |
| #define SIZE (32*1024*1024) |
| |
| volatile int done; |
| |
| void sig_done(int sig) |
| { |
| done = 1; |
| } |
| |
| int main(int argc, char **argv) |
| { |
| cpu_set_t *mask, *mask2; |
| size_t size; |
| int i, err, t; |
| int nrcpus = 1024; |
| char *mem; |
| unsigned long nodemask = 0x01; /* node 0 */ |
| DIR *node; |
| struct dirent *de; |
| int read = 0; |
| int local = 0; |
| |
| if (argc < 2) { |
| printf("usage: %s [0-3]\n", argv[0]); |
| printf(" bit0 - local/remote\n"); |
| printf(" bit1 - read/write\n"); |
| exit(0); |
| } |
| |
| switch (atoi(argv[1])) { |
| case 0: |
| printf("remote write\n"); |
| break; |
| case 1: |
| printf("local write\n"); |
| local = 1; |
| break; |
| case 2: |
| printf("remote read\n"); |
| read = 1; |
| break; |
| case 3: |
| printf("local read\n"); |
| local = 1; |
| read = 1; |
| break; |
| } |
| |
| mask = CPU_ALLOC(nrcpus); |
| size = CPU_ALLOC_SIZE(nrcpus); |
| CPU_ZERO_S(size, mask); |
| |
| node = opendir("/sys/devices/system/node/node0/"); |
| if (!node) |
| perror("opendir"); |
| while ((de = readdir(node))) { |
| int cpu; |
| |
| if (sscanf(de->d_name, "cpu%d", &cpu) == 1) |
| CPU_SET_S(cpu, size, mask); |
| } |
| closedir(node); |
| |
| mask2 = CPU_ALLOC(nrcpus); |
| CPU_ZERO_S(size, mask2); |
| for (i = 0; i < size; i++) |
| CPU_SET_S(i, size, mask2); |
| CPU_XOR_S(size, mask2, mask2, mask); // invert |
| |
| if (!local) |
| mask = mask2; |
| |
| err = sched_setaffinity(0, size, mask); |
| if (err) |
| perror("sched_setaffinity"); |
| |
| mem = mmap(0, SIZE, PROT_READ|PROT_WRITE, |
| MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
| err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE); |
| if (err) |
| perror("mbind"); |
| |
| signal(SIGALRM, sig_done); |
| alarm(5); |
| |
| if (!read) { |
| while (!done) { |
| for (i = 0; i < SIZE; i++) |
| mem[i] = 0x01; |
| } |
| } else { |
| while (!done) { |
| for (i = 0; i < SIZE; i++) |
| t += *(volatile char *)(mem + i); |
| } |
| } |
| |
| return 0; |
| } |
| |
| Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> |
| Cc: Stephane Eranian <eranian@google.com> |
| Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.org |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| arch/x86/kernel/cpu/perf_event_intel.c | 17 +++++++++-------- |
| 1 file changed, 9 insertions(+), 8 deletions(-) |
| |
| --- a/arch/x86/kernel/cpu/perf_event_intel.c |
| +++ b/arch/x86/kernel/cpu/perf_event_intel.c |
| @@ -389,14 +389,15 @@ static __initconst const u64 westmere_hw |
| #define NHM_LOCAL_DRAM (1 << 14) |
| #define NHM_NON_DRAM (1 << 15) |
| |
| -#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) |
| +#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) |
| +#define NHM_REMOTE (NHM_REMOTE_DRAM) |
| |
| #define NHM_DMND_READ (NHM_DMND_DATA_RD) |
| #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) |
| #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) |
| |
| #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) |
| -#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) |
| +#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) |
| #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) |
| |
| static __initconst const u64 nehalem_hw_cache_extra_regs |
| @@ -420,16 +421,16 @@ static __initconst const u64 nehalem_hw_ |
| }, |
| [ C(NODE) ] = { |
| [ C(OP_READ) ] = { |
| - [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, |
| - [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM, |
| + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, |
| + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, |
| }, |
| [ C(OP_WRITE) ] = { |
| - [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, |
| - [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, |
| + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, |
| + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, |
| }, |
| [ C(OP_PREFETCH) ] = { |
| - [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, |
| - [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, |
| + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, |
| + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, |
| }, |
| }, |
| }; |