From 914123fa39042e651d79eaf86bbf63a1b938dddf Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Tue, 2 Apr 2019 15:21:14 +0000
Subject: x86/perf/amd: Resolve race condition when disabling PMC

From: Lendacky, Thomas <Thomas.Lendacky@amd.com>

commit 914123fa39042e651d79eaf86bbf63a1b938dddf upstream.

On AMD processors, the detection of an overflowed counter in the NMI
handler relies on the current value of the counter. So, for example, to
check for overflow on a 48 bit counter, bit 47 is checked to see if it
is 1 (not overflowed) or 0 (overflowed).
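
For example, on a 48-bit counter the check boils down to something like
the following sketch (counter_overflowed() is an illustrative helper,
not an existing kernel function):

	static bool counter_overflowed(int idx)
	{
		u64 value;

		/* Read the current value of the hardware counter */
		rdmsrl(x86_pmu_event_addr(idx), value);

		/* Bit 47 == 0 means the counter has overflowed */
		return !(value & BIT_ULL(47));
	}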

There is currently a race condition present when disabling and then
updating the PMC. Increased NMI latency in newer AMD processors makes this
race condition more pronounced. If the counter value has overflowed, it is
possible to update the PMC value before the NMI handler can run. The
updated PMC value is not an overflowed value, so when the perf NMI handler
does run, it will not find an overflowed counter. This may appear as an
unknown NMI resulting in either a panic or a series of messages, depending
on how the kernel is configured.
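
Roughly, the problematic interleaving is:

	perf context                          NMI handler
	------------                          -----------
	counter overflows (bit 47 -> 0)
	                                      (increased NMI latency delays it)
	disable the counter
	write a new, non-overflowed value
	                                      handler finally runs, finds no
	                                      counter with bit 47 == 0
	                                      -> unknown NMI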

To eliminate this race condition, the PMC value must be checked after
disabling the counter. Add an AMD function, amd_pmu_disable_all(), that
will wait for the NMI handler to reset any active and overflowed counter
after calling x86_pmu_disable_all().

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # 4.14.x-
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lkml.kernel.org/r/Message-ID:
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/events/amd/core.c | 65 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 3 deletions(-)

--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -3,6 +3,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <asm/apicdef.h>
 
 #include "../perf_event.h"
@@ -429,6 +430,64 @@ static void amd_pmu_cpu_dead(int cpu)
 	}
 }
 
+/*
+ * When a PMC counter overflows, an NMI is used to process the event and
+ * reset the counter. NMI latency can result in the counter being updated
+ * before the NMI can run, which can result in what appear to be spurious
+ * NMIs. This function is intended to wait for the NMI to run and reset
+ * the counter to avoid possible unhandled NMI messages.
+ */
+#define OVERFLOW_WAIT_COUNT	50
+
+static void amd_pmu_wait_on_overflow(int idx)
+{
+	unsigned int i;
+	u64 counter;
+
+	/*
+	 * Wait for the counter to be reset if it has overflowed. This loop
+	 * should exit very, very quickly, but just in case, don't wait
+	 * forever...
+	 */
+	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+		rdmsrl(x86_pmu_event_addr(idx), counter);
+		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+			break;
+
+		/* Might be in IRQ context, so can't sleep */
+		udelay(1);
+	}
+}
+
+static void amd_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx;
+
+	x86_pmu_disable_all();
+
+	/*
+	 * This shouldn't be called from NMI context, but add a safeguard here
+	 * to return, since if we're in NMI context we can't wait for an NMI
+	 * to reset an overflowed counter value.
+	 */
+	if (in_nmi())
+		return;
+
+	/*
+	 * Check each counter for overflow and wait for it to be reset by the
+	 * NMI if it has overflowed. This relies on the fact that all active
+	 * counters are always enabled when this function is called and
+	 * ARCH_PERFMON_EVENTSEL_INT is always set.
+	 */
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		amd_pmu_wait_on_overflow(idx);
+	}
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event)
@@ -622,7 +681,7 @@ static ssize_t amd_event_sysfs_show(char
 static __initconst const struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
+	.disable_all		= amd_pmu_disable_all,
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
@@ -732,7 +791,7 @@ void amd_pmu_enable_virt(void)
 	cpuc->perf_ctr_virt_mask = 0;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -750,7 +809,7 @@ void amd_pmu_disable_virt(void)
 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);