From 914123fa39042e651d79eaf86bbf63a1b938dddf Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Tue, 2 Apr 2019 15:21:14 +0000
Subject: x86/perf/amd: Resolve race condition when disabling PMC

From: Lendacky, Thomas <Thomas.Lendacky@amd.com>

commit 914123fa39042e651d79eaf86bbf63a1b938dddf upstream.

On AMD processors, the detection of an overflowed counter in the NMI
handler relies on the current value of the counter. So, for example, to
check for overflow on a 48 bit counter, bit 47 is checked to see if it
is 1 (not overflowed) or 0 (overflowed).
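
For example, on a 48-bit counter the check boils down to something like
the following sketch (counter_overflowed() is an illustrative helper,
not an existing kernel function):

	static bool counter_overflowed(int idx)
	{
		u64 value;

		/* Read the current value of the hardware counter */
		rdmsrl(x86_pmu_event_addr(idx), value);

		/* Bit 47 == 0 means the counter has overflowed */
		return !(value & BIT_ULL(47));
	}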

There is currently a race condition present when disabling and then
updating the PMC. Increased NMI latency in newer AMD processors makes this
race condition more pronounced. If the counter value has overflowed, it is
possible to update the PMC value before the NMI handler can run. The
updated PMC value is not an overflowed value, so when the perf NMI handler
does run, it will not find an overflowed counter. This may appear as an
unknown NMI resulting in either a panic or a series of messages, depending
on how the kernel is configured.
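
Roughly, the problematic interleaving is:

	perf context                          NMI handler
	------------                          -----------
	counter overflows (bit 47 -> 0)
	                                      (increased NMI latency delays it)
	disable the counter
	write a new, non-overflowed value
	                                      handler finally runs, finds no
	                                      counter with bit 47 == 0
	                                      -> unknown NMI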

To eliminate this race condition, the PMC value must be checked after
disabling the counter. Add an AMD function, amd_pmu_disable_all(), that
will wait for the NMI handler to reset any active and overflowed counter
after calling x86_pmu_disable_all().

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # 4.14.x-
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lkml.kernel.org/r/Message-ID:
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/events/amd/core.c | 65 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 3 deletions(-)

--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -3,6 +3,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <asm/apicdef.h>
 
 #include "../perf_event.h"
@@ -429,6 +430,64 @@ static void amd_pmu_cpu_dead(int cpu)
 	}
 }
 
+/*
+ * When a PMC counter overflows, an NMI is used to process the event and
+ * reset the counter. NMI latency can result in the counter being updated
+ * before the NMI can run, which can result in what appear to be spurious
+ * NMIs. This function is intended to wait for the NMI to run and reset
+ * the counter to avoid possible unhandled NMI messages.
+ */
+#define OVERFLOW_WAIT_COUNT	50
+
+static void amd_pmu_wait_on_overflow(int idx)
+{
+	unsigned int i;
+	u64 counter;
+
+	/*
+	 * Wait for the counter to be reset if it has overflowed. This loop
+	 * should exit very, very quickly, but just in case, don't wait
+	 * forever...
+	 */
+	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+		rdmsrl(x86_pmu_event_addr(idx), counter);
+		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+			break;
+
+		/* Might be in IRQ context, so can't sleep */
+		udelay(1);
+	}
+}
+
+static void amd_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx;
+
+	x86_pmu_disable_all();
+
+	/*
+	 * This shouldn't be called from NMI context, but add a safeguard here
+	 * to return, since if we're in NMI context we can't wait for an NMI
+	 * to reset an overflowed counter value.
+	 */
+	if (in_nmi())
+		return;
+
+	/*
+	 * Check each counter for overflow and wait for it to be reset by the
+	 * NMI if it has overflowed. This relies on the fact that all active
+	 * counters are always enabled when this function is called and
+	 * ARCH_PERFMON_EVENTSEL_INT is always set.
+	 */
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		amd_pmu_wait_on_overflow(idx);
+	}
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event)
@@ -622,7 +681,7 @@ static ssize_t amd_event_sysfs_show(char
 static __initconst const struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
+	.disable_all		= amd_pmu_disable_all,
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
@@ -732,7 +791,7 @@ void amd_pmu_enable_virt(void)
 	cpuc->perf_ctr_virt_mask = 0;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -750,7 +809,7 @@ void amd_pmu_disable_virt(void)
 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
 	/* Reload all events */
-	x86_pmu_disable_all();
+	amd_pmu_disable_all();
 	x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);