From f79a0c4550ae6c72562e54364193f55b2c011367 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Wed, 9 Mar 2022 16:42:57 +0800
Subject: KVM: x86/pmu: Fix and isolate TSX-specific performance event logic

From: Like Xu <likexu@tencent.com>

[ Upstream commit e644896f5106aa3f6d7e8c7adf2e4dc0fce53555 ]

The HSW_IN_TX* bits are used in generic code but are not supported on
AMD. Worse, these bits overlap with AMD's EventSelect[11:8], so using
the HSW_IN_TX* bits unconditionally in generic code results in
unintended PMU behavior on AMD. For example, if EventSelect[11:8] is
0x2, pmc_reprogram_counter() wrongly assumes that
HSW_IN_TX_CHECKPOINTED is set and thus forces the sampling period to 0.
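
A minimal stand-alone sketch of the collision (the HSW_* values mirror
arch/x86/include/asm/perf_event.h; the program itself and the 0x76
event code are illustrative only, not kernel code):

  #include <stdint.h>
  #include <stdio.h>

  #define HSW_IN_TX              (1ULL << 32)  /* TSX flag, bit 32 */
  #define HSW_IN_TX_CHECKPOINTED (1ULL << 33)  /* TSX flag, bit 33 */

  int main(void)
  {
          /* An AMD event selector with EventSelect[11:8] = 0x2 sets bit 33. */
          uint64_t eventsel = (0x2ULL << 32) | 0x76;

          /* Generic code misreads AMD's extended event-select bits as TSX. */
          if (eventsel & HSW_IN_TX_CHECKPOINTED)
                  printf("misread as IN_TXCP -> sample period forced to 0\n");
          return 0;
  }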

Also, per the SDM, bits 32 and 33 "may only be set if the processor
supports HLE or RTM", and, for IN_TXCP (bit 33), "this bit may only be
set for IA32_PERFEVTSEL2."

Opportunistically eliminate a code redundancy: if a HSW_IN_TX* bit is
set in pmc->eventsel, it is already set in attr.config, so there is no
need to OR it in a second time.
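
A condensed, hypothetical reduction of the call chain; RAW_EVENT_MASK
and pick_sample_period() below are stand-ins for the kernel's raw event
mask and the patched pmc_reprogram_counter() check, not real kernel
symbols, and the 0x3c event is arbitrary:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  #define HSW_IN_TX              (1ULL << 32)
  #define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
  #define RAW_EVENT_MASK         (0xFFULL | HSW_IN_TX | HSW_IN_TX_CHECKPOINTED)

  static uint64_t pick_sample_period(uint64_t config, bool guest_is_intel)
  {
          /* IN_TXCP only supports a zero sampling period, and only on Intel. */
          if ((config & HSW_IN_TX_CHECKPOINTED) && guest_is_intel)
                  return 0;
          return 1000000;  /* arbitrary nonzero period for the demo */
  }

  int main(void)
  {
          uint64_t eventsel = 0x3c | HSW_IN_TX | HSW_IN_TX_CHECKPOINTED;
          /* config inherits the TSX bits straight from eventsel ... */
          uint64_t config = eventsel & RAW_EVENT_MASK;

          /* ... so attr.config can be tested directly, with no extra OR. */
          printf("Intel guest period: %llu\n",
                 (unsigned long long)pick_sample_period(config, true));
          printf("AMD guest period:   %llu\n",
                 (unsigned long long)pick_sample_period(config, false));
          return 0;
  }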

Reported-by: Ravi Bangoria <ravi.bangoria@amd.com>
Reported-by: Jim Mattson <jmattson@google.com>
Fixes: 103af0a98788 ("perf, kvm: Support the in_tx/in_tx_cp modifiers in KVM arch perfmon emulation v5")
Co-developed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Message-Id: <20220309084257.88931-1-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/x86/kvm/pmu.c           | 15 +++++----------
 arch/x86/kvm/vmx/pmu_intel.c | 13 ++++++++++---
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 255ef63a4354..227b06dd1fea 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -96,8 +96,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
 
 static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
                                   u64 config, bool exclude_user,
-                                  bool exclude_kernel, bool intr,
-                                  bool in_tx, bool in_tx_cp)
+                                  bool exclude_kernel, bool intr)
 {
         struct perf_event *event;
         struct perf_event_attr attr = {
@@ -113,16 +112,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 
         attr.sample_period = get_sample_period(pmc, pmc->counter);
 
-        if (in_tx)
-                attr.config |= HSW_IN_TX;
-        if (in_tx_cp) {
+        if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
+            guest_cpuid_is_intel(pmc->vcpu)) {
                 /*
                  * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
                  * period. Just clear the sample period so at least
                  * allocating the counter doesn't fail.
                  */
                 attr.sample_period = 0;
-                attr.config |= HSW_IN_TX_CHECKPOINTED;
         }
 
         event = perf_event_create_kernel_counter(&attr, -1, current,
@@ -229,9 +226,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
         pmc_reprogram_counter(pmc, type, config,
                               !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                               !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-                              eventsel & ARCH_PERFMON_EVENTSEL_INT,
-                              (eventsel & HSW_IN_TX),
-                              (eventsel & HSW_IN_TX_CHECKPOINTED));
+                              eventsel & ARCH_PERFMON_EVENTSEL_INT);
 }
 EXPORT_SYMBOL_GPL(reprogram_gp_counter);
 
@@ -267,7 +262,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
                               kvm_x86_ops.pmu_ops->find_fixed_event(idx),
                               !(en_field & 0x2), /* exclude user */
                               !(en_field & 0x1), /* exclude kernel */
-                              pmi, false, false);
+                              pmi);
 }
 EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9e380a939c72..ee2452215e93 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -395,6 +395,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         struct kvm_pmc *pmc;
         u32 msr = msr_info->index;
         u64 data = msr_info->data;
+        u64 reserved_bits;
 
         switch (msr) {
         case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -449,7 +450,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                 if (data == pmc->eventsel)
                         return 0;
-                if (!(data & pmu->reserved_bits)) {
+                reserved_bits = pmu->reserved_bits;
+                if ((pmc->idx == 2) &&
+                    (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
+                        reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
+                if (!(data & reserved_bits)) {
                         reprogram_gp_counter(pmc, data);
                         return 0;
                 }
@@ -523,8 +528,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
         entry = kvm_find_cpuid_entry(vcpu, 7, 0);
         if (entry &&
             (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
-            (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
-                pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
+            (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
+                pmu->reserved_bits ^= HSW_IN_TX;
+                pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+        }
 
         bitmap_set(pmu->all_valid_pmc_idx,
                    0, pmu->nr_arch_gp_counters);
-- 
2.35.1