| From 7a3ebf358c60cdf6f7ef1c175053ec17e59945c3 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Thu, 14 Aug 2025 17:11:45 -0700 |
| Subject: KVM: SVM: Set RFLAGS.IF=1 in C code, to get VMRUN out of the STI |
| shadow |
| |
| From: Sean Christopherson <seanjc@google.com> |
| |
| [ Upstream commit be45bc4eff33d9a7dae84a2150f242a91a617402 ] |
| |
| Enable/disable local IRQs, i.e. set/clear RFLAGS.IF, in the common |
| svm_vcpu_enter_exit() just after/before guest_state_{enter,exit}_irqoff() |
| so that VMRUN is not executed in an STI shadow. AMD CPUs have a quirk |
| (some would say "bug"), where the STI shadow bleeds into the guest's |
| int_state field if a #VMEXIT occurs during injection of an event, i.e. if
| the VMRUN doesn't complete before the subsequent #VMEXIT. |
| |
| The spurious "interrupts masked" state is relatively benign, as it only |
| occurs during event injection and is transient. Because KVM is already |
| injecting an event, the guest can't be in HLT, and if KVM is querying IRQ |
| blocking for injection, then KVM would need to force an immediate exit |
| anyway, since injecting multiple events is impossible.
| |
| However, because KVM copies int_state verbatim from vmcb02 to vmcb12, the |
| spurious STI shadow is visible to L1 when running a nested VM, which can |
| trip sanity checks, e.g. in VMware's VMM. |
| |
| Hoist the STI+CLI all the way to C code, as the aforementioned calls to |
| guest_state_{enter,exit}_irqoff() already inform lockdep that IRQs are |
| enabled/disabled, and taking a fault on VMRUN with RFLAGS.IF=1 is already |
| possible. I.e. if there's kernel code that is confused by running with |
| RFLAGS.IF=1, then it's already a problem. In practice, since GIF=0 also |
| blocks NMIs, the only change in exposure to non-KVM code (relative to |
| surrounding VMRUN with STI+CLI) is exception handling code, and except for |
| the kvm_rebooting=1 case, all exceptions in the core VM-Enter/VM-Exit path
| are fatal. |
| |
| Use the "raw" variants to enable/disable IRQs to avoid tracing in the |
| "no instrumentation" code; the guest state helpers also take care of |
| tracing IRQ state. |
| |
| Opportunistically document why KVM needs to do STI in the first place.
| |
| Reported-by: Doug Covelli <doug.covelli@broadcom.com> |
| Closes: https://lore.kernel.org/all/CADH9ctBs1YPmE4aCfGPNBwA10cA8RuAk2gO7542DjMZgs4uzJQ@mail.gmail.com |
| Fixes: f14eec0a3203 ("KVM: SVM: move more vmentry code to assembly") |
| Cc: stable@vger.kernel.org |
| Reviewed-by: Jim Mattson <jmattson@google.com> |
| Link: https://lore.kernel.org/r/20250224165442.2338294-2-seanjc@google.com |
| Signed-off-by: Sean Christopherson <seanjc@google.com> |
| [sean: resolve minor syntactic conflict in __svm_sev_es_vcpu_run()]
| Signed-off-by: Sean Christopherson <seanjc@google.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| arch/x86/kvm/svm/svm.c | 14 ++++++++++++++ |
| arch/x86/kvm/svm/vmenter.S | 9 +-------- |
| 2 files changed, 15 insertions(+), 8 deletions(-) |
| |
| diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c |
| index b6bbd0dc4e65..c95a84afc35f 100644 |
| --- a/arch/x86/kvm/svm/svm.c |
| +++ b/arch/x86/kvm/svm/svm.c |
| @@ -3982,6 +3982,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in |
| |
| guest_state_enter_irqoff(); |
| |
| + /* |
| + * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of |
| + * VMRUN controls whether or not physical IRQs are masked (KVM always |
| + * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the |
| + * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow |
| + * into guest state if delivery of an event during VMRUN triggers a |
| + * #VMEXIT, and the guest_state transitions already tell lockdep that |
| + * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of |
| + * this path, so IRQs aren't actually unmasked while running host code. |
| + */ |
| + raw_local_irq_enable(); |
| + |
| amd_clear_divider(); |
| |
| if (sev_es_guest(vcpu->kvm)) |
| @@ -3989,6 +4001,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in |
| else |
| __svm_vcpu_run(svm, spec_ctrl_intercepted); |
| |
| + raw_local_irq_disable(); |
| + |
| guest_state_exit_irqoff(); |
| } |
| |
| diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S |
| index 42824f9b06a2..48b72625cc45 100644 |
| --- a/arch/x86/kvm/svm/vmenter.S |
| +++ b/arch/x86/kvm/svm/vmenter.S |
| @@ -170,12 +170,8 @@ SYM_FUNC_START(__svm_vcpu_run) |
| VM_CLEAR_CPU_BUFFERS |
| |
| /* Enter guest mode */ |
| - sti |
| - |
| 3: vmrun %_ASM_AX |
| 4: |
| - cli |
| - |
| /* Pop @svm to RAX while it's the only available register. */ |
| pop %_ASM_AX |
| |
| @@ -343,11 +339,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) |
| VM_CLEAR_CPU_BUFFERS |
| |
| /* Enter guest mode */ |
| - sti |
| - |
| 1: vmrun %_ASM_AX |
| - |
| -2: cli |
| +2: |
| |
| /* Pop @svm to RDI, guest registers have been saved already. */ |
| pop %_ASM_DI |
| -- |
| 2.50.1 |
| |