| From 2fbc005722e5d1985ef69a071a4a889ff1cb6120 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Thu, 14 Aug 2025 17:11:48 -0700 |
| Subject: KVM: nVMX: Defer SVI update to vmcs01 on EOI when L2 is active w/o |
| VID |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| From: Chao Gao <chao.gao@intel.com> |
| |
| [ Upstream commit 04bc93cf49d16d01753b95ddb5d4f230b809a991 ] |
| |
| If KVM emulates an EOI for L1's virtual APIC while L2 is active, defer |
| updating GUEST_INTERUPT_STATUS.SVI, i.e. the VMCS's cache of the highest |
| in-service IRQ, until L1 is active, as vmcs01, not vmcs02, needs to track |
| vISR. The missed SVI update for vmcs01 can result in L1 interrupts being |
| incorrectly blocked, e.g. if there is a pending interrupt with lower |
| priority than the interrupt that was EOI'd. |
| |
| This bug only affects use cases where L1's vAPIC is effectively passed |
| through to L2, e.g. in a pKVM scenario where L2 is L1's depriveleged host, |
| as KVM will only emulate an EOI for L1's vAPIC if Virtual Interrupt |
| Delivery (VID) is disabled in vmc12, and L1 isn't intercepting L2 accesses |
| to its (virtual) APIC page (or if x2APIC is enabled, the EOI MSR). |
| |
| WARN() if KVM updates L1's ISR while L2 is active with VID enabled, as an |
| EOI from L2 is supposed to affect L2's vAPIC, but still defer the update, |
| to try to keep L1 alive. Specifically, KVM forwards all APICv-related |
| VM-Exits to L1 via nested_vmx_l1_wants_exit(): |
| |
| case EXIT_REASON_APIC_ACCESS: |
| case EXIT_REASON_APIC_WRITE: |
| case EXIT_REASON_EOI_INDUCED: |
| /* |
| * The controls for "virtualize APIC accesses," "APIC- |
| * register virtualization," and "virtual-interrupt |
| * delivery" only come from vmcs12. |
| */ |
| return true; |
| |
| Fixes: c7c9c56ca26f ("x86, apicv: add virtual interrupt delivery support") |
| Cc: stable@vger.kernel.org |
| Link: https://lore.kernel.org/kvm/20230312180048.1778187-1-jason.cj.chen@intel.com |
| Reported-by: Markku Ahvenjärvi <mankku@gmail.com> |
| Closes: https://lore.kernel.org/all/20240920080012.74405-1-mankku@gmail.com |
| Cc: Janne Karhunen <janne.karhunen@gmail.com> |
| Signed-off-by: Chao Gao <chao.gao@intel.com> |
| [sean: drop request, handle in VMX, write changelog] |
| Tested-by: Chao Gao <chao.gao@intel.com> |
| Link: https://lore.kernel.org/r/20241128000010.4051275-3-seanjc@google.com |
| Signed-off-by: Sean Christopherson <seanjc@google.com> |
| [sean: resolve minor syntactic conflict in lapic.h, account for lack of |
| kvm_x86_call(), drop sanity check due to lack of wants_to_run] |
| Signed-off-by: Sean Christopherson <seanjc@google.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| arch/x86/kvm/lapic.c | 11 +++++++++++ |
| arch/x86/kvm/lapic.h | 1 + |
| arch/x86/kvm/vmx/nested.c | 5 +++++ |
| arch/x86/kvm/vmx/vmx.c | 16 ++++++++++++++++ |
| arch/x86/kvm/vmx/vmx.h | 1 + |
| 5 files changed, 34 insertions(+) |
| |
| diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c |
| index 3d65d6a023c9..9aae76b74417 100644 |
| --- a/arch/x86/kvm/lapic.c |
| +++ b/arch/x86/kvm/lapic.c |
| @@ -640,6 +640,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) |
| } |
| } |
| |
| +void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu) |
| +{ |
| + struct kvm_lapic *apic = vcpu->arch.apic; |
| + |
| + if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active) |
| + return; |
| + |
| + static_call(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); |
| +} |
| +EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr); |
| + |
| int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) |
| { |
| /* This may race with setting of irr in __apic_accept_irq() and |
| diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h |
| index a5ac4a5a5179..e5d2dc58fcf8 100644 |
| --- a/arch/x86/kvm/lapic.h |
| +++ b/arch/x86/kvm/lapic.h |
| @@ -122,6 +122,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); |
| int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); |
| int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); |
| enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu); |
| +void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu); |
| int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
| |
| u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); |
| diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c |
| index 8052f8b7d8e1..d55f7edc0860 100644 |
| --- a/arch/x86/kvm/vmx/nested.c |
| +++ b/arch/x86/kvm/vmx/nested.c |
| @@ -4839,6 +4839,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, |
| kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); |
| } |
| |
| + if (vmx->nested.update_vmcs01_hwapic_isr) { |
| + vmx->nested.update_vmcs01_hwapic_isr = false; |
| + kvm_apic_update_hwapic_isr(vcpu); |
| + } |
| + |
| if ((vm_exit_reason != -1) && |
| (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))) |
| vmx->nested.need_vmcs12_to_shadow_sync = true; |
| diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c |
| index 721ba6ddb121..7b87fbc69b21 100644 |
| --- a/arch/x86/kvm/vmx/vmx.c |
| +++ b/arch/x86/kvm/vmx/vmx.c |
| @@ -6713,6 +6713,22 @@ static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) |
| u16 status; |
| u8 old; |
| |
| + /* |
| + * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI |
| + * is only relevant for if and only if Virtual Interrupt Delivery is |
| + * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's |
| + * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested |
| + * VM-Exit, otherwise L1 with run with a stale SVI. |
| + */ |
| + if (is_guest_mode(vcpu)) { |
| + /* |
| + * KVM is supposed to forward intercepted L2 EOIs to L1 if VID |
| + * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC. |
| + */ |
| + to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true; |
| + return; |
| + } |
| + |
| if (max_isr == -1) |
| max_isr = 0; |
| |
| diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h |
| index 9e0bb98b116d..8b4b149bd9c1 100644 |
| --- a/arch/x86/kvm/vmx/vmx.h |
| +++ b/arch/x86/kvm/vmx/vmx.h |
| @@ -189,6 +189,7 @@ struct nested_vmx { |
| bool reload_vmcs01_apic_access_page; |
| bool update_vmcs01_cpu_dirty_logging; |
| bool update_vmcs01_apicv_status; |
| + bool update_vmcs01_hwapic_isr; |
| |
| /* |
| * Enlightened VMCS has been enabled. It does not mean that L1 has to |
| -- |
| 2.50.1 |
| |