| From 9e1075bdd03cf356ae89ba3b703080b5c4fa2278 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Thu, 14 Aug 2025 17:12:03 -0700 |
| Subject: KVM: nVMX: Check vmcs12->guest_ia32_debugctl on nested VM-Enter |
| |
| From: Maxim Levitsky <mlevitsk@redhat.com> |
| |
| [ Upstream commit 095686e6fcb4150f0a55b1a25987fad3d8af58d6 ] |
| |
| Add a consistency check for L2's guest_ia32_debugctl, as KVM only supports |
| a subset of hardware functionality, i.e. KVM can't rely on hardware to |
| detect illegal/unsupported values. Failure to check the vmcs12 value |
| would allow the guest to load any harware-supported value while running L2. |
| |
| Take care to exempt BTF and LBR from the validity check in order to match |
| KVM's behavior for writes via WRMSR, but without clobbering vmcs12. Even |
| if VM_EXIT_SAVE_DEBUG_CONTROLS is set in vmcs12, L1 can reasonably expect |
| that vmcs12->guest_ia32_debugctl will not be modified if writes to the MSR |
| are being intercepted. |
| |
| Arguably, KVM _should_ update vmcs12 if VM_EXIT_SAVE_DEBUG_CONTROLS is set |
| *and* writes to MSR_IA32_DEBUGCTLMSR are not being intercepted by L1, but |
| that would incur non-trivial complexity and wouldn't change the fact that |
| KVM's handling of DEBUGCTL is blatantly broken. I.e. the extra complexity |
| is not worth carrying. |
| |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> |
| Co-developed-by: Sean Christopherson <seanjc@google.com> |
| Link: https://lore.kernel.org/r/20250610232010.162191-7-seanjc@google.com |
| Stable-dep-of: 7d0cce6cbe71 ("KVM: VMX: Wrap all accesses to IA32_DEBUGCTL with getter/setter APIs") |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| Signed-off-by: Sean Christopherson <seanjc@google.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| arch/x86/kvm/vmx/nested.c | 12 ++++++++++-- |
| arch/x86/kvm/vmx/vmx.c | 5 ++--- |
| arch/x86/kvm/vmx/vmx.h | 3 +++ |
| 3 files changed, 15 insertions(+), 5 deletions(-) |
| |
| diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c |
| index d55f7edc0860..da129e12cff9 100644 |
| --- a/arch/x86/kvm/vmx/nested.c |
| +++ b/arch/x86/kvm/vmx/nested.c |
| @@ -2532,7 +2532,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
| if (vmx->nested.nested_run_pending && |
| (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { |
| kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
| - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); |
| + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl & |
| + vmx_get_supported_debugctl(vcpu, false)); |
| } else { |
| kvm_set_dr(vcpu, 7, vcpu->arch.dr7); |
| vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); |
| @@ -3022,7 +3023,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, |
| return -EINVAL; |
| |
| if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && |
| - CC(!kvm_dr7_valid(vmcs12->guest_dr7))) |
| + (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || |
| + CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) |
| return -EINVAL; |
| |
| if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && |
| @@ -4374,6 +4376,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
| (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
| (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); |
| |
| + /* |
| + * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02. |
| + * Writes to DEBUGCTL that aren't intercepted by L1 are immediately |
| + * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into |
| + * vmcs02 doesn't strictly track vmcs12. |
| + */ |
| if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) |
| kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); |
| |
| diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c |
| index 6517b9d929bf..0b37e21d55b1 100644 |
| --- a/arch/x86/kvm/vmx/vmx.c |
| +++ b/arch/x86/kvm/vmx/vmx.c |
| @@ -2052,7 +2052,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, |
| return (unsigned long)data; |
| } |
| |
| -static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) |
| +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) |
| { |
| u64 debugctl = 0; |
| |
| @@ -2071,8 +2071,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated |
| return debugctl; |
| } |
| |
| -static bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, |
| - bool host_initiated) |
| +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) |
| { |
| u64 invalid; |
| |
| diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h |
| index ddbe73958d7f..99e3f46de2ec 100644 |
| --- a/arch/x86/kvm/vmx/vmx.h |
| +++ b/arch/x86/kvm/vmx/vmx.h |
| @@ -442,6 +442,9 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, |
| |
| void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); |
| |
| +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); |
| +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); |
| + |
| /* |
| * Note, early Intel manuals have the write-low and read-high bitmap offsets |
| * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and |
| -- |
| 2.50.1 |
| |