| From 74b2f61bbec6c3920631294e2c2fac1f0134057a Mon Sep 17 00:00:00 2001 |
| From: Sean Christopherson <sean.j.christopherson@intel.com> |
| Date: Fri, 22 Nov 2019 08:58:18 -0800 |
| Subject: [PATCH] KVM: x86: Grab KVM's srcu lock when setting nested state |
| |
| commit ad5996d9a0e8019c3ae5151e687939369acfe044 upstream. |
| |
| Acquire kvm->srcu for the duration of ->set_nested_state() to fix a bug |
| where nVMX dereferences ->memslots without holding ->srcu or ->slots_lock. |
| |
| The other half of nested migration, ->get_nested_state(), does not need |
| to acquire ->srcu as it is purely a dump of internal KVM (and CPU) |
| state to userspace. |
| |
| Detected as an RCU lockdep splat that is 100% reproducible by running |
| KVM's state_test selftest with CONFIG_PROVE_LOCKING=y. Note that the |
| failing function, kvm_is_visible_gfn(), is only checking the validity of |
| a gfn, it's not actually accessing guest memory (which is more or less |
| unsupported during vmx_set_nested_state() due to incorrect MMU state), |
| i.e. vmx_set_nested_state() itself isn't fundamentally broken. In any |
| case, setting nested state isn't a fast path so there's no reason to go |
| out of our way to avoid taking ->srcu. |
| |
| ============================= |
| WARNING: suspicious RCU usage |
| 5.4.0-rc7+ #94 Not tainted |
| ----------------------------- |
| include/linux/kvm_host.h:626 suspicious rcu_dereference_check() usage! |
| |
| other info that might help us debug this: |
| |
| rcu_scheduler_active = 2, debug_locks = 1 |
| 1 lock held by evmcs_test/10939: |
| #0: ffff88826ffcb800 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x85/0x630 [kvm] |
| |
| stack backtrace: |
| CPU: 1 PID: 10939 Comm: evmcs_test Not tainted 5.4.0-rc7+ #94 |
| Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 |
| Call Trace: |
| dump_stack+0x68/0x9b |
| kvm_is_visible_gfn+0x179/0x180 [kvm] |
| mmu_check_root+0x11/0x30 [kvm] |
| fast_cr3_switch+0x40/0x120 [kvm] |
| kvm_mmu_new_cr3+0x34/0x60 [kvm] |
| nested_vmx_load_cr3+0xbd/0x1f0 [kvm_intel] |
| nested_vmx_enter_non_root_mode+0xab8/0x1d60 [kvm_intel] |
| vmx_set_nested_state+0x256/0x340 [kvm_intel] |
| kvm_arch_vcpu_ioctl+0x491/0x11a0 [kvm] |
| kvm_vcpu_ioctl+0xde/0x630 [kvm] |
| do_vfs_ioctl+0xa2/0x6c0 |
| ksys_ioctl+0x66/0x70 |
| __x64_sys_ioctl+0x16/0x20 |
| do_syscall_64+0x54/0x200 |
| entry_SYSCALL_64_after_hwframe+0x49/0xbe |
| RIP: 0033:0x7f59a2b95f47 |
| |
| Fixes: 8fcc4b5923af5 ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE") |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> |
| Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c |
| index cfdbfe33469d..1241b67137b1 100644 |
| --- a/arch/x86/kvm/x86.c |
| +++ b/arch/x86/kvm/x86.c |
| @@ -4290,6 +4290,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, |
| case KVM_SET_NESTED_STATE: { |
| struct kvm_nested_state __user *user_kvm_nested_state = argp; |
| struct kvm_nested_state kvm_state; |
| + int idx; |
| |
| r = -EINVAL; |
| if (!kvm_x86_ops->set_nested_state) |
| @@ -4313,7 +4314,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, |
| && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE)) |
| break; |
| |
| + idx = srcu_read_lock(&vcpu->kvm->srcu); |
| r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); |
| + srcu_read_unlock(&vcpu->kvm->srcu, idx); |
| break; |
| } |
| case KVM_GET_SUPPORTED_HV_CPUID: { |
| -- |
| 2.7.4 |
| |