| // SPDX-License-Identifier: GPL-2.0-only |
| #include <linux/cpu.h> |
| #include <linux/cpumask.h> |
| #include <linux/errno.h> |
| #include <linux/kvm_types.h> |
| #include <linux/list.h> |
| #include <linux/percpu.h> |
| |
| #include <asm/perf_event.h> |
| #include <asm/processor.h> |
| #include <asm/virt.h> |
| #include <asm/vmx.h> |
| |
| struct x86_virt_ops { |
| int feature; |
| int (*enable_virtualization_cpu)(void); |
| int (*disable_virtualization_cpu)(void); |
| void (*emergency_disable_virtualization_cpu)(void); |
| }; |
| static struct x86_virt_ops virt_ops __ro_after_init; |
| |
| __visible bool virt_rebooting; |
| EXPORT_SYMBOL_FOR_KVM(virt_rebooting); |
| |
| static DEFINE_PER_CPU(int, virtualization_nr_users); |
| |
| static cpu_emergency_virt_cb __rcu *kvm_emergency_callback; |
| |
| void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback) |
| { |
| if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback))) |
| return; |
| |
| rcu_assign_pointer(kvm_emergency_callback, callback); |
| } |
| EXPORT_SYMBOL_FOR_KVM(x86_virt_register_emergency_callback); |
| |
| void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback) |
| { |
| if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback) != callback)) |
| return; |
| |
| rcu_assign_pointer(kvm_emergency_callback, NULL); |
| synchronize_rcu(); |
| } |
| EXPORT_SYMBOL_FOR_KVM(x86_virt_unregister_emergency_callback); |
| |
| static void x86_virt_invoke_kvm_emergency_callback(void) |
| { |
| cpu_emergency_virt_cb *kvm_callback; |
| |
| kvm_callback = rcu_dereference(kvm_emergency_callback); |
| if (kvm_callback) |
| kvm_callback(); |
| } |
| |
| #if IS_ENABLED(CONFIG_KVM_INTEL) |
| static DEFINE_PER_CPU(struct vmcs *, root_vmcs); |
| |
| static int x86_virt_cpu_vmxon(void) |
| { |
| u64 vmxon_pointer = __pa(per_cpu(root_vmcs, raw_smp_processor_id())); |
| u64 msr; |
| |
| cr4_set_bits(X86_CR4_VMXE); |
| |
| asm goto("1: vmxon %[vmxon_pointer]\n\t" |
| _ASM_EXTABLE(1b, %l[fault]) |
| : : [vmxon_pointer] "m"(vmxon_pointer) |
| : : fault); |
| return 0; |
| |
| fault: |
| WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n", |
| rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr); |
| cr4_clear_bits(X86_CR4_VMXE); |
| |
| return -EFAULT; |
| } |
| |
| static int x86_vmx_enable_virtualization_cpu(void) |
| { |
| int r; |
| |
| if (cr4_read_shadow() & X86_CR4_VMXE) |
| return -EBUSY; |
| |
| intel_pt_handle_vmx(1); |
| |
| r = x86_virt_cpu_vmxon(); |
| if (r) { |
| intel_pt_handle_vmx(0); |
| return r; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) |
| * |
| * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to |
| * atomically track post-VMXON state, e.g. this may be called in NMI context. |
| * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. |
| * faults are guaranteed to be due to the !post-VMXON check unless the CPU is |
| * magically in RM, VM86, compat mode, or at CPL>0. |
| */ |
| static int x86_vmx_disable_virtualization_cpu(void) |
| { |
| int r = -EIO; |
| |
| asm goto("1: vmxoff\n\t" |
| _ASM_EXTABLE(1b, %l[fault]) |
| ::: "cc", "memory" : fault); |
| r = 0; |
| |
| fault: |
| cr4_clear_bits(X86_CR4_VMXE); |
| intel_pt_handle_vmx(0); |
| return r; |
| } |
| |
| static void x86_vmx_emergency_disable_virtualization_cpu(void) |
| { |
| virt_rebooting = true; |
| |
| /* |
| * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be |
| * set in task context. If this races with _another_ emergency call |
| * from NMI context, VMCLEAR (in KVM) and VMXOFF may #UD, but KVM and |
| * the kernel will eat those faults due to virt_rebooting being set by |
| * the interrupting NMI callback. |
| */ |
| if (!(__read_cr4() & X86_CR4_VMXE)) |
| return; |
| |
| x86_virt_invoke_kvm_emergency_callback(); |
| |
| x86_vmx_disable_virtualization_cpu(); |
| } |
| |
| static __init void x86_vmx_exit(void) |
| { |
| int cpu; |
| |
| for_each_possible_cpu(cpu) { |
| free_page((unsigned long)per_cpu(root_vmcs, cpu)); |
| per_cpu(root_vmcs, cpu) = NULL; |
| } |
| } |
| |
| static __init int __x86_vmx_init(void) |
| { |
| const struct x86_virt_ops vmx_ops = { |
| .feature = X86_FEATURE_VMX, |
| .enable_virtualization_cpu = x86_vmx_enable_virtualization_cpu, |
| .disable_virtualization_cpu = x86_vmx_disable_virtualization_cpu, |
| .emergency_disable_virtualization_cpu = x86_vmx_emergency_disable_virtualization_cpu, |
| }; |
| |
| u64 basic_msr; |
| u32 rev_id; |
| int cpu; |
| |
| if (!cpu_feature_enabled(X86_FEATURE_VMX)) |
| return -EOPNOTSUPP; |
| |
| rdmsrq(MSR_IA32_VMX_BASIC, basic_msr); |
| |
| /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ |
| if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE)) |
| return -EIO; |
| |
| /* |
| * Even if eVMCS is enabled (or will be enabled?), and even though not |
| * explicitly documented by TLFS, the root VMCS passed to VMXON should |
| * still be marked with the revision_id reported by the physical CPU. |
| */ |
| rev_id = vmx_basic_vmcs_revision_id(basic_msr); |
| |
| for_each_possible_cpu(cpu) { |
| int node = cpu_to_node(cpu); |
| struct page *page; |
| struct vmcs *vmcs; |
| |
| page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); |
| if (WARN_ON_ONCE(!page)) { |
| x86_vmx_exit(); |
| return -ENOMEM; |
| } |
| |
| vmcs = page_address(page); |
| vmcs->hdr.revision_id = rev_id; |
| per_cpu(root_vmcs, cpu) = vmcs; |
| } |
| |
| memcpy(&virt_ops, &vmx_ops, sizeof(virt_ops)); |
| return 0; |
| } |
| |
| static __init int x86_vmx_init(void) |
| { |
| int r; |
| |
| r = __x86_vmx_init(); |
| if (r) |
| setup_clear_cpu_cap(X86_FEATURE_VMX); |
| return r; |
| } |
| #else |
| static __init int x86_vmx_init(void) { return -EOPNOTSUPP; } |
| static __init void x86_vmx_exit(void) { } |
| #endif |
| |
| #if IS_ENABLED(CONFIG_KVM_AMD) |
| static int x86_svm_enable_virtualization_cpu(void) |
| { |
| u64 efer; |
| |
| rdmsrq(MSR_EFER, efer); |
| if (efer & EFER_SVME) |
| return -EBUSY; |
| |
| wrmsrq(MSR_EFER, efer | EFER_SVME); |
| return 0; |
| } |
| |
| static int x86_svm_disable_virtualization_cpu(void) |
| { |
| int r = -EIO; |
| u64 efer; |
| |
| /* |
| * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and |
| * NMI aren't blocked. |
| */ |
| asm goto("1: stgi\n\t" |
| _ASM_EXTABLE(1b, %l[fault]) |
| ::: "memory" : fault); |
| r = 0; |
| |
| fault: |
| rdmsrq(MSR_EFER, efer); |
| wrmsrq(MSR_EFER, efer & ~EFER_SVME); |
| return r; |
| } |
| |
| static void x86_svm_emergency_disable_virtualization_cpu(void) |
| { |
| u64 efer; |
| |
| virt_rebooting = true; |
| |
| rdmsrq(MSR_EFER, efer); |
| if (!(efer & EFER_SVME)) |
| return; |
| |
| x86_virt_invoke_kvm_emergency_callback(); |
| |
| x86_svm_disable_virtualization_cpu(); |
| } |
| |
| static __init int x86_svm_init(void) |
| { |
| const struct x86_virt_ops svm_ops = { |
| .feature = X86_FEATURE_SVM, |
| .enable_virtualization_cpu = x86_svm_enable_virtualization_cpu, |
| .disable_virtualization_cpu = x86_svm_disable_virtualization_cpu, |
| .emergency_disable_virtualization_cpu = x86_svm_emergency_disable_virtualization_cpu, |
| }; |
| |
| if (!cpu_feature_enabled(X86_FEATURE_SVM) || |
| cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) |
| return -EOPNOTSUPP; |
| |
| memcpy(&virt_ops, &svm_ops, sizeof(virt_ops)); |
| return 0; |
| } |
| #else |
| static __init int x86_svm_init(void) { return -EOPNOTSUPP; } |
| #endif |
| |
| int x86_virt_get_ref(int feat) |
| { |
| int r; |
| |
| /* Ensure the !feature check can't get false positives. */ |
| BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX); |
| |
| if (!virt_ops.feature || virt_ops.feature != feat) |
| return -EOPNOTSUPP; |
| |
| guard(preempt)(); |
| |
| if (this_cpu_inc_return(virtualization_nr_users) > 1) |
| return 0; |
| |
| r = virt_ops.enable_virtualization_cpu(); |
| if (r) |
| WARN_ON_ONCE(this_cpu_dec_return(virtualization_nr_users)); |
| |
| return r; |
| } |
| EXPORT_SYMBOL_FOR_KVM(x86_virt_get_ref); |
| |
| void x86_virt_put_ref(int feat) |
| { |
| guard(preempt)(); |
| |
| if (WARN_ON_ONCE(!this_cpu_read(virtualization_nr_users)) || |
| this_cpu_dec_return(virtualization_nr_users)) |
| return; |
| |
| BUG_ON(virt_ops.disable_virtualization_cpu() && !virt_rebooting); |
| } |
| EXPORT_SYMBOL_FOR_KVM(x86_virt_put_ref); |
| |
| /* |
| * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during |
| * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if |
| * GIF=0, i.e. if the crash occurred between CLGI and STGI. |
| */ |
| int x86_virt_emergency_disable_virtualization_cpu(void) |
| { |
| if (!virt_ops.feature) |
| return -EOPNOTSUPP; |
| |
| /* |
| * IRQs must be disabled as virtualization is enabled in hardware via |
| * function call IPIs, i.e. IRQs need to be disabled to guarantee |
| * virtualization stays disabled. |
| */ |
| lockdep_assert_irqs_disabled(); |
| |
| /* |
| * Do the NMI shootdown even if virtualization is off on _this_ CPU, as |
| * other CPUs may have virtualization enabled. |
| * |
| * TODO: Track whether or not virtualization might be enabled on other |
| * CPUs? May not be worth avoiding the NMI shootdown... |
| */ |
| virt_ops.emergency_disable_virtualization_cpu(); |
| return 0; |
| } |
| |
| void __init x86_virt_init(void) |
| { |
| /* |
| * Attempt to initialize both SVM and VMX, and simply use whichever one |
| * is present. Rsefuse to enable/use SVM or VMX if both are somehow |
| * supported. No known CPU supports both SVM and VMX. |
| */ |
| bool has_vmx = !x86_vmx_init(); |
| bool has_svm = !x86_svm_init(); |
| |
| if (WARN_ON_ONCE(has_vmx && has_svm)) { |
| x86_vmx_exit(); |
| memset(&virt_ops, 0, sizeof(virt_ops)); |
| } |
| } |