From foo@baz Tue Aug 14 16:14:56 CEST 2018
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 5 Aug 2018 16:07:47 +0200
Subject: KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry

From: Paolo Bonzini <pbonzini@redhat.com>

commit 5b76a3cff011df2dcb6186c965a2e4d809a05ad4 upstream

When nested virtualization is in use, VMENTER operations from the nested
hypervisor into the nested guest will always be processed by the bare metal
hypervisor, and KVM's "conditional cache flushes" mode in particular does a
flush on nested vmentry.  Therefore, include the "skip L1D flush on
vmentry" bit in KVM's suggested ARCH_CAPABILITIES setting.

Add the relevant Documentation.
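
As an aside on how the other end consumes this bit (a sketch under
assumptions, modeled on the consumer added elsewhere in the L1TF series,
not code introduced by this patch): bit 3 of MSR_IA32_ARCH_CAPABILITIES
is ARCH_CAP_SKIP_VMENTRY_L1DFLUSH, and a nested KVM that sees it set can
conclude that its own VMENTER flushes are redundant:

	u64 caps = 0;

	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, caps);

	/* The outer hypervisor already flushes L1D around our VMENTERs. */
	if (caps & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH)
		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;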

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/l1tf.rst          |   21 +++++++++++++++++++++
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/vmx.c              |    3 +--
 arch/x86/kvm/x86.c              |   26 +++++++++++++++++++++++++-
 4 files changed, 48 insertions(+), 3 deletions(-)

--- a/Documentation/l1tf.rst
+++ b/Documentation/l1tf.rst
@@ -546,6 +546,27 @@ available:
   EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
   parameter.
 
+3.4. Nested virtual machines
+""""""""""""""""""""""""""""
+
+When nested virtualization is in use, three operating systems are involved:
+the bare metal hypervisor, the nested hypervisor and the nested virtual
+machine. VMENTER operations from the nested hypervisor into the nested
+guest will always be processed by the bare metal hypervisor. If KVM is the
+bare metal hypervisor it will:
+
+ - Flush the L1D cache on every switch from the nested hypervisor to the
+   nested virtual machine, so that the nested hypervisor's secrets are not
+   exposed to the nested virtual machine;
+
+ - Flush the L1D cache on every switch from the nested virtual machine to
+   the nested hypervisor; this is a complex operation, and flushing the
+   L1D cache prevents the bare metal hypervisor's secrets from being
+   exposed to the nested virtual machine;
+
+ - Instruct the nested hypervisor not to perform any L1D cache flush. This
+   is an optimization to avoid double L1D flushing.
+
 
 .. _default_mitigations:
 
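A condensed sketch of how the first two bullets above play out in code
(simplified from the vmx_l1d_flush()/l1tf_flush_l1d logic added elsewhere
in this series; nothing here is added by this patch): emulating the
nested hypervisor's vmlaunch/vmresume marks the vCPU, and the next
hardware VMENTER performs the flush in "cond" mode:

	/* Set while handling the nested hypervisor's VMENTER. */
	vcpu->arch.l1tf_flush_l1d = true;

	/* Later, just before entering the guest ("cond" mode): */
	if (vcpu->arch.l1tf_flush_l1d) {
		vcpu->arch.l1tf_flush_l1d = false;
		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
	}
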
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1346,6 +1346,7 @@ void kvm_vcpu_reload_apic_access_page(st
 void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					    unsigned long address);
 
+u64 kvm_get_arch_capabilities(void);
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5461,8 +5461,7 @@ static int vmx_vcpu_setup(struct vcpu_vm
 		++vmx->nmsrs;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+	vmx->arch_capabilities = kvm_get_arch_capabilities();
 
 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
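For context, the cached value assigned above is what a guest RDMSR of
MSR_IA32_ARCH_CAPABILITIES returns, so this assignment is where KVM's
suggested bits reach the nested hypervisor.  Roughly (simplified from
vmx_get_msr(); unchanged by this patch):

	case MSR_IA32_ARCH_CAPABILITIES:
		if (!msr_info->host_initiated &&
		    !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
			return 1;
		msr_info->data = to_vmx(vcpu)->arch_capabilities;
		break;
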
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1020,11 +1020,35 @@ static u32 msr_based_features[] = {
 
 static unsigned int num_msr_based_features;
 
+u64 kvm_get_arch_capabilities(void)
+{
+	u64 data;
+
+	rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+
+	/*
+	 * If we're doing cache flushes (either "always" or "cond")
+	 * we will do one whenever the guest does a vmlaunch/vmresume.
+	 * If an outer hypervisor is doing the cache flush for us
+	 * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
+	 * capability to the guest too, and if EPT is disabled we're not
+	 * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
+	 * require a nested hypervisor to do a flush of its own.
+	 */
+	if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
+		data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
+
+	return data;
+}
+EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+
 static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
 {
 	switch (msr->index) {
-	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_ARCH_CAPABILITIES:
+		msr->data = kvm_get_arch_capabilities();
+		break;
+	case MSR_IA32_UCODE_REV:
 		rdmsrl_safe(msr->index, &msr->data);
 		break;
 	default:
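
The MSR_IA32_ARCH_CAPABILITIES case in kvm_get_msr_feature() above is
reachable through KVM's feature-MSR interface (KVM_GET_MSRS on the
/dev/kvm system fd), so the suggested value can be inspected without
creating a vCPU.  A hypothetical standalone reader, assuming a kernel
with this series applied:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		struct {
			struct kvm_msrs hdr;
			struct kvm_msr_entry entry;
		} msrs;
		int kvm = open("/dev/kvm", O_RDWR);

		memset(&msrs, 0, sizeof(msrs));
		msrs.hdr.nmsrs = 1;
		msrs.entry.index = 0x10a;	/* MSR_IA32_ARCH_CAPABILITIES */

		if (kvm < 0 || ioctl(kvm, KVM_GET_MSRS, &msrs) != 1)
			return 1;

		/* Bit 3 is ARCH_CAP_SKIP_VMENTRY_L1DFLUSH. */
		printf("ARCH_CAPABILITIES: %#llx\n",
		       (unsigned long long)msrs.entry.data);
		return 0;
	}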