blob: ac29e39a4b07c512dd65443d6871b8325a7fe97e [file] [log] [blame]
From 56b8fd24f63151d3c294dfd8e800bb8d0d10fc9f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <>
Date: Thu, 1 Aug 2019 13:35:22 -0700
Subject: [PATCH] KVM: x86/mmu: Add explicit access mask for MMIO SPTEs
commit 4af7715110a2617fc40ac2c1232f664019269f3a upstream.
When shadow paging is enabled, KVM tracks the allowed access type for
MMIO SPTEs so that it can do a permission check on a MMIO GVA cache hit
without having to walk the guest's page tables. The tracking is done
by retaining the WRITE and USER bits of the access when inserting the
MMIO SPTE (read access is implicitly allowed), which allows the MMIO
page fault handler to retrieve and cache the WRITE/USER bits from the SPTE.
Unfortunately for EPT, the mask used to retain the WRITE/USER bits is
hardcoded using the x86 paging versions of the bits. This funkiness
happens to work because KVM uses a completely different mask/value for
MMIO SPTEs when EPT is enabled, and the EPT mask/value just happens to
overlap exactly with the x86 WRITE/USER bits[*].
Explicitly define the access mask for MMIO SPTEs to accurately reflect
that EPT does not want to incorporate any access bits into the SPTE, and
so that KVM isn't subtly relying on EPT's WX bits always being set in
MMIO SPTEs, e.g. attempting to use other bits for experimentation breaks
horribly.
Note, vcpu_match_mmio_gva() explicitly prevents matching GVA==0, and all
TDP flows explicitly set mmio_gva to 0, i.e. zeroing vcpu->arch.access for
EPT has no (known) functional impact.
[*] Using WX to generate EPT misconfigurations (equivalent to reserved
bit page fault) ensures KVM can employ its MMIO page fault tricks
even on platforms without reserved address bits.
Fixes: ce88decffd17 ("KVM: MMU: mmio page fault support")
Signed-off-by: Sean Christopherson <>
Signed-off-by: Paolo Bonzini <>
[PG: use older x86 file path for v5.2.x code base.]
Signed-off-by: Paul Gortmaker <>
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d80af60cffba..5a5b831360f5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -237,6 +237,7 @@ static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mmio_mask;
static u64 __read_mostly shadow_mmio_value;
+static u64 __read_mostly shadow_mmio_access_mask;
static u64 __read_mostly shadow_present_mask;
static u64 __read_mostly shadow_me_mask;
@@ -318,11 +319,13 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
kvm_flush_remote_tlbs_with_range(kvm, &range);
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
+ BUG_ON((u64)(unsigned)access_mask != access_mask);
BUG_ON((mmio_mask & mmio_value) != mmio_value);
shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
+ shadow_mmio_access_mask = access_mask;
@@ -413,7 +416,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
u64 mask = generation_mmio_spte_mask(gen);
u64 gpa = gfn << PAGE_SHIFT;
+ access &= shadow_mmio_access_mask;
mask |= shadow_mmio_value | access;
mask |= gpa | shadow_nonpresent_or_rsvd_mask;
mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
@@ -440,8 +443,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
static unsigned get_mmio_spte_access(u64 spte)
- u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask;
- return (spte & ~mask) & ~PAGE_MASK;
+ return spte & shadow_mmio_access_mask;
static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -3384,7 +3386,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
if (unlikely(is_noslot_pfn(pfn)))
- vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+ vcpu_cache_mmio_info(vcpu, gva, gfn,
+ access & shadow_mmio_access_mask);
return false;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 87185302a53d..c3fa8e77a1e7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -56,7 +56,7 @@ static inline u64 rsvd_bits(int s, int e)
return ((1ULL << (e - s + 1)) - 1) << s;
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask);
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 58ab535d4687..c05805a752a8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4002,7 +4002,7 @@ static void ept_set_mmio_spte_mask(void)
* of an EPT paging-structure entry is 110b (write/execute).
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1c83722652e8..9273af4e36a4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7055,7 +7055,7 @@ static void kvm_set_mmio_spte_mask(void)
if (shadow_phys_bits == 52)
mask &= ~1ull;
- kvm_mmu_set_mmio_spte_mask(mask, mask);
+ kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
#ifdef CONFIG_X86_64