Merge tag 'probes-fixes-v6.6-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull probes fixes from Masami Hiramatsu:
- Fix fprobe document to add a new ret_ip parameter for callback
functions. This has been introduced in v6.5 but the document was not
updated.
- Fix fprobe to check the number of active retprobes is not zero. This
number is passed from parameter or calculated by the parameter and it
can be zero which is not acceptable. But current code only check it
is not minus.
* tag 'probes-fixes-v6.6-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
fprobe: Fix to ensure the number of active retprobes is not zero
Documentation: probes: Add a new ret_ip callback parameter
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 5882b24..1095c66 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -344,14 +344,14 @@
*/
#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
-#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
+#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
GENMASK(26, 25) | BIT(21) | BIT(18) | \
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
#define __HFGWTR_EL2_MASK GENMASK(49, 0)
-#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
+#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGITR_EL2_RES0 GENMASK(63, 57)
#define __HFGITR_EL2_MASK GENMASK(54, 0)
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6dcdae4..a1e2422 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -55,11 +55,6 @@
.get_input_level = kvm_arch_timer_get_input_level,
};
-static bool has_cntpoff(void)
-{
- return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
-}
-
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv(vcpu))
@@ -180,7 +175,7 @@
return timecounter->cc->read(timecounter->cc);
}
-static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
if (vcpu_has_nv(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
@@ -548,8 +543,7 @@
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
cval = read_sysreg_el0(SYS_CNTP_CVAL);
- if (!has_cntpoff())
- cval -= timer_get_offset(ctx);
+ cval -= timer_get_offset(ctx);
timer_set_cval(ctx, cval);
@@ -636,8 +630,7 @@
cval = timer_get_cval(ctx);
offset = timer_get_offset(ctx);
set_cntpoff(offset);
- if (!has_cntpoff())
- cval += offset;
+ cval += offset;
write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 9ced1bf..ee902ff 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -977,6 +977,8 @@
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
/* HFGRTR_EL2, HFGWTR_EL2 */
+ SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
+ SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6537f58..448b170 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -39,6 +39,26 @@
___activate_traps(vcpu);
+ if (has_cntpoff()) {
+ struct timer_map map;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're entrering the guest. Reload the correct
+ * values from memory now that TGE is clear.
+ */
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
+
+ if (map.direct_ptimer) {
+ write_sysreg_el0(val, SYS_CNTP_CVAL);
+ isb();
+ }
+ }
+
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -77,6 +97,30 @@
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ if (has_cntpoff()) {
+ struct timer_map map;
+ u64 val, offset;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're exiting the guest. Save the latest CVAL value
+ * to memory and apply the offset now that TGE is set.
+ */
+ val = read_sysreg_el0(SYS_CNTP_CVAL);
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+
+ offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+
+ if (map.direct_ptimer && offset) {
+ write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
+ isb();
+ }
+ }
+
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 0eea225..a243934c 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -39,7 +39,7 @@
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr))
+ if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
@@ -55,7 +55,7 @@
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !pmu)
+ if (!kvm_arm_support_pmu_v3())
return;
pmu->events_host &= ~clr;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e92ec81..0afd613 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2122,8 +2122,8 @@
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
- { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 },
- { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 },
+ { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
+ { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 7b2ac13..467ee6b 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -592,7 +592,7 @@
gfn_t gfn = gpa >> PAGE_SHIFT;
int srcu_idx, err;
kvm_pfn_t pfn;
- pte_t *ptep, entry, old_pte;
+ pte_t *ptep, entry;
bool writeable;
unsigned long prot_bits;
unsigned long mmu_seq;
@@ -664,7 +664,6 @@
entry = pfn_pte(pfn, __pgprot(prot_bits));
/* Write the PTE */
- old_pte = *ptep;
set_pte(ptep, entry);
err = 0;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c1b47d6..efaebba 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -303,11 +303,6 @@
return 0;
}
-static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
-{
- return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa);
-}
-
static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
{
set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
@@ -3216,11 +3211,12 @@
if (!gi->origin)
return;
- if (gi->alert.mask)
- KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x",
- kvm, gi->alert.mask);
- while (gisa_in_alert_list(gi->origin))
- cpu_relax();
+ WARN(gi->alert.mask != 0x00,
+ "unexpected non zero alert.mask 0x%02x",
+ gi->alert.mask);
+ gi->alert.mask = 0x00;
+ if (gisa_set_iam(gi->origin, gi->alert.mask))
+ process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 31089b85..a2be3ae 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -157,7 +157,8 @@
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif
-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+ unsigned int size, u64 xfeatures, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb..70d1394 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -528,7 +528,6 @@
u64 raw_event_mask;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
- struct irq_work irq_work;
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 19bf955..3ac0ffc 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -268,6 +268,7 @@
AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
AVIC_IPI_FAILURE_INVALID_TARGET,
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+ AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index a86d370..a21a4d0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -369,14 +369,15 @@
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
- unsigned int size, u32 pkru)
+ unsigned int size, u64 xfeatures, u32 pkru)
{
struct fpstate *kstate = gfpu->fpstate;
union fpregs_state *ustate = buf;
struct membuf mb = { .p = buf, .left = size };
if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
- __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
+ __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
+ XSTATE_COPY_XSAVE);
} else {
memcpy(&ustate->fxsave, &kstate->regs.fxsave,
sizeof(ustate->fxsave));
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index cadf687..ef690610 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1049,6 +1049,7 @@
* __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
* @to: membuf descriptor
* @fpstate: The fpstate buffer from which to copy
+ * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
* @pkru_val: The PKRU value to store in the PKRU component
* @copy_mode: The requested copy mode
*
@@ -1059,7 +1060,8 @@
* It supports partial copy but @to.pos always starts from zero.
*/
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode)
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode)
{
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
@@ -1083,7 +1085,7 @@
break;
case XSTATE_COPY_XSAVE:
- header.xfeatures &= fpstate->user_xfeatures;
+ header.xfeatures &= fpstate->user_xfeatures & xfeatures;
break;
}
@@ -1185,6 +1187,7 @@
enum xstate_copy_mode copy_mode)
{
__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
+ tsk->thread.fpu.fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@@ -1536,10 +1539,7 @@
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
-
- if (!guest_fpu)
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
-
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index a4ecb04..3518fb2 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -43,7 +43,8 @@
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
- u32 pkru_val, enum xstate_copy_mode copy_mode);
+ u64 xfeatures, u32 pkru_val,
+ enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0544e30..773132c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -360,14 +360,6 @@
vcpu->arch.guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
- /*
- * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
- * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
- * supported by the host.
- */
- vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
- XFEATURE_MASK_FPSSE;
-
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dcd60b3..3e977db 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2759,13 +2759,17 @@
{
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
+ int r;
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
- return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
- NULL);
+
+ r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
+ if (r && lvt_type == APIC_LVTPC)
+ kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
+ return r;
}
return 0;
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index edb89b5..9ae07db 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,14 +93,6 @@
#undef __KVM_X86_PMU_OP
}
-static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
-{
- struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
- struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
-
- kvm_pmu_deliver_pmi(vcpu);
-}
-
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -124,20 +116,7 @@
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
- if (!pmc->intr || skip_pmi)
- return;
-
- /*
- * Inject PMI. If vcpu was in a guest mode during NMI PMI
- * can be ejected on a guest mode re-entry. Otherwise we can't
- * be sure that vcpu wasn't executing hlt instruction at the
- * time of vmexit and is not going to re-enter guest mode until
- * woken up. So we should wake it, but this is impossible from
- * NMI context. Do it from irq work instead.
- */
- if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
- irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
- else
+ if (pmc->intr && !skip_pmi)
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
@@ -675,9 +654,6 @@
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-
- irq_work_sync(&pmu->irq_work);
static_call(kvm_x86_pmu_reset)(vcpu);
}
@@ -687,7 +663,6 @@
memset(pmu, 0, sizeof(*pmu));
static_call(kvm_x86_pmu_init)(vcpu);
- init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0;
pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7d9ba30..1d64113 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -74,6 +74,12 @@
return counter & pmc_bitmask(pmc);
}
+static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+ pmc->counter += val - pmc_read_counter(pmc);
+ pmc->counter &= pmc_bitmask(pmc);
+}
+
static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 2092db8..4b74ea9 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -529,8 +529,11 @@
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
+ case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
+ /* Invalid IPI with vector < 16 */
+ break;
default:
- pr_err("Unknown IPI interception\n");
+ vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
}
return 1;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dd496c9..3fea8c4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1253,6 +1253,9 @@
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
+
+ if (kvm_apicv_activated(vcpu->kvm))
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cef5a3d..373ff6a 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -160,7 +160,7 @@
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9507df9..beea99c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -691,7 +691,7 @@
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
struct sev_es_save_area *hostsa;
- u32 msr_hi;
+ u32 __maybe_unused msr_hi;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@@ -913,8 +913,7 @@
if (intercept == svm->x2avic_msrs_intercepted)
return;
- if (!x2avic_enabled ||
- !apic_x2apic_mode(svm->vcpu.arch.apic))
+ if (!x2avic_enabled)
return;
for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) {
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f2efa0b..820d3e1 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -436,11 +436,11 @@
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f18b06..41cce50 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5382,26 +5382,37 @@
return 0;
}
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
- struct kvm_xsave *guest_xsave)
-{
- if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
-
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- guest_xsave->region,
- sizeof(guest_xsave->region),
- vcpu->arch.pkru);
-}
static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
u8 *state, unsigned int size)
{
+ /*
+ * Only copy state for features that are enabled for the guest. The
+ * state itself isn't problematic, but setting bits in the header for
+ * features that are supported in *this* host but not exposed to the
+ * guest can result in KVM_SET_XSAVE failing when live migrating to a
+ * compatible host without the features that are NOT exposed to the
+ * guest.
+ *
+ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+ * supported by the host.
+ */
+ u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
+ XFEATURE_MASK_FPSSE;
+
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
- fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
- state, size, vcpu->arch.pkru);
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+ supported_xcr0, vcpu->arch.pkru);
+}
+
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
+{
+ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
@@ -12843,6 +12854,9 @@
return true;
#endif
+ if (kvm_test_request(KVM_REQ_PMI, vcpu))
+ return true;
+
if (kvm_arch_interrupt_allowed(vcpu) &&
(kvm_cpu_has_interrupt(vcpu) ||
kvm_guest_apic_has_interrupt(vcpu)))
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index bb3cb00..e748bc9 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -82,6 +82,8 @@
struct arch_timer_context *emul_ptimer;
};
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
+
struct arch_timer_cpu {
struct arch_timer_context timers[NR_KVM_TIMERS];
@@ -145,4 +147,9 @@
void kvm_timer_cpu_up(void);
void kvm_timer_cpu_down(void);
+static inline bool has_cntpoff(void)
+{
+ return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
+}
+
#endif
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 112bc1d..ce33d30 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/kvm_util.h
- *
* Copyright (C) 2018, Google LLC.
*/
#ifndef SELFTEST_KVM_UCALL_COMMON_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 4fd0421..25bc61d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -68,6 +68,12 @@
#define XFEATURE_MASK_OPMASK BIT_ULL(5)
#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
+#define XFEATURE_MASK_PT BIT_ULL(8)
+#define XFEATURE_MASK_PKRU BIT_ULL(9)
+#define XFEATURE_MASK_PASID BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
+#define XFEATURE_MASK_LBR BIT_ULL(15)
#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
@@ -147,6 +153,7 @@
#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
@@ -553,6 +560,13 @@
__asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}
+static inline void wrpkru(u32 pkru)
+{
+ /* Note, ECX and EDX are architecturally required to be '0'. */
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (pkru), "c"(0), "d"(0));
+}
+
static inline struct desc_ptr get_gdt(void)
{
struct desc_ptr gdt;
@@ -908,6 +922,15 @@
!kvm_cpu_has(feature.anti_feature);
}
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
static inline size_t kvm_cpuid2_size(int nr_entries)
{
return sizeof(struct kvm_cpuid2) +
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
index c4a69d8..7462751 100644
--- a/tools/testing/selftests/kvm/lib/guest_sprintf.c
+++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c
@@ -200,6 +200,13 @@
++fmt;
}
+ /*
+ * Play nice with %llu, %llx, etc. KVM selftests only support
+ * 64-bit builds, so just treat %ll* the same as %l*.
+ */
+ if (qualifier == 'l' && *fmt == 'l')
+ ++fmt;
+
/* default base */
base = 10;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
index 7168e25..89153a3 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/apic.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
* Copyright (C) 2021, Google LLC.
*/
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 20eb2e7..8698d1a 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -1033,9 +1033,8 @@
struct test_result *rbestruntime)
{
uint64_t maxslots;
- struct test_result result;
+ struct test_result result = {};
- result.nloops = 0;
if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
&result.nloops,
&result.slot_runtime, &result.guest_runtime)) {
@@ -1089,7 +1088,7 @@
.seconds = 5,
.runs = 1,
};
- struct test_result rbestslottime;
+ struct test_result rbestslottime = {};
int tctr;
if (!check_memory_sizes())
@@ -1098,11 +1097,10 @@
if (!parse_args(argc, argv, &targs))
return -1;
- rbestslottime.slottimens = 0;
for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
const struct test_data *data = &tests[tctr];
unsigned int runctr;
- struct test_result rbestruntime;
+ struct test_result rbestruntime = {};
if (tctr > targs.tfirst)
pr_info("\n");
@@ -1110,7 +1108,6 @@
pr_info("Testing %s performance with %i runs, %d seconds each\n",
data->name, targs.runs, targs.seconds);
- rbestruntime.runtimens = 0;
for (runctr = 0; runctr < targs.runs; runctr++)
if (!test_loop(data, &targs,
&rbestslottime, &rbestruntime))
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index e446d76..6c12785 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * KVM_GET/SET_* tests
- *
* Copyright (C) 2022, Red Hat, Inc.
*
* Tests for Hyper-V extensions to SVM.
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 7f36c32..18ac5c1 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/nx_huge_page_test.c
- *
* Usage: to be run via nx_huge_page_test.sh, which does the necessary
* environment setup and teardown
*
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
index 0560149..7cbb409 100755
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -4,7 +4,6 @@
# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
# Makes use of root privileges to set up huge pages and KVM module parameters.
#
-# tools/testing/selftests/kvm/nx_huge_page_test.sh
# Copyright (C) 2022, Google LLC.
set -e
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 4c4925a..88b58aa 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -139,6 +139,83 @@
static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
+
+ if (this_cpu_has(X86_FEATURE_XSAVE)) {
+ uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+ uint8_t buffer[4096];
+
+ memset(buffer, 0xcc, sizeof(buffer));
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ xsetbv(0, xgetbv(0) | supported_xcr0);
+
+ /*
+ * Modify state for all supported xfeatures to take them out of
+ * their "init" state, i.e. to make them show up in XSTATE_BV.
+ *
+ * Note off-by-default features, e.g. AMX, are out of scope for
+ * this particular testcase as they have a different ABI.
+ */
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+ asm volatile ("fincstp");
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+ asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_YMM)
+ asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+ asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+ asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+ asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+ }
+
+ if (this_cpu_has(X86_FEATURE_MPX)) {
+ uint64_t bounds[2] = { 10, 0xffffffffull };
+ uint64_t output[2] = { };
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+ /*
+ * Don't bother trying to get BNDCSR into the INUSE
+ * state. MSR_IA32_BNDCFGS doesn't count as it isn't
+ * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+ * modified by XRSTOR. Stuffing XSTATE_BV in the host
+ * is simpler than doing XRSTOR here in the guest.
+ *
+ * However, temporarily enable MPX in BNDCFGS so that
+ * BNDMOV actually loads BND1. If MPX isn't *fully*
+ * enabled, all MPX instructions are treated as NOPs.
+ *
+ * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+ * mnemonics/registers has been removed from gcc and
+ * clang (and was never fully supported by clang).
+ */
+ wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+ asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+ /*
+ * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+ * that BND1 actually got loaded.
+ */
+ asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+ wrmsr(MSR_IA32_BNDCFGS, 0);
+
+ GUEST_ASSERT_EQ(bounds[0], output[0]);
+ GUEST_ASSERT_EQ(bounds[1], output[1]);
+ }
+ if (this_cpu_has(X86_FEATURE_PKU)) {
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+ set_cr4(get_cr4() | X86_CR4_PKE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+ wrpkru(-1u);
+ }
+ }
+
GUEST_SYNC(2);
if (arg) {
@@ -153,10 +230,11 @@
int main(int argc, char *argv[])
{
+ uint64_t *xstate_bv, saved_xstate_bv;
vm_vaddr_t nested_gva = 0;
-
+ struct kvm_cpuid2 empty_cpuid = {};
struct kvm_regs regs1, regs2;
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu, *vcpuN;
struct kvm_vm *vm;
struct kvm_x86_state *state;
struct ucall uc;
@@ -209,6 +287,34 @@
/* Restore state in a new VM. */
vcpu = vm_recreate_with_one_vcpu(vm);
vcpu_load_state(vcpu, state);
+
+ /*
+ * Restore XSAVE state in a dummy vCPU, first without doing
+ * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
+ * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+ * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
+ * load only XSAVE state, MSRs in particular have a much more
+ * convoluted ABI.
+ *
+ * Load two versions of XSAVE state: one with the actual guest
+ * XSAVE state, and one with all supported features forced "on"
+ * in xstate_bv, e.g. to ensure that KVM allows loading all
+ * supported features, even if something goes awry in saving
+ * the original snapshot.
+ */
+ xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+ saved_xstate_bv = *xstate_bv;
+
+ vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = kvm_cpu_supported_xcr0();
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ vcpu_init_cpuid(vcpuN, &empty_cpuid);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = saved_xstate_bv;
+ vcpu_xsave_set(vcpuN, state->xsave);
+
kvm_x86_state_cleanup(state);
memset(®s2, 0, sizeof(regs2));
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
index 5b66981..59c7304 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -1,10 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * svm_vmcall_test
- *
* Copyright © 2021 Amazon.com, Inc. or its affiliates.
- *
- * Xen shared_info / pvclock testing
*/
#include "test_util.h"
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 05898ad..9ec9ab6 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -1,10 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * svm_vmcall_test
- *
* Copyright © 2021 Amazon.com, Inc. or its affiliates.
- *
- * Xen shared_info / pvclock testing
*/
#include "test_util.h"