Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Marcelo Tosatti.
* git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86 emulator: use stack size attribute to mask rsp in stack ops
KVM: MMU: Fix mmu_shrink() so that it can free mmu pages as intended
ppc: e500_tlb memset clears nothing
KVM: PPC: Add cache flush on page map
KVM: PPC: Book3S HV: Fix incorrect branch in H_CEDE code
KVM: x86: update KVM_SAVE_MSRS_BEGIN to correct value
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12f..a8bf5c6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -33,6 +33,7 @@
#include <asm/kvm_asm.h>
#include <asm/processor.h>
#include <asm/page.h>
+#include <asm/cacheflush.h>
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..e006f0b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -219,4 +219,16 @@
void kvmppc_free_lpid(long lpid);
void kvmppc_init_lpid(unsigned long nr_lpids);
+static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
+{
+	/* Clear i-cache for new pages */
+	struct page *page;
+	page = pfn_to_page(pfn);
+	if (!test_bit(PG_arch_1, &page->flags)) {
+		flush_dcache_icache_page(page);
+		set_bit(PG_arch_1, &page->flags);
+	}
+}
+
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index f922c29..837f13e 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -211,6 +211,9 @@
pteg1 |= PP_RWRX;
}
+ if (orig_pte->may_execute)
+ kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
+
local_irq_disable();
if (pteg[rr]) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 10fc8ec..0688b6b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -126,6 +126,8 @@
if (!orig_pte->may_execute)
rflags |= HPTE_R_N;
+ else
+ kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5a84c8d..44b72fe 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1421,13 +1421,13 @@
sync /* order setting ceded vs. testing prodded */
lbz r5,VCPU_PRODDED(r3)
cmpwi r5,0
- bne 1f
+ bne kvm_cede_prodded
li r0,0 /* set trap to 0 to say hcall is handled */
stw r0,VCPU_TRAP(r3)
li r0,H_SUCCESS
std r0,VCPU_GPR(R3)(r3)
BEGIN_FTR_SECTION
- b 2f /* just send it up to host on 970 */
+ b kvm_cede_exit /* just send it up to host on 970 */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/*
@@ -1446,7 +1446,7 @@
or r4,r4,r0
PPC_POPCNTW(R7,R4)
cmpw r7,r8
- bge 2f
+ bge kvm_cede_exit
stwcx. r4,0,r6
bne 31b
li r0,1
@@ -1555,7 +1555,8 @@
b hcall_real_fallback
/* cede when already previously prodded case */
-1: li r0,0
+kvm_cede_prodded:
+ li r0,0
stb r0,VCPU_PRODDED(r3)
sync /* order testing prodded vs. clearing ceded */
stb r0,VCPU_CEDED(r3)
@@ -1563,7 +1564,8 @@
blr
/* we've ceded but we want to give control to the host */
-2: li r3,H_TOO_HARD
+kvm_cede_exit:
+ li r3,H_TOO_HARD
blr
secondary_too_late:
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c510fc9..a2b6671 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -322,11 +322,11 @@
static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
{
if (vcpu_e500->g2h_tlb1_map)
- memset(vcpu_e500->g2h_tlb1_map,
- sizeof(u64) * vcpu_e500->gtlb_params[1].entries, 0);
+ memset(vcpu_e500->g2h_tlb1_map, 0,
+ sizeof(u64) * vcpu_e500->gtlb_params[1].entries);
if (vcpu_e500->h2g_tlb1_rmap)
- memset(vcpu_e500->h2g_tlb1_rmap,
- sizeof(unsigned int) * host_tlb_params[1].entries, 0);
+ memset(vcpu_e500->h2g_tlb1_rmap, 0,
+ sizeof(unsigned int) * host_tlb_params[1].entries);
}
static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -539,6 +539,9 @@
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
+
+ /* Clear i-cache for new pages */
+ kvmppc_mmu_flush_icache(pfn);
}
/* XXX only map the one-one case, for now use TLB0 */
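For reference, the clear_tlb1_bitmap() change in e500_tlb.c above simply restores
the proper memset() argument order: memset() takes (dest, fill byte, length), so
the old calls passed the buffer size as the fill byte and 0 as the length and
therefore cleared nothing. A minimal userspace sketch (illustration only, not
kernel code):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned long long map[4] = { 1, 2, 3, 4 };

            /* buggy argument order: length is 0, nothing is cleared */
            memset(map, sizeof(map), 0);
            printf("%llu\n", map[0]);       /* still 1 */

            /* fixed argument order: the whole array is cleared */
            memset(map, 0, sizeof(map));
            printf("%llu\n", map[0]);       /* now 0 */
            return 0;
    }

Newer compilers can warn about the transposed arguments, but the call itself is
a silent no-op, which is exactly what happened here.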
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index baaafde..fbdad0e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -469,6 +469,7 @@
__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
#endif
}
+EXPORT_SYMBOL(flush_dcache_icache_page);
void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
{
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 97d9a99..a3b57a2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -475,13 +475,26 @@
return address_mask(ctxt, reg);
}
+static void masked_increment(ulong *reg, ulong mask, int inc)
+{
+	assign_masked(reg, *reg + inc, mask);
+}
+
static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
{
+	ulong mask;
+
	if (ctxt->ad_bytes == sizeof(unsigned long))
-		*reg += inc;
+		mask = ~0UL;
	else
-		*reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
+		mask = ad_mask(ctxt);
+	masked_increment(reg, mask, inc);
+}
+
+static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
+{
+	masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc);
}
static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -1522,8 +1535,8 @@
{
struct segmented_address addr;
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes);
- addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
+ rsp_increment(ctxt, -bytes);
+ addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
addr.seg = VCPU_SREG_SS;
return segmented_write(ctxt, addr, data, bytes);
@@ -1542,13 +1555,13 @@
int rc;
struct segmented_address addr;
- addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
+ addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
addr.seg = VCPU_SREG_SS;
rc = segmented_read(ctxt, addr, dest, len);
if (rc != X86EMUL_CONTINUE)
return rc;
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len);
+ rsp_increment(ctxt, len);
return rc;
}
@@ -1688,8 +1701,7 @@
while (reg >= VCPU_REGS_RAX) {
if (reg == VCPU_REGS_RSP) {
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP],
- ctxt->op_bytes);
+ rsp_increment(ctxt, ctxt->op_bytes);
--reg;
}
@@ -2825,7 +2837,7 @@
rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
- register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val);
+ rsp_increment(ctxt, ctxt->src.val);
return X86EMUL_CONTINUE;
}
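The emulate.c changes above route every stack-pointer update and stack address
calculation through stack_mask() rather than the address-size mask, so that,
for example, a push with a 16-bit stack segment wraps SP within 64K and leaves
the upper bits of RSP untouched. A standalone sketch of the masked update; it
assumes assign_masked() performs the read-modify-write shown below, which is
what the new masked_increment() relies on:

    #include <stdio.h>

    typedef unsigned long ulong;

    /* assumed read-modify-write semantics of the emulator's assign_masked() */
    static void assign_masked(ulong *dest, ulong val, ulong mask)
    {
            *dest = (*dest & ~mask) | (val & mask);
    }

    static void masked_increment(ulong *reg, ulong mask, int inc)
    {
            assign_masked(reg, *reg + inc, mask);
    }

    int main(void)
    {
            ulong rsp = 0x12340002;         /* stale upper bits, SP = 0x0002 */

            /* push 4 bytes with a 16-bit stack: only the low 16 bits change */
            masked_increment(&rsp, 0xffffUL, -4);
            printf("rsp = %#lx\n", rsp);    /* 0x1234fffe: SP wrapped, rest kept */
            return 0;
    }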
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01ca004..7fbd0d2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4113,16 +4113,21 @@
LIST_HEAD(invalid_list);
/*
+ * Never scan more than sc->nr_to_scan VM instances.
+ * In practice we never hit this limit, since we do not try
+ * to shrink more than one VM and it is very unlikely to see
+ * !n_used_mmu_pages so many times.
+ */
+ if (!nr_to_scan--)
+ break;
+ /*
* n_used_mmu_pages is accessed without holding kvm->mmu_lock
* here. We may skip a VM instance erroneously, but we do not
* want to shrink a VM that only started to populate its MMU
* anyway.
*/
- if (kvm->arch.n_used_mmu_pages > 0) {
- if (!nr_to_scan--)
- break;
+ if (!kvm->arch.n_used_mmu_pages)
continue;
- }
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
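The point of the reordering above: in the old loop both the nr_to_scan
bookkeeping and the continue statement sat inside the n_used_mmu_pages check,
so a VM that actually had pages to free consumed the scan budget and was then
skipped, while the freeing code further down was only ever reached for VMs with
nothing to free. A toy model of the two loop shapes (illustration only, not the
kernel code):

    #include <stdio.h>

    struct vm { int used_pages; };

    int main(void)
    {
            struct vm vms[3] = { { 0 }, { 5 }, { 7 } };
            int nr_to_scan, freed, i;

            /* old shape: VMs with pages consume the budget and get skipped */
            nr_to_scan = 3;
            freed = 0;
            for (i = 0; i < 3; i++) {
                    if (vms[i].used_pages > 0) {
                            if (!nr_to_scan--)
                                    break;
                            continue;       /* never reaches the "zap" below */
                    }
                    /* the "zap" path is only reached when used_pages == 0 */
                    if (vms[i].used_pages) {
                            vms[i].used_pages--;
                            freed++;
                    }
            }
            printf("old shape freed %d\n", freed);  /* 0 */

            /* new shape: charge the budget first, skip only empty VMs */
            nr_to_scan = 3;
            freed = 0;
            for (i = 0; i < 3; i++) {
                    if (!nr_to_scan--)
                            break;
                    if (!vms[i].used_pages)
                            continue;
                    vms[i].used_pages--;            /* "zap" one page */
                    freed++;
            }
            printf("new shape freed %d\n", freed);  /* 2 */
            return 0;
    }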
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 42bce48..dce75b76 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -806,7 +806,7 @@
* kvm-specific. Those are put in the beginning of the list.
*/
-#define KVM_SAVE_MSRS_BEGIN 9
+#define KVM_SAVE_MSRS_BEGIN 10
static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,