blob: 29f3d0eed9256c2cd55ef421780a1c204f5558f3 [file] [log] [blame]
/*
* All test cases of nested virtualization should be in this file
*
* Author : Arthur Chunqi Li <yzt356@gmail.com>
*/
#include <asm/debugreg.h>
#include "vmx.h"
#include "msr.h"
#include "processor.h"
#include "vm.h"
#include "pci.h"
#include "fwcfg.h"
#include "isr.h"
#include "desc.h"
#include "apic.h"
#include "types.h"
#include "vmalloc.h"
#include "alloc_page.h"
#include "smp.h"
#include "delay.h"
/* Address pattern named as non-canonical; not referenced in the visible part of this file. */
#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull
/* Shift named for the INVVPID-types capability field; not referenced in the visible part of this file. */
#define VPID_CAP_INVVPID_TYPES_SHIFT 40
/* IA32_PAT as read from the MSR by test_ctrl_pat_init(). */
u64 ia32_pat;
/* IA32_EFER as read from the MSR by test_ctrl_efer_init(). */
u64 ia32_efer;
/* Pages allocated by iobmp_init() and written to IO_BITMAP_A / IO_BITMAP_B. */
void *io_bitmap_a, *io_bitmap_b;
/* Not referenced in the visible part of this file. */
u16 ioport;
/* Root EPT table page, allocated by setup_ept(). */
unsigned long *pml4;
/* Last value written to the EPTP VMCS field (see __setup_ept()). */
u64 eptp;
/* Two test pages allocated and seeded with magic values by ept_init_common(). */
void *data_page1, *data_page2;
/* Memory BAR of the PCI test device; set by ept_init_common() only when the device is found. */
phys_addr_t pci_physaddr;
/* Buffer scanned as an array of u64 entries by pml_exit_handler(). */
void *pml_log;
/* Exclusive upper bound of the pml_log scan; GUEST_PML_INDEX is reset to PML_INDEX - 1. */
#define PML_INDEX 512
/*
 * Find-first-set: return the 1-based position of the least significant
 * set bit of @x, or 0 when @x has no bits set.
 */
static inline unsigned ffs(unsigned x)
{
	if (x == 0)
		return 0;

	/* Count of trailing zero bits is the 0-based index of the lowest set bit. */
	return (unsigned)__builtin_ctz(x) + 1;
}
/* Execute a single VMCALL instruction. */
static inline void vmcall(void)
{
	__asm__ __volatile__("vmcall");
}
static void basic_guest_main(void)
{
report(1, "Basic VMX test");
}
static int basic_exit_handler(union exit_reason exit_reason)
{
report(0, "Basic VMX test");
print_vmexit_info(exit_reason);
return VMX_TEST_EXIT;
}
/*
 * Guest side of the vmlaunch/vmresume test.
 *
 * Records a pass for "test vmlaunch" as soon as the guest runs, then issues
 * a VMCALL with 0xABCD in RAX.  vmenter_exit_handler() is expected to
 * replace RAX with 0xFFFF and resume the guest; the guest additionally
 * checks that RSP is the same before and after the exit.
 */
static void vmenter_main(void)
{
	u64 rax;
	u64 rsp, resume_rsp;

	report(1, "test vmlaunch");

	/*
	 * %rax is written explicitly inside the asm, so it must be listed as
	 * clobbered; otherwise the compiler may place one of the operands in
	 * it.  Operand 0 is written before input operand 3 is consumed, hence
	 * the early-clobber modifier on it.
	 */
	asm volatile(
		"mov %%rsp, %0\n\t"
		"mov %3, %%rax\n\t"
		"vmcall\n\t"
		"mov %%rax, %1\n\t"
		"mov %%rsp, %2\n\t"
		: "=&r"(rsp), "=r"(rax), "=r"(resume_rsp)
		: "g"(0xABCD)
		: "rax");
	report((rax == 0xFFFF) && (rsp == resume_rsp), "test vmresume");
}
/*
 * Host side of the vmlaunch/vmresume test.
 *
 * On a VMCALL exit with guest RAX == 0xABCD, RAX is replaced with 0xFFFF,
 * GUEST_RIP is advanced by 3 bytes and the guest is resumed.  Anything
 * else is reported as a "test vmresume" failure and returns
 * VMX_TEST_VMEXIT.
 */
static int vmenter_exit_handler(union exit_reason exit_reason)
{
	u64 rip = vmcs_read(GUEST_RIP);

	if (exit_reason.basic != VMX_VMCALL) {
		report(0, "test vmresume");
		print_vmexit_info(exit_reason);
		return VMX_TEST_VMEXIT;
	}

	if (regs.rax != 0xABCD) {
		report(0, "test vmresume");
		return VMX_TEST_VMEXIT;
	}

	regs.rax = 0xFFFF;
	/* Step over the vmcall instruction (hard-coded length of 3). */
	vmcs_write(GUEST_RIP, rip + 3);
	return VMX_TEST_RESUME;
}
/* Low 5 bits of MSR_IA32_VMX_MISC; used as a right shift on TSC deltas. */
u32 preempt_scale;
/* TSC snapshot taken by the guest before each timed wait. */
volatile unsigned long long tsc_val;
/* Value programmed into PREEMPT_TIMER_VALUE by the preemption timer test. */
volatile u32 preempt_val;
/* RIP just past the stage-4 vmcall; compared with GUEST_RIP on the following preemption exit. */
u64 saved_rip;
static int preemption_timer_init(struct vmcs *vmcs)
{
if (!(ctrl_pin_rev.clr & PIN_PREEMPT)) {
printf("\tPreemption timer is not supported\n");
return VMX_TEST_EXIT;
}
vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) | PIN_PREEMPT);
preempt_val = 10000000;
vmcs_write(PREEMPT_TIMER_VALUE, preempt_val);
preempt_scale = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
if (!(ctrl_exit_rev.clr & EXI_SAVE_PREEMPT))
printf("\tSave preemption value is not supported\n");
return VMX_TEST_START;
}
/*
 * Guest side of the preemption timer test.  Progress is tracked through the
 * shared test stage, which preemption_timer_exit_handler() also updates.
 */
static void preemption_timer_main(void)
{
tsc_val = rdtsc();
/* Stages 0/1: only exercised when the "save preemption timer" exit control exists. */
if (ctrl_exit_rev.clr & EXI_SAVE_PREEMPT) {
vmx_set_test_stage(0);
vmcall();
/* The handler moves to stage 1 after the "keep value" check. */
if (vmx_get_test_stage() == 1)
vmcall();
}
vmx_set_test_stage(1);
/* Busy-wait until the handler changes the stage on a preemption exit. */
while (vmx_get_test_stage() == 1) {
/* Give up after ten times the programmed timer value (in scaled TSC units). */
if (((rdtsc() - tsc_val) >> preempt_scale)
> 10 * preempt_val) {
vmx_set_test_stage(2);
vmcall();
}
}
/* Restart the time base, then halt; the handler checks the exit taken during hlt. */
tsc_val = rdtsc();
asm volatile ("hlt");
/* Stage-4 vmcall: the handler re-arms the timer with value 0 and records saved_rip. */
vmcall();
/* Reaching this vmcall at stage 5 is reported as a failure by the handler. */
vmx_set_test_stage(5);
vmcall();
}
/*
 * Host side of the preemption timer test.
 *
 * Dispatches on the exit reason (preemption timer expiry or VMCALL) and on
 * the current test stage.  Returns VMX_TEST_RESUME to continue the guest;
 * every path that falls out of the outer switch disables PIN_PREEMPT and
 * returns VMX_TEST_VMEXIT.
 */
static int preemption_timer_exit_handler(union exit_reason exit_reason)
{
bool guest_halted;
u64 guest_rip;
u32 insn_len;
u32 ctrl_exit;
guest_rip = vmcs_read(GUEST_RIP);
insn_len = vmcs_read(EXI_INST_LEN);
switch (exit_reason.basic) {
case VMX_PREEMPT:
switch (vmx_get_test_stage()) {
case 1:
case 2:
/* Timer fired during the busy-wait: elapsed scaled TSC must reach preempt_val. */
report(((rdtsc() - tsc_val) >> preempt_scale) >= preempt_val,
"busy-wait for preemption timer");
vmx_set_test_stage(3);
vmcs_write(PREEMPT_TIMER_VALUE, preempt_val);
return VMX_TEST_RESUME;
case 3:
/* Timer fired after the guest executed hlt: activity state must be HLT. */
guest_halted =
(vmcs_read(GUEST_ACTV_STATE) == ACTV_HLT);
report(((rdtsc() - tsc_val) >> preempt_scale) >= preempt_val
&& guest_halted,
"preemption timer during hlt");
vmx_set_test_stage(4);
/* Turn the timer and the save-value control off, and make the guest runnable again. */
vmcs_write(PIN_CONTROLS,
vmcs_read(PIN_CONTROLS) & ~PIN_PREEMPT);
vmcs_write(EXI_CONTROLS,
vmcs_read(EXI_CONTROLS) & ~EXI_SAVE_PREEMPT);
vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
return VMX_TEST_RESUME;
case 4:
/* Timer value 0: the exit must occur at the RIP recorded on the stage-4 vmcall. */
report(saved_rip == guest_rip,
"preemption timer with 0 value");
break;
default:
report(false, "Invalid stage.");
print_vmexit_info(exit_reason);
break;
}
break;
case VMX_VMCALL:
/* Skip the vmcall instruction up front for every stage. */
vmcs_write(GUEST_RIP, guest_rip + insn_len);
switch (vmx_get_test_stage()) {
case 0:
/* Save control still off: the VMCS timer value must be unchanged. */
report(vmcs_read(PREEMPT_TIMER_VALUE) == preempt_val,
"Keep preemption value");
vmx_set_test_stage(1);
vmcs_write(PREEMPT_TIMER_VALUE, preempt_val);
/* Enable EXI_SAVE_PREEMPT, limited to bits allowed by ctrl_exit_rev.clr. */
ctrl_exit = (vmcs_read(EXI_CONTROLS) |
EXI_SAVE_PREEMPT) & ctrl_exit_rev.clr;
vmcs_write(EXI_CONTROLS, ctrl_exit);
return VMX_TEST_RESUME;
case 1:
/* Save control on: the stored timer value must have decreased. */
report(vmcs_read(PREEMPT_TIMER_VALUE) < preempt_val,
"Save preemption value");
return VMX_TEST_RESUME;
case 2:
/* The guest gave up waiting for the timer. */
report(0, "busy-wait for preemption timer");
vmx_set_test_stage(3);
vmcs_write(PREEMPT_TIMER_VALUE, preempt_val);
return VMX_TEST_RESUME;
case 3:
/* The guest got past hlt without a preemption exit. */
report(0, "preemption timer during hlt");
vmx_set_test_stage(4);
/* fall through */
case 4:
/* Re-arm the timer with value 0 and remember where the guest resumes. */
vmcs_write(PIN_CONTROLS,
vmcs_read(PIN_CONTROLS) | PIN_PREEMPT);
vmcs_write(PREEMPT_TIMER_VALUE, 0);
saved_rip = guest_rip + insn_len;
return VMX_TEST_RESUME;
case 5:
/* The guest ran on past the zero-valued timer. */
report(0,
"preemption timer with 0 value (vmcall stage 5)");
break;
default:
// Should not reach here
report(false, "unexpected stage, %d",
vmx_get_test_stage());
print_vmexit_info(exit_reason);
return VMX_TEST_VMEXIT;
}
break;
default:
report(false, "Unknown exit reason, 0x%x", exit_reason.full);
print_vmexit_info(exit_reason);
}
/* Common exit path: make sure the timer control is off before leaving. */
vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_PREEMPT);
return VMX_TEST_VMEXIT;
}
static void msr_bmp_init(void)
{
void *msr_bitmap;
u32 ctrl_cpu0;
msr_bitmap = alloc_page();
ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0);
ctrl_cpu0 |= CPU_MSR_BITMAP;
vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0);
vmcs_write(MSR_BITMAP, (u64)msr_bitmap);
}
static void *get_msr_bitmap(void)
{
void *msr_bitmap;
if (vmcs_read(CPU_EXEC_CTRL0) & CPU_MSR_BITMAP) {
msr_bitmap = (void *)vmcs_read(MSR_BITMAP);
} else {
msr_bitmap = alloc_page();
memset(msr_bitmap, 0xff, PAGE_SIZE);
vmcs_write(MSR_BITMAP, (u64)msr_bitmap);
vmcs_set_bits(CPU_EXEC_CTRL0, CPU_MSR_BITMAP);
}
return msr_bitmap;
}
/*
 * Zero the MSR-bitmap words that cover MSRs APIC_BASE_MSR up to
 * APIC_BASE_MSR + 0xff, one unsigned long at a time.
 *
 * Two words are cleared per step: the one indexed by the MSR number and
 * the one 0x800 bytes further into the page (presumably the matching
 * write-intercept half of the bitmap - confirm against the SDM layout).
 */
static void disable_intercept_for_x2apic_msrs(void)
{
	unsigned long *bitmap = (unsigned long *)get_msr_bitmap();
	const unsigned int second_half = 0x800 / sizeof(long);
	u32 msr = APIC_BASE_MSR;

	while (msr < (APIC_BASE_MSR + 0xff)) {
		unsigned int idx = msr / BITS_PER_LONG;

		bitmap[idx] = 0;
		bitmap[idx + second_half] = 0;
		msr += BITS_PER_LONG;
	}
}
static int test_ctrl_pat_init(struct vmcs *vmcs)
{
u64 ctrl_ent;
u64 ctrl_exi;
msr_bmp_init();
if (!(ctrl_exit_rev.clr & EXI_SAVE_PAT) &&
!(ctrl_exit_rev.clr & EXI_LOAD_PAT) &&
!(ctrl_enter_rev.clr & ENT_LOAD_PAT)) {
printf("\tSave/load PAT is not supported\n");
return 1;
}
ctrl_ent = vmcs_read(ENT_CONTROLS);
ctrl_exi = vmcs_read(EXI_CONTROLS);
ctrl_ent |= ctrl_enter_rev.clr & ENT_LOAD_PAT;
ctrl_exi |= ctrl_exit_rev.clr & (EXI_SAVE_PAT | EXI_LOAD_PAT);
vmcs_write(ENT_CONTROLS, ctrl_ent);
vmcs_write(EXI_CONTROLS, ctrl_exi);
ia32_pat = rdmsr(MSR_IA32_CR_PAT);
vmcs_write(GUEST_PAT, 0x0);
vmcs_write(HOST_PAT, ia32_pat);
return VMX_TEST_START;
}
/*
 * Guest side of the PAT control test.
 *
 * With ENT_LOAD_PAT available the guest must start with PAT == 0 (the value
 * placed in GUEST_PAT by the init function).  It then writes 0x6 to the
 * MSR, exits via VMCALL, and after resuming expects PAT to equal the
 * cached ia32_pat.
 */
static void test_ctrl_pat_main(void)
{
	u64 pat = rdmsr(MSR_IA32_CR_PAT);

	if (ctrl_enter_rev.clr & ENT_LOAD_PAT) {
		if (pat != 0) {
			report(0, "Entry load PAT");
			return;
		}
	} else {
		printf("\tENT_LOAD_PAT is not supported.\n");
	}

	wrmsr(MSR_IA32_CR_PAT, 0x6);
	vmcall();

	pat = rdmsr(MSR_IA32_CR_PAT);
	if (ctrl_enter_rev.clr & ENT_LOAD_PAT)
		report(pat == ia32_pat, "Entry load PAT");
}
/*
 * Host side of the PAT control test.
 *
 * On VMCALL: checks that the guest's PAT (0x6) was saved to GUEST_PAT when
 * EXI_SAVE_PAT is available, checks that the host MSR equals ia32_pat when
 * EXI_LOAD_PAT is available, then sets GUEST_PAT to ia32_pat, skips the
 * 3-byte vmcall and resumes.  Any other exit is logged and returns
 * VMX_TEST_VMEXIT.
 */
static int test_ctrl_pat_exit_handler(union exit_reason exit_reason)
{
	u64 rip = vmcs_read(GUEST_RIP);
	u64 saved_pat;

	if (exit_reason.basic != VMX_VMCALL) {
		printf("ERROR : Unknown exit reason, 0x%x.\n", exit_reason.full);
		return VMX_TEST_VMEXIT;
	}

	saved_pat = vmcs_read(GUEST_PAT);
	if (ctrl_exit_rev.clr & EXI_SAVE_PAT) {
		report(saved_pat == 0x6, "Exit save PAT");
	} else {
		printf("\tEXI_SAVE_PAT is not supported\n");
		vmcs_write(GUEST_PAT, 0x6);
	}

	if (ctrl_exit_rev.clr & EXI_LOAD_PAT)
		report(rdmsr(MSR_IA32_CR_PAT) == ia32_pat,
		       "Exit load PAT");
	else
		printf("\tEXI_LOAD_PAT is not supported\n");

	vmcs_write(GUEST_PAT, ia32_pat);
	vmcs_write(GUEST_RIP, rip + 3);
	return VMX_TEST_RESUME;
}
static int test_ctrl_efer_init(struct vmcs *vmcs)
{
u64 ctrl_ent;
u64 ctrl_exi;
msr_bmp_init();
ctrl_ent = vmcs_read(ENT_CONTROLS) | ENT_LOAD_EFER;
ctrl_exi = vmcs_read(EXI_CONTROLS) | EXI_SAVE_EFER | EXI_LOAD_EFER;
vmcs_write(ENT_CONTROLS, ctrl_ent & ctrl_enter_rev.clr);
vmcs_write(EXI_CONTROLS, ctrl_exi & ctrl_exit_rev.clr);
ia32_efer = rdmsr(MSR_EFER);
vmcs_write(GUEST_EFER, ia32_efer ^ EFER_NX);
vmcs_write(HOST_EFER, ia32_efer ^ EFER_NX);
return VMX_TEST_START;
}
/*
 * Guest side of the EFER control test.
 *
 * With ENT_LOAD_EFER available the guest must start with EFER equal to
 * ia32_efer with NX toggled.  It then writes ia32_efer to the MSR, exits
 * via VMCALL, and after resuming expects EFER to equal ia32_efer.
 */
static void test_ctrl_efer_main(void)
{
	u64 efer = rdmsr(MSR_EFER);

	if (ctrl_enter_rev.clr & ENT_LOAD_EFER) {
		if (efer != (ia32_efer ^ EFER_NX)) {
			report(0, "Entry load EFER");
			return;
		}
	} else {
		printf("\tENT_LOAD_EFER is not supported.\n");
	}

	wrmsr(MSR_EFER, ia32_efer);
	vmcall();

	efer = rdmsr(MSR_EFER);
	if (ctrl_enter_rev.clr & ENT_LOAD_EFER)
		report(efer == ia32_efer, "Entry load EFER");
}
/*
 * Host side of the EFER control test.
 *
 * On VMCALL:
 *  - with EXI_SAVE_EFER, GUEST_EFER must hold the value the guest wrote
 *    (ia32_efer); without it, GUEST_EFER is set by hand;
 *  - with EXI_LOAD_EFER, the host MSR must hold HOST_EFER
 *    (ia32_efer ^ EFER_NX); without it, the MSR is set by hand;
 *  - GUEST_EFER is then set to ia32_efer so the guest can verify the
 *    entry load, the 3-byte vmcall is skipped and the guest is resumed.
 *
 * Any other exit is logged and returns VMX_TEST_VMEXIT.
 */
static int test_ctrl_efer_exit_handler(union exit_reason exit_reason)
{
	u64 guest_rip;
	u64 guest_efer;

	guest_rip = vmcs_read(GUEST_RIP);
	switch (exit_reason.basic) {
	case VMX_VMCALL:
		guest_efer = vmcs_read(GUEST_EFER);
		if (!(ctrl_exit_rev.clr & EXI_SAVE_EFER)) {
			printf("\tEXI_SAVE_EFER is not supported\n");
			vmcs_write(GUEST_EFER, ia32_efer);
		} else {
			report(guest_efer == ia32_efer, "Exit save EFER");
		}
		if (!(ctrl_exit_rev.clr & EXI_LOAD_EFER)) {
			printf("\tEXI_LOAD_EFER is not supported\n");
			wrmsr(MSR_EFER, ia32_efer ^ EFER_NX);
		} else {
			report(rdmsr(MSR_EFER) == (ia32_efer ^ EFER_NX),
			       "Exit load EFER");
		}
		/*
		 * This used to write ia32_efer into GUEST_PAT (copy-paste from
		 * the PAT test), which left GUEST_EFER untouched and put an
		 * EFER value into the guest PAT field.
		 */
		vmcs_write(GUEST_EFER, ia32_efer);
		vmcs_write(GUEST_RIP, guest_rip + 3);
		return VMX_TEST_RESUME;
	default:
		printf("ERROR : Unknown exit reason, 0x%x.\n", exit_reason.full);
		break;
	}
	return VMX_TEST_VMEXIT;
}
/* CR0/CR4 values shared between cr_shadowing_main() and cr_shadowing_exit_handler(). */
u32 guest_cr0, guest_cr4;
static void cr_shadowing_main(void)
{
u32 cr0, cr4, tmp;
// Test read through
vmx_set_test_stage(0);
guest_cr0 = read_cr0();
if (vmx_get_test_stage() == 1)
report(0, "Read through CR0");
else
vmcall();
vmx_set_test_stage(1);
guest_cr4 = read_cr4();
if (vmx_get_test_stage() == 2)
report(0, "Read through CR4");
else
vmcall();
// Test write through
guest_cr0 = guest_cr0 ^ (X86_CR0_TS | X86_CR0_MP);
guest_cr4 = guest_cr4 ^ (X86_CR4_TSD | X86_CR4_DE);
vmx_set_test_stage(2);
write_cr0(guest_cr0);
if (vmx_get_test_stage() == 3)
report(0, "Write throuth CR0");
else
vmcall();
vmx_set_test_stage(3);
write_cr4(guest_cr4);
if (vmx_get_test_stage() == 4)
report(0, "Write through CR4");
else
vmcall();
// Test read shadow
vmx_set_test_stage(4);
vmcall();
cr0 = read_cr0();
if (vmx_get_test_stage() != 5)
report(cr0 == guest_cr0, "Read shadowing CR0");
vmx_set_test_stage(5);
cr4 = read_cr4();
if (vmx_get_test_stage() != 6)
report(cr4 == guest_cr4, "Read shadowing CR4");
// Test write shadow (same value with shadow)
vmx_set_test_stage(6);
write_cr0(guest_cr0);
if (vmx_get_test_stage() == 7)
report(0, "Write shadowing CR0 (same value with shadow)");
else
vmcall();
vmx_set_test_stage(7);
write_cr4(guest_cr4);
if (vmx_get_test_stage() == 8)
report(0, "Write shadowing CR4 (same value with shadow)");
else
vmcall();
// Test write shadow (different value)
vmx_set_test_stage(8);
tmp = guest_cr0 ^ X86_CR0_TS;
asm volatile("mov %0, %%rsi\n\t"
"mov %%rsi, %%cr0\n\t"
::"m"(tmp)
:"rsi", "memory", "cc");
report(vmx_get_test_stage() == 9,
"Write shadowing different X86_CR0_TS");
vmx_set_test_stage(9);
tmp = guest_cr0 ^ X86_CR0_MP;
asm volatile("mov %0, %%rsi\n\t"
"mov %%rsi, %%cr0\n\t"
::"m"(tmp)
:"rsi", "memory", "cc");
report(vmx_get_test_stage() == 10,
"Write shadowing different X86_CR0_MP");
vmx_set_test_stage(10);
tmp = guest_cr4 ^ X86_CR4_TSD;
asm volatile("mov %0, %%rsi\n\t"
"mov %%rsi, %%cr4\n\t"
::"m"(tmp)
:"rsi", "memory", "cc");
report(vmx_get_test_stage() == 11,
"Write shadowing different X86_CR4_TSD");
vmx_set_test_stage(11);
tmp = guest_cr4 ^ X86_CR4_DE;
asm volatile("mov %0, %%rsi\n\t"
"mov %%rsi, %%cr4\n\t"
::"m"(tmp)
:"rsi", "memory", "cc");
report(vmx_get_test_stage() == 12,
"Write shadowing different X86_CR4_DE");
}
/*
 * Host side of the CR shadowing test.
 *
 * VMCALL exits carry the checks for the stages in which no CR-access exit
 * is expected; CR-access exits (VMX_CR) are failures for stages 4-7 and the
 * expected outcome for stages 8-11.  Both paths skip the exiting
 * instruction and resume the guest; an unexpected stage or exit reason
 * returns VMX_TEST_VMEXIT.
 */
static int cr_shadowing_exit_handler(union exit_reason exit_reason)
{
u64 guest_rip;
u32 insn_len;
u32 exit_qual;
guest_rip = vmcs_read(GUEST_RIP);
insn_len = vmcs_read(EXI_INST_LEN);
exit_qual = vmcs_read(EXI_QUALIFICATION);
switch (exit_reason.basic) {
case VMX_VMCALL:
switch (vmx_get_test_stage()) {
case 0:
/* Stages 0-3: the value the guest read or wrote must match the VMCS guest CR. */
report(guest_cr0 == vmcs_read(GUEST_CR0),
"Read through CR0");
break;
case 1:
report(guest_cr4 == vmcs_read(GUEST_CR4),
"Read through CR4");
break;
case 2:
report(guest_cr0 == vmcs_read(GUEST_CR0),
"Write through CR0");
break;
case 3:
report(guest_cr4 == vmcs_read(GUEST_CR4),
"Write through CR4");
break;
case 4:
/*
 * Install the masks and read shadows.  guest_cr0/guest_cr4 become the
 * real register values with the masked bits inverted, and the shadows
 * are loaded with exactly those inverted bits.
 */
guest_cr0 = vmcs_read(GUEST_CR0) ^ (X86_CR0_TS | X86_CR0_MP);
guest_cr4 = vmcs_read(GUEST_CR4) ^ (X86_CR4_TSD | X86_CR4_DE);
vmcs_write(CR0_MASK, X86_CR0_TS | X86_CR0_MP);
vmcs_write(CR0_READ_SHADOW, guest_cr0 & (X86_CR0_TS | X86_CR0_MP));
vmcs_write(CR4_MASK, X86_CR4_TSD | X86_CR4_DE);
vmcs_write(CR4_READ_SHADOW, guest_cr4 & (X86_CR4_TSD | X86_CR4_DE));
break;
case 6:
/* After writing the shadowed value, the real register must still differ in the masked bits. */
report(guest_cr0 == (vmcs_read(GUEST_CR0) ^ (X86_CR0_TS | X86_CR0_MP)),
"Write shadowing CR0 (same value)");
break;
case 7:
report(guest_cr4 == (vmcs_read(GUEST_CR4) ^ (X86_CR4_TSD | X86_CR4_DE)),
"Write shadowing CR4 (same value)");
break;
default:
// Should not reach here
report(false, "unexpected stage, %d",
vmx_get_test_stage());
print_vmexit_info(exit_reason);
return VMX_TEST_VMEXIT;
}
vmcs_write(GUEST_RIP, guest_rip + insn_len);
return VMX_TEST_RESUME;
case VMX_CR:
switch (vmx_get_test_stage()) {
case 4:
/* Stages 4-7: a CR-access exit here is a failure; bump the stage so the guest notices. */
report(0, "Read shadowing CR0");
vmx_inc_test_stage();
break;
case 5:
report(0, "Read shadowing CR4");
vmx_inc_test_stage();
break;
case 6:
report(0, "Write shadowing CR0 (same value)");
vmx_inc_test_stage();
break;
case 7:
report(0, "Write shadowing CR4 (same value)");
vmx_inc_test_stage();
break;
case 8:
case 9:
/* Stages 8-11: the exit is expected; advance only when the qualification matches. */
// 0x600 encodes "mov %esi, %cr0"
if (exit_qual == 0x600)
vmx_inc_test_stage();
break;
case 10:
case 11:
// 0x604 encodes "mov %esi, %cr4"
if (exit_qual == 0x604)
vmx_inc_test_stage();
break;
default:
// Should not reach here
report(false, "unexpected stage, %d",
vmx_get_test_stage());
print_vmexit_info(exit_reason);
return VMX_TEST_VMEXIT;
}
vmcs_write(GUEST_RIP, guest_rip + insn_len);
return VMX_TEST_RESUME;
default:
report(false, "Unknown exit reason, 0x%x", exit_reason.full);
print_vmexit_info(exit_reason);
}
return VMX_TEST_VMEXIT;
}
/*
 * Set up the I/O bitmap test: allocate the two bitmap pages, enable
 * CPU_IO_BITMAP, disable CPU_IO (unconditional I/O exiting) and install
 * both pages in the VMCS.  Always returns VMX_TEST_START.
 */
static int iobmp_init(struct vmcs *vmcs)
{
	u32 exec_ctrl;

	io_bitmap_a = alloc_page();
	io_bitmap_b = alloc_page();

	exec_ctrl = vmcs_read(CPU_EXEC_CTRL0);
	exec_ctrl = (exec_ctrl | CPU_IO_BITMAP) & ~CPU_IO;
	vmcs_write(CPU_EXEC_CTRL0, exec_ctrl);

	vmcs_write(IO_BITMAP_A, (u64)io_bitmap_a);
	vmcs_write(IO_BITMAP_B, (u64)io_bitmap_b);

	return VMX_TEST_START;
}
/*
 * Guest side of the I/O bitmap test.  Each step sets the stage, performs an
 * I/O access and then checks whether iobmp_exit_handler() advanced the
 * stage, i.e. whether the access caused the expected exit.
 */
static void iobmp_main(void)
{
// stage 0, test IO pass
vmx_set_test_stage(0);
inb(0x5000);
outb(0x0, 0x5000);
/* No bit has been set by this function yet, so the stage must be unchanged. */
report(vmx_get_test_stage() == 0, "I/O bitmap - I/O pass");
// test IO width, in/out
/* Set the bits for ports 0-7 in bitmap A. */
((u8 *)io_bitmap_a)[0] = 0xFF;
vmx_set_test_stage(2);
inb(0x0);
report(vmx_get_test_stage() == 3, "I/O bitmap - trap in");
vmx_set_test_stage(3);
outw(0x0, 0x0);
report(vmx_get_test_stage() == 4, "I/O bitmap - trap out");
vmx_set_test_stage(4);
inl(0x0);
report(vmx_get_test_stage() == 5, "I/O bitmap - I/O width, long");
// test low/high IO port
vmx_set_test_stage(5);
((u8 *)io_bitmap_a)[0x5000 / 8] = (1 << (0x5000 % 8));
inb(0x5000);
report(vmx_get_test_stage() == 6, "I/O bitmap - I/O port, low part");
vmx_set_test_stage(6);
/* Bit 0x1000 of bitmap B is paired with port 0x9000 by this test. */
((u8 *)io_bitmap_b)[0x1000 / 8] = (1 << (0x1000 % 8));
inb(0x9000);
report(vmx_get_test_stage() == 7, "I/O bitmap - I/O port, high part");
// test partial pass
/* 4-byte access starting one port below 0x5000, whose bit was set above. */
vmx_set_test_stage(7);
inl(0x4FFF);
report(vmx_get_test_stage() == 8, "I/O bitmap - partial pass");
// test overrun
/* Both bitmaps are cleared; the 4-byte access at the last port is still expected to exit. */
vmx_set_test_stage(8);
memset(io_bitmap_a, 0x0, PAGE_SIZE);
memset(io_bitmap_b, 0x0, PAGE_SIZE);
inl(0xFFFF);
report(vmx_get_test_stage() == 9, "I/O bitmap - overrun");
/* Stage 9: the handler sets CPU_IO and CPU_IO_BITMAP together; no exit is expected. */
vmx_set_test_stage(9);
vmcall();
outb(0x0, 0x0);
report(vmx_get_test_stage() == 9,
"I/O bitmap - ignore unconditional exiting");
/* Stage 10: the handler clears CPU_IO_BITMAP and sets CPU_IO; the access must exit. */
vmx_set_test_stage(10);
vmcall();
outb(0x0, 0x0);
report(vmx_get_test_stage() == 11,
"I/O bitmap - unconditional exiting");
}
/*
 * Host side of the I/O bitmap test.
 *
 * For I/O exits the exit qualification is checked against what the current
 * stage expects and the stage is advanced; for VMCALL exits (stages 9 and
 * 10) the I/O-related execution controls are reprogrammed.  Both paths skip
 * the exiting instruction and resume; anything unexpected returns
 * VMX_TEST_VMEXIT.
 */
static int iobmp_exit_handler(union exit_reason exit_reason)
{
u64 guest_rip;
ulong exit_qual;
u32 insn_len, ctrl_cpu0;
guest_rip = vmcs_read(GUEST_RIP);
exit_qual = vmcs_read(EXI_QUALIFICATION);
insn_len = vmcs_read(EXI_INST_LEN);
switch (exit_reason.basic) {
case VMX_IO:
switch (vmx_get_test_stage()) {
case 0:
case 1:
/* Advancing the stage here makes the guest's "I/O pass" check fail. */
vmx_inc_test_stage();
break;
case 2:
/* Byte-sized IN expected. */
report((exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_BYTE,
"I/O bitmap - I/O width, byte");
report(exit_qual & VMX_IO_IN,
"I/O bitmap - I/O direction, in");
vmx_inc_test_stage();
break;
case 3:
/* Word-sized OUT expected. */
report((exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_WORD,
"I/O bitmap - I/O width, word");
report(!(exit_qual & VMX_IO_IN),
"I/O bitmap - I/O direction, out");
vmx_inc_test_stage();
break;
case 4:
report((exit_qual & VMX_IO_SIZE_MASK) == _VMX_IO_LONG,
"I/O bitmap - I/O width, long");
vmx_inc_test_stage();
break;
case 5:
/* Stages 5-8: advance only when the reported port matches the one the guest used. */
if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x5000)
vmx_inc_test_stage();
break;
case 6:
if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x9000)
vmx_inc_test_stage();
break;
case 7:
if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0x4FFF)
vmx_inc_test_stage();
break;
case 8:
if (((exit_qual & VMX_IO_PORT_MASK) >> VMX_IO_PORT_SHIFT) == 0xFFFF)
vmx_inc_test_stage();
break;
case 9:
case 10:
/* Exit with CPU_IO set: turn unconditional exiting off again and advance. */
ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0);
vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0 & ~CPU_IO);
vmx_inc_test_stage();
break;
default:
// Should not reach here
report(false, "unexpected stage, %d",
vmx_get_test_stage());
print_vmexit_info(exit_reason);
return VMX_TEST_VMEXIT;
}
vmcs_write(GUEST_RIP, guest_rip + insn_len);
return VMX_TEST_RESUME;
case VMX_VMCALL:
switch (vmx_get_test_stage()) {
case 9:
/* Both unconditional exiting and the bitmaps enabled. */
ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0);
ctrl_cpu0 |= CPU_IO | CPU_IO_BITMAP;
vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0);
break;
case 10:
/* Bitmaps disabled, unconditional exiting enabled. */
ctrl_cpu0 = vmcs_read(CPU_EXEC_CTRL0);
ctrl_cpu0 = (ctrl_cpu0 & ~CPU_IO_BITMAP) | CPU_IO;
vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu0);
break;
default:
// Should not reach here
report(false, "unexpected stage, %d",
vmx_get_test_stage());
print_vmexit_info(exit_reason);
return VMX_TEST_VMEXIT;
}
vmcs_write(GUEST_RIP, guest_rip + insn_len);
return VMX_TEST_RESUME;
default:
printf("guest_rip = %#lx\n", guest_rip);
printf("\tERROR : Unknown exit reason, 0x%x\n", exit_reason.full);
break;
}
return VMX_TEST_VMEXIT;
}
/* insn_table.type: flag lives in the primary processor-based execution controls. */
#define INSN_CPU0 0
/* insn_table.type: flag lives in the secondary processor-based execution controls. */
#define INSN_CPU1 1
/* insn_table.type: no control flag; the instruction is expected to exit regardless. */
#define INSN_ALWAYS_TRAP 2
/* insn_table.test_field bits: which extra exit fields the handler compares. */
#define FIELD_EXIT_QUAL (1 << 0)
#define FIELD_INSN_INFO (1 << 1)
/*
 * Assembly stubs, one per instruction under test.  Each stub executes the
 * instruction (after loading any register operands it needs) and returns.
 * insn_cr3_load takes its source operand from the symbol "cr3", which
 * appears to be the C global of that name filled in by
 * insn_intercept_init().
 */
asm(
"insn_hlt: hlt;ret\n\t"
"insn_invlpg: invlpg 0x12345678;ret\n\t"
"insn_mwait: xor %eax, %eax; xor %ecx, %ecx; mwait;ret\n\t"
"insn_rdpmc: xor %ecx, %ecx; rdpmc;ret\n\t"
"insn_rdtsc: rdtsc;ret\n\t"
"insn_cr3_load: mov cr3,%rax; mov %rax,%cr3;ret\n\t"
"insn_cr3_store: mov %cr3,%rax;ret\n\t"
"insn_cr8_load: xor %eax, %eax; mov %rax,%cr8;ret\n\t"
"insn_cr8_store: mov %cr8,%rax;ret\n\t"
"insn_monitor: xor %eax, %eax; xor %ecx, %ecx; xor %edx, %edx; monitor;ret\n\t"
"insn_pause: pause;ret\n\t"
"insn_wbinvd: wbinvd;ret\n\t"
"insn_cpuid: mov $10, %eax; cpuid;ret\n\t"
"insn_invd: invd;ret\n\t"
"insn_sgdt: sgdt gdt64_desc;ret\n\t"
"insn_lgdt: lgdt gdt64_desc;ret\n\t"
"insn_sidt: sidt idt_descr;ret\n\t"
"insn_lidt: lidt idt_descr;ret\n\t"
"insn_sldt: sldt %ax;ret\n\t"
"insn_lldt: xor %eax, %eax; lldt %ax;ret\n\t"
"insn_str: str %ax;ret\n\t"
"insn_rdrand: rdrand %rax;ret\n\t"
"insn_rdseed: rdseed %rax;ret\n\t"
);
/* C prototypes for the assembly stubs above, so they can be stored in insn_table. */
extern void insn_hlt(void);
extern void insn_invlpg(void);
extern void insn_mwait(void);
extern void insn_rdpmc(void);
extern void insn_rdtsc(void);
extern void insn_cr3_load(void);
extern void insn_cr3_store(void);
extern void insn_cr8_load(void);
extern void insn_cr8_store(void);
extern void insn_monitor(void);
extern void insn_pause(void);
extern void insn_wbinvd(void);
extern void insn_sgdt(void);
extern void insn_lgdt(void);
extern void insn_sidt(void);
extern void insn_lidt(void);
extern void insn_sldt(void);
extern void insn_lldt(void);
extern void insn_str(void);
extern void insn_cpuid(void);
extern void insn_invd(void);
extern void insn_rdrand(void);
extern void insn_rdseed(void);
/* Index of the insn_table entry being tested; shared by the guest loop and the exit handler. */
u32 cur_insn;
/* CR3 value captured by insn_intercept_init(). */
u64 cr3;
/* Not referenced in the visible part of this file (monitor_supported() checks X86_FEATURE_MWAIT). */
#define X86_FEATURE_MONITOR (1 << 3)
/* Predicate telling whether the CPU feature an instruction needs is present. */
typedef bool (*supported_fn)(void);
/* supported_fn for the MONITOR/MWAIT entries: true when the CPU reports X86_FEATURE_MWAIT. */
static bool monitor_supported(void)
{
	bool has_mwait = this_cpu_has(X86_FEATURE_MWAIT);

	return has_mwait;
}
/* One instruction-intercept test case. */
struct insn_table {
/* Name used in report and log messages; a NULL name terminates the table. */
const char *name;
/* Execution-control bit that is set and cleared around the instruction. */
u32 flag;
/* Stub that executes the instruction. */
void (*insn_func)(void);
/* INSN_CPU0, INSN_CPU1 or INSN_ALWAYS_TRAP. */
u32 type;
/* Expected exit reason; compared against exit_reason.full. */
u32 reason;
/* Expected exit qualification (checked only with FIELD_EXIT_QUAL). */
ulong exit_qual;
/* Expected instruction information (checked only with FIELD_INSN_INFO). */
u32 insn_info;
// Use FIELD_EXIT_QUAL and FIELD_INSN_INFO to define
// which field need to be tested, reason is always tested
u32 test_field;
/* Optional feature check; NULL means the entry is always eligible. */
const supported_fn supported_fn;
/* Set by insn_intercept_init() when supported_fn reports the feature missing. */
u8 disabled;
};
/*
* Add more instruction-intercept test cases here. Each element of this
* table is:
* name/control flag/insn function/type/exit reason/exit qualification/
* instruction info/field to test/optional "supported" callback
* The "field to test" entry defines which fields (exit_qual and insn_info)
* need to be tested in the exit handler. If set to 0, only "reason" is
* checked.
*/
/* Positional initializers follow the member order of struct insn_table. */
static struct insn_table insn_table[] = {
// Flags for Primary Processor-Based VM-Execution Controls
{"HLT", CPU_HLT, insn_hlt, INSN_CPU0, 12, 0, 0, 0},
{"INVLPG", CPU_INVLPG, insn_invlpg, INSN_CPU0, 14,
0x12345678, 0, FIELD_EXIT_QUAL},
{"MWAIT", CPU_MWAIT, insn_mwait, INSN_CPU0, 36, 0, 0, 0, &monitor_supported},
{"RDPMC", CPU_RDPMC, insn_rdpmc, INSN_CPU0, 15, 0, 0, 0},
{"RDTSC", CPU_RDTSC, insn_rdtsc, INSN_CPU0, 16, 0, 0, 0},
{"CR3 load", CPU_CR3_LOAD, insn_cr3_load, INSN_CPU0, 28, 0x3, 0,
FIELD_EXIT_QUAL},
{"CR3 store", CPU_CR3_STORE, insn_cr3_store, INSN_CPU0, 28, 0x13, 0,
FIELD_EXIT_QUAL},
{"CR8 load", CPU_CR8_LOAD, insn_cr8_load, INSN_CPU0, 28, 0x8, 0,
FIELD_EXIT_QUAL},
{"CR8 store", CPU_CR8_STORE, insn_cr8_store, INSN_CPU0, 28, 0x18, 0,
FIELD_EXIT_QUAL},
{"MONITOR", CPU_MONITOR, insn_monitor, INSN_CPU0, 39, 0, 0, 0, &monitor_supported},
{"PAUSE", CPU_PAUSE, insn_pause, INSN_CPU0, 40, 0, 0, 0},
// Flags for Secondary Processor-Based VM-Execution Controls
{"WBINVD", CPU_WBINVD, insn_wbinvd, INSN_CPU1, 54, 0, 0, 0},
{"DESC_TABLE (SGDT)", CPU_DESC_TABLE, insn_sgdt, INSN_CPU1, 46, 0, 0, 0},
{"DESC_TABLE (LGDT)", CPU_DESC_TABLE, insn_lgdt, INSN_CPU1, 46, 0, 0, 0},
{"DESC_TABLE (SIDT)", CPU_DESC_TABLE, insn_sidt, INSN_CPU1, 46, 0, 0, 0},
{"DESC_TABLE (LIDT)", CPU_DESC_TABLE, insn_lidt, INSN_CPU1, 46, 0, 0, 0},
{"DESC_TABLE (SLDT)", CPU_DESC_TABLE, insn_sldt, INSN_CPU1, 47, 0, 0, 0},
{"DESC_TABLE (LLDT)", CPU_DESC_TABLE, insn_lldt, INSN_CPU1, 47, 0, 0, 0},
{"DESC_TABLE (STR)", CPU_DESC_TABLE, insn_str, INSN_CPU1, 47, 0, 0, 0},
/* LTR causes a #GP if done with a busy selector, so it is not tested. */
{"RDRAND", CPU_RDRAND, insn_rdrand, INSN_CPU1, VMX_RDRAND, 0, 0, 0},
{"RDSEED", CPU_RDSEED, insn_rdseed, INSN_CPU1, VMX_RDSEED, 0, 0, 0},
// Instructions always trap
{"CPUID", 0, insn_cpuid, INSN_ALWAYS_TRAP, 10, 0, 0, 0},
{"INVD", 0, insn_invd, INSN_ALWAYS_TRAP, 13, 0, 0, 0},
// Instructions never trap
/* No such entries are listed; the NULL name below ends the table. */
{NULL},
};
static int insn_intercept_init(struct vmcs *vmcs)
{
u32 ctrl_cpu, cur_insn;
ctrl_cpu = ctrl_cpu_rev[0].set | CPU_SECONDARY;
ctrl_cpu &= ctrl_cpu_rev[0].clr;
vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu);
vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu_rev[1].set);
cr3 = read_cr3();
for (cur_insn = 0; insn_table[cur_insn].name != NULL; cur_insn++) {
if (insn_table[cur_insn].supported_fn == NULL)
continue;
insn_table[cur_insn].disabled = !insn_table[cur_insn].supported_fn();
}
return VMX_TEST_START;
}
/*
 * Guest side of the instruction intercept test.
 *
 * For table entry i the stage is 2*i while the intercept is off and is
 * expected to become 2*i + 1 once the exit handler has seen the matching
 * exit.  Each vmcall asks the handler to flip the entry's control flag:
 * on at an even stage, off at an odd stage.
 */
static void insn_intercept_main(void)
{
for (cur_insn = 0; insn_table[cur_insn].name != NULL; cur_insn++) {
vmx_set_test_stage(cur_insn * 2);
/* Skip entries whose control flag cannot be set on this CPU. */
if ((insn_table[cur_insn].type == INSN_CPU0 &&
!(ctrl_cpu_rev[0].clr & insn_table[cur_insn].flag)) ||
(insn_table[cur_insn].type == INSN_CPU1 &&
!(ctrl_cpu_rev[1].clr & insn_table[cur_insn].flag))) {
printf("\tCPU_CTRL%d.CPU_%s is not supported.\n",
insn_table[cur_insn].type - INSN_CPU0,
insn_table[cur_insn].name);
continue;
}
/* Skip entries disabled by insn_intercept_init(). */
if (insn_table[cur_insn].disabled) {
printf("\tFeature required for %s is not supported.\n",
insn_table[cur_insn].name);
continue;
}
/* If the flag is not forced on, first run the instruction without the intercept. */
if ((insn_table[cur_insn].type == INSN_CPU0 &&
!(ctrl_cpu_rev[0].set & insn_table[cur_insn].flag)) ||
(insn_table[cur_insn].type == INSN_CPU1 &&
!(ctrl_cpu_rev[1].set & insn_table[cur_insn].flag))) {
/* skip hlt, it stalls the guest and is tested below */
if (insn_table[cur_insn].insn_func != insn_hlt)
insn_table[cur_insn].insn_func();
report(vmx_get_test_stage() == cur_insn * 2,
"execute %s",
insn_table[cur_insn].name);
} else if (insn_table[cur_insn].type != INSN_ALWAYS_TRAP)
printf("\tCPU_CTRL%d.CPU_%s always traps.\n",
insn_table[cur_insn].type - INSN_CPU0,
insn_table[cur_insn].name);
/* Even stage: the handler turns the intercept on. */
vmcall();
insn_table[cur_insn].insn_func();
report(vmx_get_test_stage() == cur_insn * 2 + 1,
"intercept %s",
insn_table[cur_insn].name);
/* Odd stage: the handler turns the intercept off again. */
vmx_set_test_stage(cur_insn * 2 + 1);
vmcall();
}
}
/*
 * Host side of the instruction intercept test.
 *
 * A VMCALL toggles the current entry's control flag (set at an even stage,
 * cleared at an odd stage).  Any other exit is compared with the current
 * entry's expectations and, on a match, the stage is incremented.  The
 * exiting instruction is always skipped and the guest always resumed.
 */
static int insn_intercept_exit_handler(union exit_reason exit_reason)
{
u64 guest_rip;
ulong exit_qual;
u32 insn_len;
u32 insn_info;
bool pass;
guest_rip = vmcs_read(GUEST_RIP);
exit_qual = vmcs_read(EXI_QUALIFICATION);
insn_len = vmcs_read(EXI_INST_LEN);
insn_info = vmcs_read(EXI_INST_INFO);
if (exit_reason.basic == VMX_VMCALL) {
u32 val = 0;
/* Read the control word that holds this entry's flag (none for INSN_ALWAYS_TRAP). */
if (insn_table[cur_insn].type == INSN_CPU0)
val = vmcs_read(CPU_EXEC_CTRL0);
else if (insn_table[cur_insn].type == INSN_CPU1)
val = vmcs_read(CPU_EXEC_CTRL1);
if (vmx_get_test_stage() & 1)
val &= ~insn_table[cur_insn].flag;
else
val |= insn_table[cur_insn].flag;
/* Write back, keeping the required-set bits on. */
if (insn_table[cur_insn].type == INSN_CPU0)
vmcs_write(CPU_EXEC_CTRL0, val | ctrl_cpu_rev[0].set);
else if (insn_table[cur_insn].type == INSN_CPU1)
vmcs_write(CPU_EXEC_CTRL1, val | ctrl_cpu_rev[1].set);
} else {
/* The exit counts only at the even stage of the current entry and with the expected reason. */
pass = (cur_insn * 2 == vmx_get_test_stage()) &&
insn_table[cur_insn].reason == exit_reason.full;
if (insn_table[cur_insn].test_field & FIELD_EXIT_QUAL &&
insn_table[cur_insn].exit_qual != exit_qual)
pass = false;
if (insn_table[cur_insn].test_field & FIELD_INSN_INFO &&
insn_table[cur_insn].insn_info != insn_info)
pass = false;
if (pass)
vmx_inc_test_stage();
}
vmcs_write(GUEST_RIP, guest_rip + insn_len);
return VMX_TEST_RESUME;
}
/**
 * __setup_ept - Setup the VMCS fields to enable Extended Page Tables (EPT)
 * @hpa:	Host physical address of the top-level, a.k.a. root, EPT table
 * @enable_ad:	Whether or not to enable Access/Dirty bits for EPT entries
 *
 * Returns 0 on success, 1 on failure.  Failure means one of the required
 * capabilities is missing: the secondary controls or the EPT control, the
 * WB memory type for EPT walks, or a page-walk length of 4.  Nothing is
 * written to the VMCS in that case.
 *
 * On success the global @eptp is rebuilt from scratch and written to the
 * EPTP field, and CPU_SECONDARY and CPU_EPT are turned on.
 *
 * Note that @hpa doesn't need to point at actual memory if VM-Launch is
 * expected to fail, e.g. setup_dummy_ept() arbitrarily passes '0' to satisfy
 * the various EPTP consistency checks, but doesn't ensure backing for HPA '0'.
 */
static int __setup_ept(u64 hpa, bool enable_ad)
{
	if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) ||
	    !(ctrl_cpu_rev[1].clr & CPU_EPT)) {
		/* Terminate the line so later output does not run into it. */
		printf("\tEPT is not supported\n");
		return 1;
	}
	if (!(ept_vpid.val & EPT_CAP_WB)) {
		printf("WB memtype for EPT walks not supported\n");
		return 1;
	}
	if (!(ept_vpid.val & EPT_CAP_PWL4)) {
		printf("\tPWL4 is not supported\n");
		return 1;
	}

	eptp = EPT_MEM_TYPE_WB;
	/* The page-walk length field holds the number of levels minus one. */
	eptp |= (3 << EPTP_PG_WALK_LEN_SHIFT);
	eptp |= hpa;
	if (enable_ad)
		eptp |= EPTP_AD_FLAG;

	vmcs_write(EPTP, eptp);
	vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0)| CPU_SECONDARY);
	vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1)| CPU_EPT);

	return 0;
}
/**
 * setup_ept - Turn on EPT and build the mapping used by tests that really
 *	       run guest code
 * @enable_ad:	Whether or not to enable Access/Dirty bits for EPT entries
 *
 * Allocates the root table, programs the VMCS through __setup_ept() and
 * then populates the range from 0 up to the reported RAM size, but never
 * less than 4 GiB, with read/write/execute permissions.
 *
 * Returns 0 on success, 1 when __setup_ept() fails.
 */
static int setup_ept(bool enable_ad)
{
	const unsigned long four_gib = 1ul << 32;
	unsigned long map_end;

	pml4 = alloc_page();
	if (__setup_ept(virt_to_phys(pml4), enable_ad) != 0)
		return 1;

	map_end = fwcfg_get_u64(FW_CFG_RAM_SIZE);
	if (map_end < four_gib)
		map_end = four_gib;

	/*
	 * Large EPT pages are only used when A/D tracking is off, because
	 * accessed/dirty bits have to be tracked at 4K granularity.
	 */
	setup_ept_range(pml4, 0, map_end, 0,
			!enable_ad && ept_2m_supported(),
			EPT_WA | EPT_RA | EPT_EA);

	return 0;
}
/**
 * setup_dummy_ept - Turn on EPT with a placeholder root address
 *
 * Programs EPT with root HPA 0 and A/D bits off, without building any
 * tables.  Meant for tests that only need EPT enabled to check dependent
 * VMCS controls and never expect to fully enter the guest.  Aborts the
 * test run if the EPT setup fails.
 */
static void setup_dummy_ept(void)
{
	int failed = __setup_ept(0, false);

	if (failed)
		report_abort("EPT setup unexpectedly failed");
}
/*
 * Enable the "unrestricted guest" secondary control on top of a dummy EPT
 * setup.
 *
 * Returns 1 without touching the VMCS when the secondary controls, the
 * unrestricted-guest control or the EPT control cannot be set; returns 0
 * after enabling them otherwise.
 */
static int enable_unrestricted_guest(void)
{
	if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY))
		return 1;
	if (!(ctrl_cpu_rev[1].clr & CPU_URG))
		return 1;
	if (!(ctrl_cpu_rev[1].clr & CPU_EPT))
		return 1;

	setup_dummy_ept();

	vmcs_write(CPU_EXEC_CTRL0, vmcs_read(CPU_EXEC_CTRL0) | CPU_SECONDARY);
	vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | CPU_URG);

	return 0;
}
/* Set EPTP_AD_FLAG in the cached eptp and write the result to the VMCS. */
static void ept_enable_ad_bits(void)
{
	eptp = eptp | EPTP_AD_FLAG;
	vmcs_write(EPTP, eptp);
}
/* Clear EPTP_AD_FLAG in the cached eptp and write the result to the VMCS. */
static void ept_disable_ad_bits(void)
{
	eptp = eptp & ~EPTP_AD_FLAG;
	vmcs_write(EPTP, eptp);
}
static int ept_ad_enabled(void)
{
return eptp & EPTP_AD_FLAG;
}
static void ept_enable_ad_bits_or_skip_test(void)
{
if (!ept_ad_bits_supported())
test_skip("EPT AD bits not supported.");
ept_enable_ad_bits();
}
/* APIC_LVR register value sampled by ept_init_common(). */
static int apic_version;

/*
 * Shared setup for the EPT tests.
 *
 * Builds the EPT tables through setup_ept(), allocates the two data pages
 * and seeds them with MAGIC_VAL_1 / MAGIC_VAL_2, calls install_ept() on the
 * pair with full permissions, records the APIC version and, when the PCI
 * test device is present, its memory BAR.
 *
 * Returns VMX_TEST_EXIT when setup_ept() fails, VMX_TEST_START otherwise.
 */
static int ept_init_common(bool have_ad)
{
	struct pci_dev pcidev;
	u32 *first_word1, *first_word2;
	int bdf;

	if (setup_ept(have_ad))
		return VMX_TEST_EXIT;

	data_page1 = alloc_page();
	data_page2 = alloc_page();
	first_word1 = (u32 *)data_page1;
	first_word2 = (u32 *)data_page2;
	*first_word1 = MAGIC_VAL_1;
	*first_word2 = MAGIC_VAL_2;
	install_ept(pml4, (unsigned long)data_page1, (unsigned long)data_page2,
		    EPT_RA | EPT_WA | EPT_EA);

	apic_version = apic_read(APIC_LVR);

	bdf = pci_find_dev(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_TEST);
	if (bdf == PCIDEVADDR_INVALID)
		return VMX_TEST_START;

	pci_dev_init(&pcidev, bdf);
	pci_physaddr = pcidev.resource[PCI_TESTDEV_BAR_MEM];

	return VMX_TEST_START;
}
/* Init hook of the EPT test: common EPT setup with A/D bits disabled. */
static int ept_init(struct vmcs *vmcs)
{
	int rc = ept_init_common(false);

	return rc;
}
/*
 * Guest-side EPT checks shared by the EPT tests.
 *
 * Walks through stages 0-6.  Before each access under test the guest sets
 * the stage and issues a vmcall so the exit handler can prepare the EPT
 * state; it then touches memory and checks whether the handler advanced
 * the stage.  Covered: basic mapping/remapping, EPT misconfigurations,
 * EPT violations (page permission and paging structure) and MMIO
 * reads/writes to the PCI test device BAR.
 */
static void ept_common(void)
{
	vmx_set_test_stage(0);
	/* Both guest addresses are expected to read back MAGIC_VAL_1 at this point. */
	if (*((u32 *)data_page2) != MAGIC_VAL_1 ||
			*((u32 *)data_page1) != MAGIC_VAL_1)
		report(0, "EPT basic framework - read");
	else {
		*((u32 *)data_page2) = MAGIC_VAL_3;
		vmcall();
		if (vmx_get_test_stage() == 1) {
			if (*((u32 *)data_page1) == MAGIC_VAL_3 &&
					*((u32 *)data_page2) == MAGIC_VAL_2)
				report(1, "EPT basic framework");
			else
				/*
				 * The pages do not hold the expected values:
				 * this is a failure (it used to be reported
				 * as a pass).
				 */
				report(0, "EPT basic framework - remap");
		}
	}
	// Test EPT Misconfigurations
	vmx_set_test_stage(1);
	vmcall();
	*((u32 *)data_page1) = MAGIC_VAL_1;
	if (vmx_get_test_stage() != 2) {
		report(0, "EPT misconfigurations");
		goto t1;
	}
	vmx_set_test_stage(2);
	vmcall();
	*((u32 *)data_page1) = MAGIC_VAL_1;
	report(vmx_get_test_stage() == 3, "EPT misconfigurations");
t1:
	// Test EPT violation
	vmx_set_test_stage(3);
	vmcall();
	*((u32 *)data_page1) = MAGIC_VAL_1;
	report(vmx_get_test_stage() == 4, "EPT violation - page permission");
	// Violation caused by EPT paging structure
	vmx_set_test_stage(4);
	vmcall();
	*((u32 *)data_page1) = MAGIC_VAL_2;
	report(vmx_get_test_stage() == 5, "EPT violation - paging structure");
	// MMIO Read/Write
	vmx_set_test_stage(5);
	vmcall();
	/* The volatile read result is discarded; only the resulting exit matters. */
	*(u32 volatile *)pci_physaddr;
	report(vmx_get_test_stage() == 6, "MMIO EPT violation - read");
	*(u32 volatile *)pci_physaddr = MAGIC_VAL_1;
	report(vmx_get_test_stage() == 7, "MMIO EPT violation - write");
}
static void ept_main(void)
{
ept_common();
// Test EPT access to L1 MMIO
vmx_set_test_stage(7);
report(*((u32 *)0xfee00030UL) == apic_version, "EPT - MMIO access");
// Test invalid operand for INVEPT
vmcall();
report(vmx_get_test_stage() == 8, "EPT - unsupported INVEPT");
}
/*
 * Execute INVEPT of the given @type on @eptp and compare the outcome with
 * the capability bit for that type in ept_vpid.val.
 *
 * Returns false when the outcome is consistent with the capability (the
 * value returned by invept() is the logical opposite of "supported").
 * Otherwise prints a warning and returns true.
 */
static bool invept_test(int type, u64 eptp)
{
	bool supported, result;

	supported = ept_vpid.val & (EPT_CAP_INVEPT_SINGLE >> INVEPT_SINGLE << type);
	result = invept(type, eptp);

	if (result == !supported)
		return false;

	if (supported)
		printf("WARNING: invept failed!\n");
	else
		printf("WARNING: unsupported invept passed!\n");

	return true;
}
/*
 * Exit handler of the PML test.
 *
 * VMX_PML_FULL: advance the stage, reset the PML index and resume.
 * VMX_VMCALL:   stage 0 scans the log above the current index for
 *               data_page2; stage 1 keeps clearing the A/D bits of
 *               data_page2. The VMCALL is then skipped.
 * Anything else is reported and ends the test with VMX_TEST_VMEXIT.
 */
static int pml_exit_handler(union exit_reason exit_reason)
{
	u64 *log = pml_log;
	u64 rip = vmcs_read(GUEST_RIP);
	u64 cr3 = vmcs_read(GUEST_CR3);
	u32 len = vmcs_read(EXI_INST_LEN);
	u16 idx, i;

	if (exit_reason.basic == VMX_PML_FULL) {
		vmx_inc_test_stage();
		vmcs_write(GUEST_PML_INDEX, PML_INDEX - 1);
		return VMX_TEST_RESUME;
	}

	if (exit_reason.basic != VMX_VMCALL) {
		report(false, "Unknown exit reason, 0x%x", exit_reason.full);
		print_vmexit_info(exit_reason);
		return VMX_TEST_VMEXIT;
	}

	switch (vmx_get_test_stage()) {
	case 0:
		idx = vmcs_read(GUEST_PML_INDEX);
		for (i = idx + 1; i < PML_INDEX; i++) {
			if (log[i] != (u64)data_page2)
				continue;
			vmx_inc_test_stage();
			clear_ept_ad(pml4, cr3, (unsigned long)data_page2);
			break;
		}
		break;
	case 1:
		/* The index is read but its value is not used in this stage. */
		(void)vmcs_read(GUEST_PML_INDEX);
		/* Keep clearing the dirty bit till a overflow */
		clear_ept_ad(pml4, cr3, (unsigned long)data_page2);
		break;
	default:
		report(false, "unexpected stage, %d.",
		       vmx_get_test_stage());
		print_vmexit_info(exit_reason);
		return VMX_TEST_VMEXIT;
	}

	vmcs_write(GUEST_RIP, rip + len);
	return VMX_TEST_RESUME;
}
/*
 * Exit handler shared by the EPT and EPT A/D tests; host-side counterpart of
 * ept_common()/ept_main().
 *
 * @exit_reason: exit reason of the current VM-exit.
 * @have_ad:     selects the A/D flag values expected by check_ept_ad() and
 *               the exit qualification expected in violation stage 4.
 *
 * VMCALL:            reconfigure the EPT for the stage announced by the
 *                    guest, then skip the VMCALL instruction.
 * EPT misconfig:     advance the stage and restore a sane data_page1 mapping.
 * EPT violation:     advance the stage if the exit qualification is the
 *                    expected one, then restore the entry that was broken.
 *
 * Returns VMX_TEST_RESUME to re-enter the guest, or VMX_TEST_VMEXIT on an
 * unexpected stage or exit reason.
 */
static int ept_exit_handler_common(union exit_reason exit_reason, bool have_ad)
{
	u64 guest_rip;
	u64 guest_cr3;
	u32 insn_len;
	u32 exit_qual;
	/* Kept across exits: saved on VMCALL, used again on the violation. */
	static unsigned long data_page1_pte, data_page1_pte_pte, memaddr_pte,
			     guest_pte_addr;
	pteval_t *ptep;

	guest_rip = vmcs_read(GUEST_RIP);
	guest_cr3 = vmcs_read(GUEST_CR3);
	insn_len = vmcs_read(EXI_INST_LEN);
	exit_qual = vmcs_read(EXI_QUALIFICATION);

	switch (exit_reason.basic) {
	case VMX_VMCALL:
		switch (vmx_get_test_stage()) {
		case 0:
			check_ept_ad(pml4, guest_cr3,
				     (unsigned long)data_page1,
				     have_ad ? EPT_ACCESS_FLAG : 0,
				     have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0);
			check_ept_ad(pml4, guest_cr3,
				     (unsigned long)data_page2,
				     have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0,
				     have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0);
			clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1);
			clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page2);
			if (have_ad)
				ept_sync(INVEPT_SINGLE, eptp);
			if (*((u32 *)data_page1) == MAGIC_VAL_3 &&
			    *((u32 *)data_page2) == MAGIC_VAL_2) {
				vmx_inc_test_stage();
				install_ept(pml4, (unsigned long)data_page2,
					    (unsigned long)data_page2,
					    EPT_RA | EPT_WA | EPT_EA);
			} else
				report(0, "EPT basic framework - write");
			break;
		case 1:
			/* Write-only permission: expected to misconfigure. */
			install_ept(pml4, (unsigned long)data_page1,
				    (unsigned long)data_page1, EPT_WA);
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 2:
			/* Memory type value 2: expected to misconfigure. */
			install_ept(pml4, (unsigned long)data_page1,
				    (unsigned long)data_page1,
				    EPT_RA | EPT_WA | EPT_EA |
				    (2 << EPT_MEM_TYPE_SHIFT));
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 3:
			/* Remove the permissions of the data_page1 leaf entry. */
			clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1);
			TEST_ASSERT(get_ept_pte(pml4, (unsigned long)data_page1,
						1, &data_page1_pte));
			set_ept_pte(pml4, (unsigned long)data_page1,
				    1, data_page1_pte & ~EPT_PRESENT);
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 4:
			/*
			 * Remove the permissions of the level-2 EPT entry
			 * covering the guest page-table page that maps
			 * data_page1.
			 */
			ptep = get_pte_level((pgd_t *)guest_cr3, data_page1, /*level=*/2);
			guest_pte_addr = virt_to_phys(ptep) & PAGE_MASK;
			TEST_ASSERT(get_ept_pte(pml4, guest_pte_addr, 2, &data_page1_pte_pte));
			set_ept_pte(pml4, guest_pte_addr, 2,
				    data_page1_pte_pte & ~EPT_PRESENT);
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 5:
			/* Map the MMIO address with no permissions at all. */
			install_ept(pml4, (unsigned long)pci_physaddr,
				    (unsigned long)pci_physaddr, 0);
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 7:
			if (!invept_test(0, eptp))
				vmx_inc_test_stage();
			break;
		// Should not reach here
		default:
			report(false, "ERROR - unexpected stage, %d.",
			       vmx_get_test_stage());
			print_vmexit_info(exit_reason);
			return VMX_TEST_VMEXIT;
		}
		vmcs_write(GUEST_RIP, guest_rip + insn_len);
		return VMX_TEST_RESUME;
	case VMX_EPT_MISCONFIG:
		switch (vmx_get_test_stage()) {
		case 1:
		case 2:
			vmx_inc_test_stage();
			install_ept(pml4, (unsigned long)data_page1,
				    (unsigned long)data_page1,
				    EPT_RA | EPT_WA | EPT_EA);
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		// Should not reach here
		default:
			report(false, "ERROR - unexpected stage, %d.",
			       vmx_get_test_stage());
			print_vmexit_info(exit_reason);
			return VMX_TEST_VMEXIT;
		}
		return VMX_TEST_RESUME;
	case VMX_EPT_VIOLATION:
		/*
		 * Exit-qualifications are masked not to account for advanced
		 * VM-exit information. Once KVM supports this feature, this
		 * masking should be removed.
		 */
		exit_qual &= ~EPT_VLT_GUEST_MASK;

		switch (vmx_get_test_stage()) {
		case 3:
			check_ept_ad(pml4, guest_cr3, (unsigned long)data_page1, 0,
				     have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0);
			clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1);
			if (exit_qual == (EPT_VLT_WR | EPT_VLT_LADDR_VLD |
					  EPT_VLT_PADDR))
				vmx_inc_test_stage();
			set_ept_pte(pml4, (unsigned long)data_page1,
				    1, data_page1_pte | (EPT_PRESENT));
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 4:
			check_ept_ad(pml4, guest_cr3, (unsigned long)data_page1, 0,
				     have_ad ? EPT_ACCESS_FLAG | EPT_DIRTY_FLAG : 0);
			clear_ept_ad(pml4, guest_cr3, (unsigned long)data_page1);
			if (exit_qual == (EPT_VLT_RD |
					  (have_ad ? EPT_VLT_WR : 0) |
					  EPT_VLT_LADDR_VLD))
				vmx_inc_test_stage();
			set_ept_pte(pml4, guest_pte_addr, 2,
				    data_page1_pte_pte | (EPT_PRESENT));
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 5:
			if (exit_qual & EPT_VLT_RD)
				vmx_inc_test_stage();
			TEST_ASSERT(get_ept_pte(pml4, (unsigned long)pci_physaddr,
						1, &memaddr_pte));
			/*
			 * Address the entry by the guest-physical address, as
			 * the lookup above does, not by the PTE value.
			 */
			set_ept_pte(pml4, (unsigned long)pci_physaddr, 1,
				    memaddr_pte | EPT_RA);
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		case 6:
			if (exit_qual & EPT_VLT_WR)
				vmx_inc_test_stage();
			TEST_ASSERT(get_ept_pte(pml4, (unsigned long)pci_physaddr,
						1, &memaddr_pte));
			set_ept_pte(pml4, (unsigned long)pci_physaddr, 1,
				    memaddr_pte | EPT_RA | EPT_WA);
			ept_sync(INVEPT_SINGLE, eptp);
			break;
		default:
			// Should not reach here
			report(false, "ERROR : unexpected stage, %d",
			       vmx_get_test_stage());
			print_vmexit_info(exit_reason);
			return VMX_TEST_VMEXIT;
		}
		return VMX_TEST_RESUME;
	default:
		report(false, "Unknown exit reason, 0x%x", exit_reason.full);
		print_vmexit_info(exit_reason);
	}
	return VMX_TEST_VMEXIT;
}
static int ept_exit_handler(union exit_reason exit_reason)
{
return ept_exit_handler_common(exit_reason, false);
}
/*
 * Init hook of the EPT A/D test. Runs ept_init_common(true) and, unless that
 * already asked to exit, additionally requires EPT_CAP_AD_FLAG in
 * MSR_IA32_VMX_EPT_VPID_CAP.
 */
static int eptad_init(struct vmcs *vmcs)
{
	int r = ept_init_common(true);

	if (r != VMX_TEST_EXIT &&
	    !(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & EPT_CAP_AD_FLAG)) {
		printf("\tEPT A/D bits are not supported");
		r = VMX_TEST_EXIT;
	}

	return r;
}
static int pml_init(struct vmcs *vmcs)
{
u32 ctrl_cpu;
int r = eptad_init(vmcs);
if (r == VMX_TEST_EXIT)
return r;
if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) ||
!(ctrl_cpu_rev[1].clr & CPU_PML)) {
printf("\tPML is not supported");
return VMX_TEST_EXIT;
}
pml_log = alloc_page();
vmcs_write(PMLADDR, (u64)pml_log);
vmcs_write(GUEST_PML_INDEX, PML_INDEX - 1);
ctrl_cpu = vmcs_read(CPU_EXEC_CTRL1) | CPU_PML;
vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu);
return VMX_TEST_START;
}
/*
 * Guest code of the PML test: dirty data_page2 once and expect stage 1, then
 * keep dirtying it (bounded by PML_INDEX iterations) and expect stage 2.
 */
static void pml_main(void)
{
	int i;

	vmx_set_test_stage(0);
	*((u32 *)data_page2) = 0x1;
	vmcall();
	report(vmx_get_test_stage() == 1, "PML - Dirty GPA Logging");

	for (i = 0; vmx_get_test_stage() == 1; i++) {
		vmcall();
		*((u32 *)data_page2) = 0x1;
		/* Give up once more than PML_INDEX rounds have run. */
		if (i > PML_INDEX)
			break;
	}
	report(vmx_get_test_stage() == 2, "PML Full Event");
}
/* Guest entry point of the EPT A/D test: only the common EPT sequence. */
static void eptad_main(void)
{
	ept_common();
}
static int eptad_exit_handler(union exit_reason exit_reason)
{
return ept_exit_handler_common(exit_reason, true);
}
/*
 * Execute INVVPID of the given @type for @vpid (address operand 0) and
 * compare the outcome with the capability bits in ept_vpid.val.
 *
 * Returns false when the outcome matches the capability, true (after
 * printing a warning) when it does not.
 */
static bool invvpid_test(int type, u16 vpid)
{
	bool supported = ept_vpid.val &
			 (VPID_CAP_INVVPID_ADDR >> INVVPID_ADDR << type);
	bool ret = invvpid(type, vpid, 0);

	/* Expected outcome: nothing to warn about. */
	if (ret == !supported)
		return false;

	if (supported)
		printf("WARNING: invvpid failed!\n");
	else
		printf("WARNING: unsupported invvpid passed!\n");

	return true;
}
static int vpid_init(struct vmcs *vmcs)
{
u32 ctrl_cpu1;
if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY) ||
!(ctrl_cpu_rev[1].clr & CPU_VPID)) {
printf("\tVPID is not supported");
return VMX_TEST_EXIT;
}
ctrl_cpu1 = vmcs_read(CPU_EXEC_CTRL1);
ctrl_cpu1 |= CPU_VPID;
vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu1);
return VMX_TEST_START;
}
/*
 * Guest code of the VPID test. For each INVVPID flavour the guest sets an
 * even stage (0, 2, 4), issues a VMCALL and expects the stage to have been
 * bumped to the following odd value.
 */
static void vpid_main(void)
{
	static const char * const step_names[] = {
		"INVVPID SINGLE ADDRESS",
		"INVVPID SINGLE",
		"INVVPID ALL",
	};
	unsigned int step;

	for (step = 0; step < 3; step++) {
		vmx_set_test_stage(2 * step);
		vmcall();
		report(vmx_get_test_stage() == 2 * step + 1, "%s",
		       step_names[step]);
	}
}
/*
 * Exit handler of the VPID test. On VMCALL the current stage selects the
 * INVVPID type to try with VPID 1; the stage is advanced when
 * invvpid_test() returns false, and the VMCALL is skipped.
 */
static int vpid_exit_handler(union exit_reason exit_reason)
{
	u64 rip = vmcs_read(GUEST_RIP);
	u32 len = vmcs_read(EXI_INST_LEN);
	int type;

	if (exit_reason.basic != VMX_VMCALL) {
		report(false, "Unknown exit reason, 0x%x", exit_reason.full);
		print_vmexit_info(exit_reason);
		return VMX_TEST_VMEXIT;
	}

	switch (vmx_get_test_stage()) {
	case 0:
		type = INVVPID_ADDR;
		break;
	case 2:
		type = INVVPID_CONTEXT_GLOBAL;
		break;
	case 4:
		type = INVVPID_ALL;
		break;
	default:
		report(false, "ERROR: unexpected stage, %d",
		       vmx_get_test_stage());
		print_vmexit_info(exit_reason);
		return VMX_TEST_VMEXIT;
	}

	if (!invvpid_test(type, 1))
		vmx_inc_test_stage();

	vmcs_write(GUEST_RIP, rip + len);
	return VMX_TEST_RESUME;
}
/* Interrupt vector programmed into the APIC timer LVT by interrupt_main(). */
#define TIMER_VECTOR	222

/* Set by timer_isr(); polled and reset by interrupt_main(). */
static volatile bool timer_fired;

/* Timer interrupt handler: record the interrupt and signal EOI. */
static void timer_isr(isr_regs_t *regs)
{
	timer_fired = true;
	apic_write(APIC_EOI, 0);
}
/*
 * Init hook of the interrupt test: start with PIN_EXTINT cleared and
 * register timer_isr() for TIMER_VECTOR.
 */
static int interrupt_init(struct vmcs *vmcs)
{
	msr_bmp_init();

	vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT);

	handle_irq(TIMER_VECTOR, timer_isr);

	return VMX_TEST_START;
}
/*
 * Guest code of the interrupt test.
 *
 * Arms the local APIC timer (vector TIMER_VECTOR) several times and checks
 * that timer_fired gets set in each scenario named by the report() strings.
 * The VMCALLs between scenarios hand control to interrupt_exit_handler(),
 * which changes the VMCS setup according to the current stage and bumps the
 * stage.
 */
static void interrupt_main(void)
{
long long start, loops;
vmx_set_test_stage(0);
apic_write(APIC_LVTT, TIMER_VECTOR);
/* Interrupts enabled, guest spins until the timer fires or the loop ends. */
irq_enable();
apic_write(APIC_TMICT, 1);
for (loops = 0; loops < 10000000 && !timer_fired; loops++)
asm volatile ("nop");
report(timer_fired, "direct interrupt while running guest");
apic_write(APIC_TMICT, 0);
irq_disable();
/* Stage 0 -> 1: the handler sets PIN_EXTINT. */
vmcall();
timer_fired = false;
/* Guest interrupts stay disabled during this spin. */
apic_write(APIC_TMICT, 1);
for (loops = 0; loops < 10000000 && !timer_fired; loops++)
asm volatile ("nop");
report(timer_fired, "intercepted interrupt while running guest");
irq_enable();
apic_write(APIC_TMICT, 0);
irq_disable();
/* Stage 1 -> 2: the handler clears PIN_EXTINT. */
vmcall();
timer_fired = false;
start = rdtsc();
apic_write(APIC_TMICT, 1000000);
/* Halt with interrupts enabled; the timer interrupt ends the HLT. */
asm volatile ("sti; hlt");
report(rdtsc() - start > 1000000 && timer_fired,
"direct interrupt + hlt");
apic_write(APIC_TMICT, 0);
irq_disable();
/* Stage 2 -> 3: the handler sets PIN_EXTINT. */
vmcall();
timer_fired = false;
start = rdtsc();
apic_write(APIC_TMICT, 1000000);
asm volatile ("sti; hlt");
report(rdtsc() - start > 10000 && timer_fired,
"intercepted interrupt + hlt");
apic_write(APIC_TMICT, 0);
irq_disable();
/* Stage 3 -> 4: the handler clears PIN_EXTINT. */
vmcall();
timer_fired = false;
start = rdtsc();
apic_write(APIC_TMICT, 1000000);
irq_enable();
asm volatile ("nop");
/* Stage 4 -> 5: the handler sets the guest activity state to ACTV_HLT. */
vmcall();
report(rdtsc() - start > 10000 && timer_fired,
"direct interrupt + activity state hlt");
apic_write(APIC_TMICT, 0);
irq_disable();
/* Stage 5 -> 6: the handler sets PIN_EXTINT. */
vmcall();
timer_fired = false;
start = rdtsc();
apic_write(APIC_TMICT, 1000000);
irq_enable();
asm volatile ("nop");
/* Stage 6 -> 7: the handler sets the guest activity state to ACTV_HLT. */
vmcall();
report(rdtsc() - start > 10000 && timer_fired,
"intercepted interrupt + activity state hlt");
apic_write(APIC_TMICT, 0);
irq_disable();
vmx_set_test_stage(7);
/* Stage 7 -> 8: the handler sets EXI_INTA and PIN_EXTINT. */
vmcall();
timer_fired = false;
apic_write(APIC_TMICT, 1);
for (loops = 0; loops < 10000000 && !timer_fired; loops++)
asm volatile ("nop");
report(timer_fired,
"running a guest with interrupt acknowledgement set");
apic_write(APIC_TMICT, 0);
irq_enable();
timer_fired = false;
/* Stage 8 -> 9: the handler sets ACTV_HLT and injects TIMER_VECTOR. */
vmcall();
report(timer_fired, "Inject an event to a halted guest");
}
/*
 * Exit handler of the interrupt test.
 *
 * VMCALL:  apply the VMCS change that belongs to the current stage, advance
 *          the stage and skip the VMCALL. Stages without a case only advance.
 * EXTINT:  let the pending interrupt be handled (see below) and resume.
 * Any other exit reason is reported and ends the test.
 */
static int interrupt_exit_handler(union exit_reason exit_reason)
{
u64 guest_rip = vmcs_read(GUEST_RIP);
u32 insn_len = vmcs_read(EXI_INST_LEN);
switch (exit_reason.basic) {
case VMX_VMCALL:
switch (vmx_get_test_stage()) {
/* Set PIN_EXTINT for the next scenario. */
case 0:
case 2:
case 5:
vmcs_write(PIN_CONTROLS,
vmcs_read(PIN_CONTROLS) | PIN_EXTINT);
break;
/* As above, and additionally set EXI_INTA in the exit controls. */
case 7:
vmcs_write(EXI_CONTROLS, vmcs_read(EXI_CONTROLS) | EXI_INTA);
vmcs_write(PIN_CONTROLS,
vmcs_read(PIN_CONTROLS) | PIN_EXTINT);
break;
/* Clear PIN_EXTINT again. */
case 1:
case 3:
vmcs_write(PIN_CONTROLS,
vmcs_read(PIN_CONTROLS) & ~PIN_EXTINT);
break;
/* Re-enter the guest with activity state ACTV_HLT. */
case 4:
case 6:
vmcs_write(GUEST_ACTV_STATE, ACTV_HLT);
break;
/* Re-enter with ACTV_HLT and inject TIMER_VECTOR as an external interrupt. */
case 8:
vmcs_write(GUEST_ACTV_STATE, ACTV_HLT);
vmcs_write(ENT_INTR_INFO,
TIMER_VECTOR |
(VMX_INTR_TYPE_EXT_INTR << INTR_INFO_INTR_TYPE_SHIFT) |
INTR_INFO_VALID_MASK);
break;
}
vmx_inc_test_stage();
vmcs_write(GUEST_RIP, guest_rip + insn_len);
return VMX_TEST_RESUME;
case VMX_EXTINT:
if (vmcs_read(EXI_CONTROLS) & EXI_INTA) {
/* With EXI_INTA set, take the vector from EXI_INTR_INFO. */
int vector = vmcs_read(EXI_INTR_INFO) & 0xff;
handle_external_interrupt(vector);
} else {
/* Open a one-instruction window with interrupts enabled. */
irq_enable();
asm volatile ("nop");
irq_disable();
}
/* From stage 2 on, put the guest back into the active state. */
if (vmx_get_test_stage() >= 2)
vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
return VMX_TEST_RESUME;
default:
report(false, "Unknown exit reason, 0x%x", exit_reason.full);
print_vmexit_info(exit_reason);
}
return VMX_TEST_VMEXIT;
}
/* Set by nmi_isr(); checked and reset by nmi_hlt_main(). */
static volatile int nmi_fired;

/* Argument nmi_message_thread() passes to delay() before sending each NMI. */
#define NMI_DELAY	100000000ULL

/* NMI handler: only records that an NMI was delivered. */
static void nmi_isr(isr_regs_t *regs)
{
	nmi_fired = true;
}
/*
 * Init hook of the NMI + HLT test: register nmi_isr() and start with both
 * PIN_NMI and PIN_VIRT_NMI cleared.
 */
static int nmi_hlt_init(struct vmcs *vmcs)
{
	msr_bmp_init();
	handle_irq(NMI_VECTOR, nmi_isr);

	/* Two separate read-modify-write cycles, one per control bit. */
	vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_NMI);
	vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) & ~PIN_VIRT_NMI);

	return VMX_TEST_START;
}
/*
 * Helper started on another CPU by nmi_hlt_main(). For stage 1 and then
 * stage 2 it waits until the stage is reached, delays by NMI_DELAY and sends
 * an NMI to the CPU with APIC id id_map[0].
 */
static void nmi_message_thread(void *data)
{
	int stage;

	for (stage = 1; stage <= 2; stage++) {
		while (vmx_get_test_stage() != stage)
			pause();

		delay(NMI_DELAY);
		apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, id_map[0]);
	}
}
/*
 * Guest code of the NMI + HLT test.
 *
 * Needs a second CPU, which runs nmi_message_thread() and sends one NMI
 * after stage 1 and one after stage 2 is announced. The first NMI is expected
 * to reach nmi_isr() directly; the second one is expected not to reach it.
 * Stage -1 is set on failure or skip, stage 3 on success.
 */
static void nmi_hlt_main(void)
{
long long start;
if (cpu_count() < 2) {
report_skip(__func__);
vmx_set_test_stage(-1);
return;
}
vmx_set_test_stage(0);
on_cpu_async(1, nmi_message_thread, NULL);
start = rdtsc();
/* Release the helper thread, then halt until the NMI arrives. */
vmx_set_test_stage(1);
asm volatile ("hlt");
report((rdtsc() - start > NMI_DELAY) && nmi_fired,
"direct NMI + hlt");
if (!nmi_fired)
vmx_set_test_stage(-1);
nmi_fired = false;
/* In stage 1 the exit handler sets PIN_NMI and PIN_VIRT_NMI on this VMCALL. */
vmcall();
start = rdtsc();
vmx_set_test_stage(2);
asm volatile ("hlt");
report((rdtsc() - start > NMI_DELAY) && !nmi_fired,
"intercepted NMI + hlt");
if (nmi_fired) {
report(!nmi_fired, "intercepted NMI was dispatched");
vmx_set_test_stage(-1);
return;
}
vmx_set_test_stage(3);
}
/*
 * Exit handler of the NMI + HLT test, keyed on the current stage:
 *   1: must be a VMCALL; set PIN_NMI and PIN_VIRT_NMI, skip the VMCALL.
 *   2: must be an NMI exit; put the guest back into the active state.
 *   3 and anything else: finish with VMX_TEST_VMEXIT.
 */
static int nmi_hlt_exit_handler(union exit_reason exit_reason)
{
	u64 guest_rip = vmcs_read(GUEST_RIP);
	u32 insn_len = vmcs_read(EXI_INST_LEN);

	switch (vmx_get_test_stage()) {
	case 1:
		if (exit_reason.basic != VMX_VMCALL) {
			report(false, "VMEXIT not due to vmcall. Exit reason 0x%x",
			       exit_reason.full);
			print_vmexit_info(exit_reason);
			return VMX_TEST_VMEXIT;
		}

		vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) | PIN_NMI);
		vmcs_write(PIN_CONTROLS, vmcs_read(PIN_CONTROLS) | PIN_VIRT_NMI);
		vmcs_write(GUEST_RIP, guest_rip + insn_len);
		break;
	case 2:
		if (exit_reason.basic != VMX_EXC_NMI) {
			report(false, "VMEXIT not due to NMI intercept. Exit reason 0x%x",
			       exit_reason.full);
			print_vmexit_info(exit_reason);
			return VMX_TEST_VMEXIT;
		}

		report(true, "NMI intercept while running guest");
		vmcs_write(GUEST_ACTV_STATE, ACTV_ACTIVE);
		break;
	case 3:
	default:
		/* Stage 3 and every unexpected stage end the test. */
		return VMX_TEST_VMEXIT;
	}

	return VMX_TEST_RESUME;
}
/*
 * Init hook of the debug-controls test. Loads host DR0-DR2 with 0, DR7 with
 * 0x402 and DEBUGCTL with 0x1, sets the guest fields to 0x404 / 0x2, and
 * enables ENT_LOAD_DBGCTLS and EXI_SAVE_DBGCTLS.
 */
static int dbgctls_init(struct vmcs *vmcs)
{
	u64 host_dr7 = 0x402;
	u64 cleared = 0;

	msr_bmp_init();

	asm volatile(
		"mov %0,%%dr0\n\t"
		"mov %0,%%dr1\n\t"
		"mov %0,%%dr2\n\t"
		"mov %1,%%dr7\n\t"
		: : "r" (cleared), "r" (host_dr7));
	wrmsr(MSR_IA32_DEBUGCTLMSR, 0x1);

	vmcs_write(GUEST_DR7, 0x404);
	vmcs_write(GUEST_DEBUGCTL, 0x2);

	vmcs_write(ENT_CONTROLS, vmcs_read(ENT_CONTROLS) | ENT_LOAD_DBGCTLS);
	vmcs_write(EXI_CONTROLS, vmcs_read(EXI_CONTROLS) | EXI_SAVE_DBGCTLS);

	return VMX_TEST_START;
}
/*
 * Guest code of the debug-controls test.
 *
 * First half: with load/save of debug controls enabled, DR7 must read 0x404
 * (the GUEST_DR7 value set by dbgctls_init()), and after the guest writes
 * 0x408 the exit handler must see that value saved (stage 0 -> 1).
 * Second half (skipped when either control is forced on): the handler turns
 * both controls off in stage 2; DR7 must then read the host value 0x402 and
 * guest writes must not be saved (stage 3 -> 4).
 */
static void dbgctls_main(void)
{
u64 dr7, debugctl;
asm volatile("mov %%dr7,%0" : "=r" (dr7));
debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
/* Commented out: KVM does not support DEBUGCTL so far */
(void)debugctl;
report(dr7 == 0x404, "Load debug controls" /* && debugctl == 0x2 */);
dr7 = 0x408;
asm volatile("mov %0,%%dr7" : : "r" (dr7));
wrmsr(MSR_IA32_DEBUGCTLMSR, 0x3);
vmx_set_test_stage(0);
vmcall();
report(vmx_get_test_stage() == 1, "Save debug controls");
/* Nothing more to test if either control cannot be cleared. */
if (ctrl_enter_rev.set & ENT_LOAD_DBGCTLS ||
ctrl_exit_rev.set & EXI_SAVE_DBGCTLS) {
printf("\tDebug controls are always loaded/saved\n");
return;
}
vmx_set_test_stage(2);
vmcall();
asm volatile("mov %%dr7,%0" : "=r" (dr7));
debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
/* Commented out: KVM does not support DEBUGCTL so far */
(void)debugctl;
report(dr7 == 0x402,
"Guest=host debug controls" /* && debugctl == 0x1 */);
dr7 = 0x408;
asm volatile("mov %0,%%dr7" : : "r" (dr7));
wrmsr(MSR_IA32_DEBUGCTLMSR, 0x3);
vmx_set_test_stage(3);
vmcall();
report(vmx_get_test_stage() == 4, "Don't save debug controls");
}
/*
 * Exit handler of the debug-controls test.
 *
 * Samples host DR7 and DEBUGCTL on entry. On VMCALL:
 *   stage 0: expects host DR7 == 0x400, DEBUGCTL == 0 and the guest's 0x408
 *            saved in GUEST_DR7; advances the stage on success.
 *   stage 2: restores the host/guest values used by dbgctls_init() and turns
 *            ENT_LOAD_DBGCTLS / EXI_SAVE_DBGCTLS off.
 *   stage 3: expects GUEST_DR7 to still hold 0x404; advances on success.
 * Other stages only skip the VMCALL. Any other exit reason is reported and
 * ends the test with VMX_TEST_VMEXIT.
 */
static int dbgctls_exit_handler(union exit_reason exit_reason)
{
	u32 insn_len = vmcs_read(EXI_INST_LEN);
	u64 guest_rip = vmcs_read(GUEST_RIP);
	u64 dr7, debugctl;

	asm volatile("mov %%dr7,%0" : "=r" (dr7));
	debugctl = rdmsr(MSR_IA32_DEBUGCTLMSR);

	switch (exit_reason.basic) {
	case VMX_VMCALL:
		switch (vmx_get_test_stage()) {
		case 0:
			if (dr7 == 0x400 && debugctl == 0 &&
			    vmcs_read(GUEST_DR7) == 0x408 /* &&
			    Commented out: KVM does not support DEBUGCTL so far
			    vmcs_read(GUEST_DEBUGCTL) == 0x3 */)
				vmx_inc_test_stage();
			break;
		case 2:
			dr7 = 0x402;
			asm volatile("mov %0,%%dr7" : : "r" (dr7));
			wrmsr(MSR_IA32_DEBUGCTLMSR, 0x1);
			vmcs_write(GUEST_DR7, 0x404);
			vmcs_write(GUEST_DEBUGCTL, 0x2);

			vmcs_write(ENT_CONTROLS,
				   vmcs_read(ENT_CONTROLS) & ~ENT_LOAD_DBGCTLS);
			vmcs_write(EXI_CONTROLS,
				   vmcs_read(EXI_CONTROLS) & ~EXI_SAVE_DBGCTLS);
			break;
		case 3:
			if (dr7 == 0x400 && debugctl == 0 &&
			    vmcs_read(GUEST_DR7) == 0x404 /* &&
			    Commented out: KVM does not support DEBUGCTL so far
			    vmcs_read(GUEST_DEBUGCTL) == 0x2 */)
				vmx_inc_test_stage();
			break;
		}
		vmcs_write(GUEST_RIP, guest_rip + insn_len);
		return VMX_TEST_RESUME;
	default:
		/* Hex, like every other handler in this file. */
		report(false, "Unknown exit reason, 0x%x", exit_reason.full);
		print_vmexit_info(exit_reason);
	}
	return VMX_TEST_VMEXIT;
}
/*
 * One entry of the MSR areas whose addresses msr_switch_init() writes into
 * the VMCS (entry MSR-load, exit MSR-store, exit MSR-load).
 */
struct vmx_msr_entry {
/* MSR number. */
u32 index;
u32 reserved;
/* Value to load, or the slot a stored value is read from. */
u64 value;
} __attribute__((packed));
/* Base value for the MSR switch test; also used as MSR_MAGIC + 1 and + 2. */
#define MSR_MAGIC 0x31415926
/* The three MSR areas, each allocated as one page in msr_switch_init(). */
struct vmx_msr_entry *exit_msr_store, *entry_msr_load, *exit_msr_load;
/*
 * Init hook of the MSR switch test: allocate the three MSR areas, ask VM
 * entry to load MSR_KERNEL_GS_BASE with MSR_MAGIC, and register one entry
 * per area in the VMCS. Starts the test at stage 1.
 */
static int msr_switch_init(struct vmcs *vmcs)
{
	msr_bmp_init();

	exit_msr_store = alloc_page();
	exit_msr_load = alloc_page();
	entry_msr_load = alloc_page();

	entry_msr_load[0].index = MSR_KERNEL_GS_BASE;
	entry_msr_load[0].value = MSR_MAGIC;

	vmx_set_test_stage(1);

	vmcs_write(ENT_MSR_LD_CNT, 1);
	vmcs_write(ENTER_MSR_LD_ADDR, (u64)entry_msr_load);

	vmcs_write(EXI_MSR_ST_CNT, 1);
	vmcs_write(EXIT_MSR_ST_ADDR, (u64)exit_msr_store);

	vmcs_write(EXI_MSR_LD_CNT, 1);
	vmcs_write(EXIT_MSR_LD_ADDR, (u64)exit_msr_load);

	return VMX_TEST_START;
}
/*
 * Guest code of the MSR switch test. In stage 1 it verifies the value loaded
 * on VM entry, then prepares the exit store/load entries and moves to
 * stage 2. It always finishes with a VMCALL.
 */
static void msr_switch_main(void)
{
	if (vmx_get_test_stage() != 1) {
		vmcall();
		return;
	}

	report(rdmsr(MSR_KERNEL_GS_BASE) == MSR_MAGIC,
	       "VM entry MSR load");
	vmx_set_test_stage(2);

	wrmsr(MSR_KERNEL_GS_BASE, MSR_MAGIC + 1);
	exit_msr_store[0].index = MSR_KERNEL_GS_BASE;
	exit_msr_load[0].index = MSR_KERNEL_GS_BASE;
	exit_msr_load[0].value = MSR_MAGIC + 2;

	vmcall();
}
/*
 * Exit handler of the MSR switch test. Only a VMCALL in stage 2 is expected:
 * it checks the stored and the loaded MSR value, moves to stage 3 and
 * retargets the entry-load area at MSR_FS_BASE before resuming.
 */
static int msr_switch_exit_handler(union exit_reason exit_reason)
{
	if (exit_reason.basic != VMX_VMCALL || vmx_get_test_stage() != 2) {
		printf("ERROR %s: unexpected stage=%u or reason=0x%x\n",
		       __func__, vmx_get_test_stage(), exit_reason.full);
		return VMX_TEST_EXIT;
	}

	report(exit_msr_store[0].value == MSR_MAGIC + 1,
	       "VM exit MSR store");
	report(rdmsr(MSR_KERNEL_GS_BASE) == MSR_MAGIC + 2,
	       "VM exit MSR load");

	vmx_set_test_stage(3);
	entry_msr_load[0].index = MSR_FS_BASE;

	return VMX_TEST_RESUME;
}
static int msr_switch_entry_failure(struct vmentry_result *result)
{
if (result->vm_fail) {
printf("ERROR %s: VM-Fail on %s\n", __func__, result->instr);
return VMX_TEST_EXIT;
}
if (result->exit_reason.failed_vmentry &&
result->exit_reason.basic == VMX_FAIL_MSR &&
vmx_get_test_stage() == 3) {
report(vmcs_read(EXI_QUALIFICATION) == 1,
"VM entry MSR load: try to load FS_BASE");
return VMX_TEST_VMEXIT;
}
printf("ERROR %s: unexpected stage=%u or reason=%x\n",
__func__, vmx_get_test_stage(), result->exit_reason.full);
return VMX_TEST_EXIT;
}
/* Init hook of the VMMCALL test: set only the #UD bit in the exception bitmap. */
static int vmmcall_init(struct vmcs *vmcs)
{
	vmcs_write(EXC_BITMAP, 1 << UD_VECTOR);

	return VMX_TEST_START;
}
/*
 * Guest code of the VMMCALL test: execute VMMCALL with RAX = 0xABCD.
 * Reaching the report() after the instruction counts as a failure.
 */
static void vmmcall_main(void)
{
	asm volatile(
		"mov $0xABCD, %%rax\n\t"
		"vmmcall\n\t"
		::: "rax");

	report(0, "VMMCALL");
}
/*
 * Exit handler of the VMMCALL test.
 *
 * A VMCALL exit is reported as a failure. An exception/NMI exit passes when
 * the vector in EXI_INTR_INFO is UD_VECTOR. Every exit ends the test with
 * VMX_TEST_VMEXIT.
 */
static int vmmcall_exit_handler(union exit_reason exit_reason)
{
	switch (exit_reason.basic) {
	case VMX_VMCALL:
		report(0, "VMMCALL triggers #UD");
		break;
	case VMX_EXC_NMI:
		report((vmcs_read(EXI_INTR_INFO) & 0xff) == UD_VECTOR,
		       "VMMCALL triggers #UD");
		break;
	default:
		report(false, "Unknown exit reason, 0x%x", exit_reason.full);
		print_vmexit_info(exit_reason);
	}

	return VMX_TEST_VMEXIT;
}
/*
 * Init hook of the disable-RDTSCP test: when secondary controls exist, clear
 * CPU_RDTSCP in CPU_EXEC_CTRL1. The test starts either way.
 */
static int disable_rdtscp_init(struct vmcs *vmcs)
{
	u32 ctrl_cpu1;

	if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY))
		return VMX_TEST_START;

	ctrl_cpu1 = vmcs_read(CPU_EXEC_CTRL1);
	ctrl_cpu1 &= ~CPU_RDTSCP;
	vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu1);

	return VMX_TEST_START;
}
/*
 * Guest #UD handler of the disable-RDTSCP test. In stage 0 and stage 2 it
 * records a pass, advances the stage and steps RIP over the faulting
 * instruction (3 and 4 bytes respectively). Other stages are ignored.
 */
static void disable_rdtscp_ud_handler(struct ex_regs *regs)
{
	switch (vmx_get_test_stage()) {
	case 0:
		report(true, "RDTSCP triggers #UD");
		vmx_inc_test_stage();
		regs->rip += 3;
		break;
	case 2:
		report(true, "RDPID triggers #UD");
		vmx_inc_test_stage();
		regs->rip += 4;
		break;
	default:
		break;
	}
}
/*
 * Guest code of the disable-RDTSCP test: execute two instructions that are
 * expected to raise #UD, with disable_rdtscp_ud_handler() installed, and
 * issue a VMCALL after each one.
 */
static void disable_rdtscp_main(void)
{
/* Test that #UD is properly injected in L2. */
handle_exception(UD_VECTOR, disable_rdtscp_ud_handler);
vmx_set_test_stage(0);
asm volatile("rdtscp" : : : "eax", "ecx", "edx");
vmcall();
/*
 * Hand-encoded 4-byte instruction; presumably RDPID, since the #UD handler
 * reports "RDPID triggers #UD" at this point -- confirm the encoding.
 */
asm volatile(".byte 0xf3, 0x0f, 0xc7, 0xf8" : : : "eax");
/* Remove the handler again before the final VMCALL. */
handle_exception(UD_VECTOR, 0);
vmcall();
}
/*
 * Exit handler of the disable-RDTSCP test (VMCALL only).
 *   stage 0: the #UD handler did not run for RDTSCP; report the failure,
 *            advance, then continue as in stage 1.
 *   stage 1: advance, skip the 3-byte VMCALL and resume.
 *   stage 2: the #UD handler did not run for RDPID; report the failure.
 * Everything that does not resume ends the test with VMX_TEST_VMEXIT.
 */
static int disable_rdtscp_exit_handler(union exit_reason exit_reason)
{
	if (exit_reason.basic != VMX_VMCALL) {
		report(false, "Unknown exit reason, 0x%x", exit_reason.full);
		print_vmexit_info(exit_reason);
		return VMX_TEST_VMEXIT;
	}

	switch (vmx_get_test_stage()) {
	case 0:
		report(false, "RDTSCP triggers #UD");
		vmx_inc_test_stage();
		/* fallthrough */
	case 1:
		vmx_inc_test_stage();
		vmcs_write(GUEST_RIP, vmcs_read(GUEST_RIP) + 3);
		return VMX_TEST_RESUME;
	case 2:
		report(false, "RDPID triggers #UD");
		break;
	default:
		break;
	}

	return VMX_TEST_VMEXIT;
}
/* Init hook of the INT3 test: set every bit of the exception bitmap. */
static int int3_init(struct vmcs *vmcs)
{
	vmcs_write(EXC_BITMAP, ~0u);

	return VMX_TEST_START;
}
/* Guest code of the INT3 test: execute a single INT3 instruction. */
static void int3_guest_main(void)
{
	asm volatile ("int3");
}
static int int3_exit_handler(union exit_reason exit_reason)
{
u32 intr_info = vmcs_read(EXI_INTR_INFO);
report(exit_reason.basic == VMX_EXC_NMI &&
(intr_info & INTR_INFO_VALID_MASK) &&
(intr_info & INTR_INFO_VECTOR_MASK) == BP_VECTOR &&
((intr_info & INTR_INFO_INTR_TYPE_MASK) >>
INTR_INFO_INTR_TYPE_SHIFT) == VMX_INTR_TYPE_SOFT_EXCEPTION,
"L1 intercepts #BP");
return VMX_TEST_VMEXIT;
}
/* Init hook of the INTO test: set every bit of the exception bitmap. */
static int into_init(struct vmcs *vmcs)
{
	vmcs_write(EXC_BITMAP, ~0u);

	return VMX_TEST_START;
}
/*
 * Guest code of the INTO test.
 *
 * Far-calls through KERNEL_CS32 into a .code32 snippet that overflows EAX
 * and executes INTO. The call is skipped, with a message, when the code
 * label or the stack pointer does not fit into 32 bits.
 */
static void into_guest_main(void)
{
struct far_pointer32 fp = {
.offset = (uintptr_t)&&into,
.selector = KERNEL_CS32,
};
uintptr_t rsp;
asm volatile ("mov %%rsp, %0" : "=r"(rsp));
/* Detects truncation of the label address when it was stored in fp.offset. */
if (fp.offset != (uintptr_t)&&into) {
printf("Code address too high.\n");
return;
}
if ((u32)rsp != rsp) {
printf("Stack address too high.\n");
return;
}
asm goto ("lcall *%0" : : "m" (fp) : "rax" : into);
return;
into:
/* 0x7fffffff + 0x7fffffff overflows as a signed 32-bit addition. */
asm volatile (".code32;"
"movl $0x7fffffff, %eax;"
"addl %eax, %eax;"
"into;"
"lret;"
".code64");
__builtin_unreachable();
}
static int into_exit_handler(union exit_reason exit_reason)
{
u32 intr_info = vmcs_read(EXI_INTR_INFO);
report(exit_reason.basic == VMX_EXC_NMI &&
(intr_info & INTR_INFO_VALID_MASK) &&
(intr_info & INTR_INFO_VECTOR_MASK) == OF_VECTOR &&
((intr_info & INTR_INFO_INTR_TYPE_MASK) >>
INTR_INFO_INTR_TYPE_SHIFT) == VMX_INTR_TYPE_SOFT_EXCEPTION,
"L1 intercepts #OF");
return VMX_TEST_VMEXIT;
}
/* Guest code: announce it and call exit(0) from within L2. */
static void exit_monitor_from_l2_main(void)
{
	printf("Calling exit(0) from l2...\n");

	exit(0);
}
/* Exit handler that must never run: any exit reaching it is a failure. */
static int exit_monitor_from_l2_handler(union exit_reason exit_reason)
{
	report(false, "The guest should have killed the VMM");

	return VMX_TEST_EXIT;
}
/*
 * Assert that the EXI_REASON field of the current VMCS equals @expected; the
 * failure message carries both reasons in descriptive form.
 */
static void assert_exit_reason(u64 expected)
{
	u64 actual;

	actual = vmcs_read(EXI_REASON);

	TEST_ASSERT_EQ_MSG(expected, actual, "Expected %s, got %s.",
			   exit_reason_description(expected),
			   exit_reason_description(actual));
}
/* Advance GUEST_RIP by the length of the instruction that caused the exit. */
static void skip_exit_insn(void)
{
	u64 rip = vmcs_read(GUEST_RIP);
	u32 len = vmcs_read(EXI_INST_LEN);

	rip += len;
	vmcs_write(GUEST_RIP, rip);
}
/* Assert that the last exit was a VMCALL and step the guest past it. */
static void skip_exit_vmcall(void)
{
	assert_exit_reason(VMX_VMCALL);

	skip_exit_insn();
}
/* Guest for v2_null_test(): intentionally does nothing. */
static void v2_null_test_guest(void)
{
	/* empty on purpose */
}
/* Enter an empty guest once and report a pass under this function's name. */
static void v2_null_test(void)
{
	test_set_guest(v2_null_test_guest);

	enter_guest();

	report(1, __func__);
}
/* Guest for v2_multiple_entries_test(): stage 1, VMCALL, then stage 2. */
static void v2_multiple_entries_test_guest(void)
{
	vmx_set_test_stage(1);

	vmcall();

	vmx_set_test_stage(2);
}
/*
 * Enter the guest twice: the first entry must stop at the VMCALL with the
 * stage at 1, the second must leave the stage at 2.
 */
static void v2_multiple_entries_test(void)
{
	test_set_guest(v2_multiple_entries_test_guest);

	/* First entry runs up to the guest's VMCALL. */
	enter_guest();
	TEST_ASSERT_EQ(vmx_get_test_stage(), 1);
	skip_exit_vmcall();

	/* Second entry runs the rest of the guest. */
	enter_guest();
	TEST_ASSERT_EQ(vmx_get_test_stage(), 2);

	report(1, __func__);
}
/* Shared state of the fixture tests; 1 is the value the teardown restores. */
static int fixture_test_data = 1;

/* Teardown callback: reset the int that @data points at back to 1. */
static void fixture_test_teardown(void *data)
{
	int *value = data;

	*value = 1;
}
/* Guest of the fixture tests: bump the shared counter by one. */
static void fixture_test_guest(void)
{
	fixture_test_data += 1;
}
/*
 * Common setup of the fixture tests: check that the shared counter is at its
 * reset value, set it to 2, register the teardown and install the guest.
 */
static void fixture_test_setup(void)
{
	TEST_ASSERT_EQ_MSG(1, fixture_test_data,
			   "fixture_test_teardown didn't run?!");

	fixture_test_data = 2;

	test_add_teardown(fixture_test_teardown, &fixture_test_data);
	test_set_guest(fixture_test_guest);
}
/*
 * First fixture test: after setup the counter is 2, and one guest run must
 * raise it to 3.
 */
static void fixture_test_case1(void)
{
	fixture_test_setup();
	TEST_ASSERT_EQ(2, fixture_test_data);

	enter_guest();
	TEST_ASSERT_EQ(3, fixture_test_data);

	report(1, __func__);
}
/*
 * Second fixture test, same steps as the first: it only succeeds if the
 * counter was reset to 1 before fixture_test_setup() runs again.
 */
static void fixture_test_case2(void)
{
	fixture_test_setup();
	TEST_ASSERT_EQ(2, fixture_test_data);

	enter_guest();
	TEST_ASSERT_EQ(3, fixture_test_data);

	report(1, __func__);
}
/*
 * Operation requested from the EPT access test guest. do_ept_access_op()
 * stores one of these in ept_access_test_data.op before entering the guest.
 * NOTE(review): the guest loop that interprets the values is not in this
 * part of the file; the per-value notes below follow the names -- confirm.
 */
enum ept_access_op {
OP_READ,
OP_WRITE,
OP_EXEC,
/* Used by ept_access_test_guest_flush_tlb(). */
OP_FLUSH_TLB,
OP_EXIT,
};
/*
 * State shared between the host side of the EPT access tests and the guest.
 * NOTE(review): presumably gpa/gva and hpa/hva are the guest-physical /
 * guest-virtual and host-physical / host-virtual addresses of the test
 * page -- confirm at the setup code.
 */
static struct ept_access_test_data {
/* Address whose EPT mapping the tests modify. */
unsigned long gpa;
/* ept_access_paddr() asserts that the guest PTE for gva points at gpa. */
unsigned long *gva;
/* Target address ept_twiddle() uses when it builds a huge-page entry. */
unsigned long hpa;
unsigned long *hva;
/* Operation for the next guest entry, set by do_ept_access_op(). */
enum ept_access_op op;
} ept_access_test_data;
/* Start and end labels of the assembly snippet defined below. */
extern unsigned char ret42_start;
extern unsigned char ret42_end;
/*
 * Returns 42: a 64-byte aligned snippet that loads 42 into EAX and returns.
 */
asm(
".align 64\n"
"ret42_start:\n"
"mov $42, %eax\n"
"ret\n"
"ret42_end:\n"
);
/*
 * Print one line for every EPT violation qualification flag that differs
 * between @expected and @actual, saying whether the flag was expected or
 * unexpected.
 */
static void
diagnose_ept_violation_qual(u64 expected, u64 actual)
{
/* A flag differs exactly when it is set in the XOR of the two values. */
#define DIAGNOSE_FLAG(flag)						\
	do {								\
		if ((expected ^ actual) & (flag))			\
			printf(#flag " %sexpected\n",			\
			       (expected & (flag)) ? "" : "un");	\
	} while (0)

	DIAGNOSE_FLAG(EPT_VLT_RD);
	DIAGNOSE_FLAG(EPT_VLT_WR);
	DIAGNOSE_FLAG(EPT_VLT_FETCH);
	DIAGNOSE_FLAG(EPT_VLT_PERM_RD);
	DIAGNOSE_FLAG(EPT_VLT_PERM_WR);
	DIAGNOSE_FLAG(EPT_VLT_PERM_EX);
	DIAGNOSE_FLAG(EPT_VLT_LADDR_VLD);
	DIAGNOSE_FLAG(EPT_VLT_PADDR);

#undef DIAGNOSE_FLAG
}
static void do_ept_access_op(enum ept_access_op op)
{
ept_access_test_data.op = op;
enter_guest();
}
/*
 * Ask the guest to flush its TLB, i.e. to drop its gva -> gpa translations,
 * and step over the VMCALL it exits with. Only tests that modify guest PTEs
 * need this.
 */
static void ept_access_test_guest_flush_tlb(void)
{
	do_ept_access_op(OP_FLUSH_TLB);

	skip_exit_vmcall();
}
/*
 * Rewrite the EPT entry at @level in the mapping of @gpa and invalidate the
 * EPT. With @mkhuge the entry is first turned into a huge-page entry that
 * points at ept_access_test_data.hpa; afterwards the bits in @clear are
 * cleared and the bits in @set are set.
 *
 * Returns the entry's previous value so it can be restored with
 * ept_untwiddle().
 */
static unsigned long ept_twiddle(unsigned long gpa, bool mkhuge, int level,
				 unsigned long clear, unsigned long set)
{
	unsigned long orig_pte;
	unsigned long new_pte;

	/* Screw with the mapping at the requested level. */
	TEST_ASSERT(get_ept_pte(pml4, gpa, level, &orig_pte));

	new_pte = mkhuge ?
		  ((orig_pte & ~EPT_ADDR_MASK) | ept_access_test_data.hpa |
		   EPT_LARGE_PAGE) :
		  orig_pte;
	new_pte &= ~clear;
	new_pte |= set;

	set_ept_pte(pml4, gpa, level, new_pte);
	ept_sync(INVEPT_SINGLE, eptp);

	return orig_pte;
}
/*
 * Undo ept_twiddle(): write @orig_pte back into the entry at @level for @gpa
 * and invalidate the EPT.
 */
static void ept_untwiddle(unsigned long gpa, int level, unsigned long orig_pte)
{
	set_ept_pte(pml4, gpa, level, orig_pte);

	ept_sync(INVEPT_SINGLE, eptp);
}
/*
 * Run @op in the guest and check that it ends in an EPT violation with the
 * given qualification and faulting guest-physical address.
 *
 * @leaf:           not used by the current implementation.
 * @op:             guest operation to perform.
 * @expected_qual:  expected exit qualification, after masking (see below).
 * @expected_paddr: expected value of the INFO_PHYS_ADDR field.
 *
 * The exit reason is asserted; qualification and address mismatches are
 * checked with TEST_EXPECT_EQ.
 */
static void do_ept_violation(bool leaf, enum ept_access_op op,
u64 expected_qual, u64 expected_paddr)
{
u64 qual;
/* Try the access and observe the violation. */
do_ept_access_op(op);
assert_exit_reason(VMX_EPT_VIOLATION);
qual = vmcs_read(EXI_QUALIFICATION);
/* Mask undefined bits (which may later be defined in certain cases). */
qual &= ~(EPT_VLT_GUEST_USER | EPT_VLT_GUEST_RW | EPT_VLT_GUEST_EX |
EPT_VLT_PERM_USER_EX);
/* Print per-flag differences before the comparison itself. */
diagnose_ept_violation_qual(expected_qual, qual);
TEST_EXPECT_EQ(expected_qual, qual);
/*
 * NOTE(review): the disabled check below refers to "data", which is not
 * declared in this function; it needs a local before it can be re-enabled.
 */
#if 0
/* Disable for now otherwise every test will fail */
TEST_EXPECT_EQ(vmcs_read(GUEST_LINEAR_ADDRESS),
(unsigned long) (
op == OP_EXEC ? data->gva + 1 : data->gva));
#endif
/*
 * TODO: tests that probe expected_paddr in pages other than the one at
 * the beginning of the 1g region.
 */
TEST_EXPECT_EQ(vmcs_read(INFO_PHYS_ADDR), expected_paddr);
}
/*
 * Provoke one EPT violation by modifying the entry at @level for the test
 * gpa (optionally as a huge page), check it, then restore the entry and let
 * the guest finish the operation.
 *
 * For OP_EXEC the expected faulting address is the test gpa plus
 * sizeof(unsigned long); for every other op it is the test gpa itself.
 */
static void
ept_violation_at_level_mkhuge(bool mkhuge, int level, unsigned long clear,
			      unsigned long set, enum ept_access_op op,
			      u64 expected_qual)
{
	unsigned long gpa = ept_access_test_data.gpa;
	bool leaf = level == 1 || mkhuge;
	u64 expected_paddr = gpa;
	unsigned long saved_pte;

	if (op == OP_EXEC)
		expected_paddr = gpa + sizeof(unsigned long);

	saved_pte = ept_twiddle(gpa, mkhuge, level, clear, set);

	do_ept_violation(leaf, op, expected_qual, expected_paddr);

	/* Fix the violation and resume the op loop. */
	ept_untwiddle(gpa, level, saved_pte);
	enter_guest();
	skip_exit_vmcall();
}
/*
 * Run the violation check at @level with a regular entry and, when huge
 * pages are supported at that level, once more with a huge-page entry.
 */
static void
ept_violation_at_level(int level, unsigned long clear, unsigned long set,
		       enum ept_access_op op, u64 expected_qual)
{
	ept_violation_at_level_mkhuge(false, level, clear, set, op,
				      expected_qual);

	if (!ept_huge_pages_supported(level))
		return;

	ept_violation_at_level_mkhuge(true, level, clear, set, op,
				      expected_qual);
}
/* Run the violation check at each of the levels 1 through 4, in that order. */
static void ept_violation(unsigned long clear, unsigned long set,
			  enum ept_access_op op, u64 expected_qual)
{
	int level;

	for (level = 1; level <= 4; level++)
		ept_violation_at_level(level, clear, set, op, expected_qual);
}
/*
 * Expect a violation for @op when the test page's EPT entries carry only the
 * permissions in @access. EPT_VLT_LADDR_VLD and EPT_VLT_PADDR are added to
 * the expected qualification.
 */
static void ept_access_violation(unsigned long access, enum ept_access_op op,
				 u64 expected_qual)
{
	u64 qual = expected_qual | EPT_VLT_LADDR_VLD | EPT_VLT_PADDR;

	ept_violation(EPT_PRESENT, access, op, qual);
}
/*
 * For translations that don't involve a GVA, that is physical address (paddr)
 * accesses, EPT violations don't set the flag EPT_VLT_PADDR. For a typical
 * guest memory access, the hardware does GVA -> GPA -> HPA. However, certain
 * translations don't involve GVAs, such as when the hardware does the guest
 * page table walk. For example, in translating GVA_1 -> GPA_1, the guest MMU
 * might try to set an A bit on a guest PTE. If the GPA_2 that the PTE resides
 * on isn't present in the EPT, then the EPT violation will be for GPA_2 and
 * the EPT_VLT_PADDR bit will be clear in the exit qualification.
 *
 * Note that paddr violations can also be triggered by loading PAE page tables
 * with wonky addresses. We don't test that yet.
 *
 * This function modifies the EPT entry that maps the GPA that the guest page
 * table entry mapping ept_access_test_data.gva resides on.
 *
 * @ept_access		EPT permissions to set. Other permissions are cleared.
 *
 * @pte_ad		Set the A/D bits on the guest PTE accordingly.
 *
 * @op			Guest operation to perform with
 *			ept_access_test_data.gva.
 *
 * @expect_violation
 *			Is a violation expected during the paddr access?
 *
 * @expected_qual	Expected qualification for the EPT violation.
 *			EPT_VLT_PADDR should be clear.
 */
static void ept_access_paddr(unsigned long ept_access, unsigned long pte_ad,
enum ept_access_op op, bool expect_violation,
u64 expected_qual)
{
struct ept_access_test_data *data = &ept_access_test_data;
unsigned long *ptep;
unsigned long gpa;
unsigned long orig_epte;
unsigned long epte;
int i;
/* Modify the guest PTE mapping data->gva according to @pte_ad. */
ptep = get_pte_level(current_page_table(), data->gva, /*level=*/1);
TEST_ASSERT(ptep);
TEST_ASSERT_EQ(*ptep & PT_ADDR_MASK, data->gpa);
*ptep = (*ptep & ~PT_AD_MASK) | pte_ad;
ept_access_test_guest_flush_tlb();
/*
 * Now modify the access bits on the EPT entry for the GPA that the
 * guest PTE resides on. Note that by modifying a single EPT entry,
 * we're potentially affecting 512 guest PTEs. However, we've carefully
 * constructed our test such that those other 511 PTEs aren't used by
 * the guest: data->gva is at the beginning of a 1G huge page, thus the
 * PTE we're modifying is at the beginning of a 4K page and the
 * following 511 entries are also under our control (and not touched by
 * the guest).
 */
gpa = virt_to_phys(ptep);
TEST_ASSERT_EQ(gpa & ~PAGE_MASK, 0);
/*
 * Make sure the guest page table page is mapped with a 4K EPT entry,
 * otherwise our level=1 twiddling below will fail. We use the
 * identity map (gpa = gpa) since page tables are shared with the host.
 */
install_ept(pml4, gpa, gpa, EPT_PRESENT);
orig_epte = ept_twiddle(gpa, /*mkhuge=*/0, /*level=*/1,
/*clear=*/EPT_PRESENT, /*set=*/ept_access);
if (expect_violation) {
/* Check the violation, restore the entry, then redo the op. */
do_ept_violation(/*leaf=*/true, op,
expected_qual | EPT_VLT_LADDR_VLD, gpa);
ept_untwiddle(gpa, /*level=*/1, orig_epte);
do_ept_access_op(op);
} else {
do_ept_access_op(op);
if (ept_ad_enabled()) {
/*
 * Every level must have the accessed flag set; the dirty flag must
 * be set at level 1 only.
 */
for (i = EPT_PAGE_LEVEL; i > 0; i--) {
TEST_ASSERT(get_ept_pte(pml4, gpa, i, &epte));
TEST_ASSERT(epte & EPT_ACCESS_FLAG);
if (i == 1)
TEST_ASSERT(epte & EPT_DIRTY_FLAG);
else
TEST_ASSERT_EQ(epte & EPT_DIRTY_FLAG, 0);
}
}
ept_untwiddle(gpa, /*level=*/1, orig_epte);
}
/* Both paths end with a completed op, so the guest PTE must be accessed. */
TEST_ASSERT(*ptep & PT_ACCESSED_MASK);
if ((pte_ad & PT_DIRTY_MASK) || op == OP_WRITE)
TEST_ASSERT(*ptep & PT_DIRTY_MASK);
skip_exit_vmcall();
}
static void ept_access_allowed_paddr(unsigned long ept_access,
unsigned long pte_ad,
enum ept_access_op op)
{
ept_access_paddr(ept_access, pte_ad, op, /*expect_violation=*/false,
/*expected_qual=*/-1);
}
/*
 * Paddr access that MUST raise an EPT violation whose exit qualification
 * matches @expected_qual (ept_access_paddr() adds EPT_VLT_LADDR_VLD itself).
 */
static void ept_access_violation_paddr(unsigned long ept_access,
				       unsigned long pte_ad,
				       enum ept_access_op op,
				       u64 expected_qual)
{
	const bool expect_violation = true;

	ept_access_paddr(ept_access, pte_ad, op, expect_violation,
			 expected_qual);
}
/*
 * Apply @clear/@set (and @mkhuge) to the EPT entry at @level covering
 * ept_access_test_data.gpa, run @op in the guest and expect it to finish
 * without an EPT exit, then put the original entry back.
 */
static void ept_allowed_at_level_mkhuge(bool mkhuge, int level,
					unsigned long clear,
					unsigned long set,
					enum ept_access_op op)
{
	unsigned long saved_epte = ept_twiddle(ept_access_test_data.gpa,
					       mkhuge, level, clear, set);

	/* The access is permitted, so the guest should reach its vmcall. */
	do_ept_access_op(op);
	skip_exit_vmcall();

	ept_untwiddle(ept_access_test_data.gpa, level, saved_epte);
}
/*
 * Run the "access is allowed" check at @level with the regular mapping and,
 * if ept_huge_pages_supported() reports support for @level, once more with
 * the entry turned into a huge page.
 */
static void ept_allowed_at_level(int level, unsigned long clear,
				 unsigned long set, enum ept_access_op op)
{
	ept_allowed_at_level_mkhuge(false, level, clear, set, op);

	if (!ept_huge_pages_supported(level))
		return;

	ept_allowed_at_level_mkhuge(true, level, clear, set, op);
}
/*
 * Check that @op is allowed after applying @clear/@set to the EPT entry at
 * each of the levels 1 through 4, in ascending order.
 */
static void ept_allowed(unsigned long clear, unsigned long set,
			enum ept_access_op op)
{
	int level;

	for (level = 1; level <= 4; level++)
		ept_allowed_at_level(level, clear, set, op);
}
static void ept_ignored_bit(int bit)
{
/* Set the bit. */
ept_allowed(0, 1ul << bit, OP_READ);
ept_allowed(0, 1ul << bit, OP_WRITE);
ept_allowed(0, 1ul << bit, OP_EXEC);
/* Clear the bit. */
ept_allowed(1ul << bit, 0, OP_READ);
ept_allowed(1ul << bit, 0, OP_WRITE);
ept_allowed(1ul << bit, 0, OP_EXEC);
}
/*
 * Expect @op to be allowed when the EPT_PRESENT bits of the entry are
 * cleared and replaced by @access.
 */
static void ept_access_allowed(unsigned long access, enum ept_access_op op)
{
	const unsigned long clear = EPT_PRESENT;

	ept_allowed(clear, access, op);
}
/*
* Apply @clear/@set (and @mkhuge) to the EPT entry at @level covering
* ept_access_test_data.gpa, perform @op in the guest and require that the
* resulting VM exit has reason VMX_EPT_MISCONFIG. The original entry is then
* restored and the guest re-entered so the interrupted op can complete.
*
* The two "#if 0" regions are extra checks on the exit qualification and the
* guest linear address that the original author marked as broken; they are
* kept for reference and are not compiled.
*/
static void ept_misconfig_at_level_mkhuge_op(bool mkhuge, int level,
unsigned long clear,
unsigned long set,
enum ept_access_op op)
{
struct ept_access_test_data *data = &ept_access_test_data;
unsigned long orig_pte;
/* Install the misconfigured entry, remembering the original for restore. */
orig_pte = ept_twiddle(data->gpa, mkhuge, level, clear, set);
do_ept_access_op(op);
assert_exit_reason(VMX_EPT_MISCONFIG);
/* Intel 27.2.1, "For all other VM exits, this field is cleared." */
#if 0
/* broken: */
TEST_EXPECT_EQ_MSG(vmcs_read(EXI_QUALIFICATION), 0);
#endif
#if 0
/*
* broken:
* According to description of exit qual for EPT violation,
* EPT_VLT_LADDR_VLD indicates if GUEST_LINEAR_ADDRESS is valid.
* However, I can't find anything that says GUEST_LINEAR_ADDRESS ought
* to be set for misconfig.
*/
TEST_EXPECT_EQ(vmcs_read(GUEST_LINEAR_ADDRESS),
(unsigned long) (
op == OP_EXEC ? data->gva + 1 : data->gva));
#endif
/* Fix the misconfiguration and resume the op loop. */
ept_untwiddle(data->gpa, level, orig_pte);
enter_guest();
/* Presumably steps past the vmcall that ends the guest op; confirm. */
skip_exit_vmcall();
}
static void ept_misconfig_at_level_mkhuge(bool mkhuge, int level,
unsigned long clear,
unsigned long set)
{
/* The op shouldn't matter (read, write, exec), so try them all! */
ept_misconfig_at_level_mkhuge_op(mkhuge, level, clear, set, OP_READ);
ept_misconfig_at_level_mkhuge_op(mkhuge, level