| /* |
| * qemu/kvm integration |
| * |
| * Copyright (C) 2006-2008 Qumranet Technologies |
| * |
| * Licensed under the terms of the GNU GPL version 2 or higher. |
| */ |
| #include "config.h" |
| #include "config-host.h" |
| |
| #include <assert.h> |
| #include <string.h> |
| #include "hw/hw.h" |
| #include "sysemu.h" |
| #include "qemu-common.h" |
| #include "console.h" |
| #include "block.h" |
| #include "compatfd.h" |
| #include "gdbstub.h" |
| |
| #include "qemu-kvm.h" |
| #include "libkvm.h" |
| |
| #include <pthread.h> |
| #include <sys/utsname.h> |
| #include <sys/syscall.h> |
| #include <sys/mman.h> |
| #include <sys/ioctl.h> |
| #include <signal.h> |
| |
| #define false 0 |
| #define true 1 |
| |
| #define EXPECTED_KVM_API_VERSION 12 |
| |
| #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION |
| #error libkvm: userspace and kernel version mismatch |
| #endif |
| |
| int kvm_allowed = 1; |
| int kvm_irqchip = 1; |
| int kvm_pit = 1; |
| int kvm_pit_reinject = 1; |
| int kvm_nested = 0; |
| |
| |
| KVMState *kvm_state; |
| kvm_context_t kvm_context; |
| |
| pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER; |
| pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER; |
| pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER; |
| pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER; |
| pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER; |
| __thread CPUState *current_env; |
| |
| static int qemu_system_ready; |
| |
| #define SIG_IPI (SIGRTMIN+4) |
| |
| pthread_t io_thread; |
| static int io_thread_fd = -1; |
| static int io_thread_sigfd = -1; |
| |
| static CPUState *kvm_debug_cpu_requested; |
| |
| static uint64_t phys_ram_size; |
| |
| /* The list of ioperm_data */ |
| static LIST_HEAD(, ioperm_data) ioperm_head; |
| |
| //#define DEBUG_MEMREG |
| #ifdef DEBUG_MEMREG |
| #define DPRINTF(fmt, args...) \ |
| do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0) |
| #else |
| #define DPRINTF(fmt, args...) do {} while (0) |
| #endif |
| |
| #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) |
| |
| int kvm_abi = EXPECTED_KVM_API_VERSION; |
| int kvm_page_size; |
| |
| #ifdef KVM_CAP_SET_GUEST_DEBUG |
| static int kvm_debug(void *opaque, void *data, |
| struct kvm_debug_exit_arch *arch_info) |
| { |
| int handle = kvm_arch_debug(arch_info); |
| CPUState *env = data; |
| |
| if (handle) { |
| kvm_debug_cpu_requested = env; |
| env->stopped = 1; |
| } |
| return handle; |
| } |
| #endif |
| |
| int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len) |
| { |
| cpu_physical_memory_rw(addr, data, len, 0); |
| return 0; |
| } |
| |
| int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len) |
| { |
| cpu_physical_memory_rw(addr, data, len, 1); |
| return 0; |
| } |
| |
| static int handle_unhandled(uint64_t reason) |
| { |
| fprintf(stderr, "kvm: unhandled exit %"PRIx64"\n", reason); |
| return -EINVAL; |
| } |
| |
| |
| static inline void set_gsi(kvm_context_t kvm, unsigned int gsi) |
| { |
| uint32_t *bitmap = kvm->used_gsi_bitmap; |
| |
| if (gsi < kvm->max_gsi) |
| bitmap[gsi / 32] |= 1U << (gsi % 32); |
| else |
| DPRINTF("Invalid GSI %d\n"); |
| } |
| |
| static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi) |
| { |
| uint32_t *bitmap = kvm->used_gsi_bitmap; |
| |
| if (gsi < kvm->max_gsi) |
| bitmap[gsi / 32] &= ~(1U << (gsi % 32)); |
| else |
| DPRINTF("Invalid GSI %d\n"); |
| } |
| |
| struct slot_info { |
| unsigned long phys_addr; |
| unsigned long len; |
| unsigned long userspace_addr; |
| unsigned flags; |
| int logging_count; |
| }; |
| |
| struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS]; |
| |
| static void init_slots(void) |
| { |
| int i; |
| |
| for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) |
| slots[i].len = 0; |
| } |
| |
| static int get_free_slot(kvm_context_t kvm) |
| { |
| int i; |
| int tss_ext; |
| |
| #if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__) |
| tss_ext = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); |
| #else |
| tss_ext = 0; |
| #endif |
| |
| /* |
| * on older kernels where the set tss ioctl is not supprted we must save |
| * slot 0 to hold the extended memory, as the vmx will use the last 3 |
| * pages of this slot. |
| */ |
| if (tss_ext > 0) |
| i = 0; |
| else |
| i = 1; |
| |
| for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i) |
| if (!slots[i].len) |
| return i; |
| return -1; |
| } |
| |
| static void register_slot(int slot, unsigned long phys_addr, unsigned long len, |
| unsigned long userspace_addr, unsigned flags) |
| { |
| slots[slot].phys_addr = phys_addr; |
| slots[slot].len = len; |
| slots[slot].userspace_addr = userspace_addr; |
| slots[slot].flags = flags; |
| } |
| |
| static void free_slot(int slot) |
| { |
| slots[slot].len = 0; |
| slots[slot].logging_count = 0; |
| } |
| |
| static int get_slot(unsigned long phys_addr) |
| { |
| int i; |
| |
| for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) { |
| if (slots[i].len && slots[i].phys_addr <= phys_addr && |
| (slots[i].phys_addr + slots[i].len-1) >= phys_addr) |
| return i; |
| } |
| return -1; |
| } |
| |
| /* Returns -1 if this slot is not totally contained on any other, |
| * and the number of the slot otherwise */ |
| static int get_container_slot(uint64_t phys_addr, unsigned long size) |
| { |
| int i; |
| |
| for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) |
| if (slots[i].len && slots[i].phys_addr <= phys_addr && |
| (slots[i].phys_addr + slots[i].len) >= phys_addr + size) |
| return i; |
| return -1; |
| } |
| |
| int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size) |
| { |
| int slot = get_container_slot(phys_addr, size); |
| if (slot == -1) |
| return 0; |
| return 1; |
| } |
| |
| /* |
| * dirty pages logging control |
| */ |
| static int kvm_dirty_pages_log_change(kvm_context_t kvm, |
| unsigned long phys_addr, |
| unsigned flags, |
| unsigned mask) |
| { |
| int r = -1; |
| int slot = get_slot(phys_addr); |
| |
| if (slot == -1) { |
| fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__); |
| return 1; |
| } |
| |
| flags = (slots[slot].flags & ~mask) | flags; |
| if (flags == slots[slot].flags) |
| return 0; |
| slots[slot].flags = flags; |
| |
| { |
| struct kvm_userspace_memory_region mem = { |
| .slot = slot, |
| .memory_size = slots[slot].len, |
| .guest_phys_addr = slots[slot].phys_addr, |
| .userspace_addr = slots[slot].userspace_addr, |
| .flags = slots[slot].flags, |
| }; |
| |
| |
| DPRINTF("slot %d start %llx len %llx flags %x\n", |
| mem.slot, |
| mem.guest_phys_addr, |
| mem.memory_size, |
| mem.flags); |
| r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &mem); |
| if (r < 0) |
| fprintf(stderr, "%s: %m\n", __FUNCTION__); |
| } |
| return r; |
| } |
| |
| static int kvm_dirty_pages_log_change_all(kvm_context_t kvm, |
| int (*change)(kvm_context_t kvm, |
| uint64_t start, |
| uint64_t len)) |
| { |
| int i, r; |
| |
| for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) { |
| if (slots[i].len) |
| r = change(kvm, slots[i].phys_addr, slots[i].len); |
| } |
| return r; |
| } |
| |
| int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm, |
| uint64_t phys_addr, |
| uint64_t len) |
| { |
| int slot = get_slot(phys_addr); |
| |
| DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len); |
| if (slot == -1) { |
| fprintf(stderr, "BUG: %s: invalid parameters\n", __func__); |
| return -EINVAL; |
| } |
| |
| if (slots[slot].logging_count++) |
| return 0; |
| |
| return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, |
| KVM_MEM_LOG_DIRTY_PAGES, |
| KVM_MEM_LOG_DIRTY_PAGES); |
| } |
| |
| int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm, |
| uint64_t phys_addr, |
| uint64_t len) |
| { |
| int slot = get_slot(phys_addr); |
| |
| if (slot == -1) { |
| fprintf(stderr, "BUG: %s: invalid parameters\n", __func__); |
| return -EINVAL; |
| } |
| |
| if (--slots[slot].logging_count) |
| return 0; |
| |
| return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, |
| 0, |
| KVM_MEM_LOG_DIRTY_PAGES); |
| } |
| |
| /** |
| * Enable dirty page logging for all memory regions |
| */ |
| int kvm_dirty_pages_log_enable_all(kvm_context_t kvm) |
| { |
| if (kvm->dirty_pages_log_all) |
| return 0; |
| kvm->dirty_pages_log_all = 1; |
| return kvm_dirty_pages_log_change_all(kvm, |
| kvm_dirty_pages_log_enable_slot); |
| } |
| |
| /** |
| * Enable dirty page logging only for memory regions that were created with |
| * dirty logging enabled (disable for all other memory regions). |
| */ |
| int kvm_dirty_pages_log_reset(kvm_context_t kvm) |
| { |
| if (!kvm->dirty_pages_log_all) |
| return 0; |
| kvm->dirty_pages_log_all = 0; |
| return kvm_dirty_pages_log_change_all(kvm, |
| kvm_dirty_pages_log_disable_slot); |
| } |
| |
| |
| static int kvm_create_context(void); |
| |
| int kvm_init(int smp_cpus) |
| { |
| int fd; |
| int r, gsi_count; |
| |
| |
| fd = open("/dev/kvm", O_RDWR); |
| if (fd == -1) { |
| perror("open /dev/kvm"); |
| return -1; |
| } |
| r = ioctl(fd, KVM_GET_API_VERSION, 0); |
| if (r == -1) { |
| fprintf(stderr, "kvm kernel version too old: " |
| "KVM_GET_API_VERSION ioctl not supported\n"); |
| goto out_close; |
| } |
| if (r < EXPECTED_KVM_API_VERSION) { |
| fprintf(stderr, "kvm kernel version too old: " |
| "We expect API version %d or newer, but got " |
| "version %d\n", |
| EXPECTED_KVM_API_VERSION, r); |
| goto out_close; |
| } |
| if (r > EXPECTED_KVM_API_VERSION) { |
| fprintf(stderr, "kvm userspace version too old\n"); |
| goto out_close; |
| } |
| kvm_abi = r; |
| kvm_page_size = getpagesize(); |
| kvm_state = qemu_mallocz(sizeof(*kvm_state)); |
| kvm_context = &kvm_state->kvm_context; |
| |
| kvm_state->fd = fd; |
| kvm_state->vmfd = -1; |
| kvm_context->opaque = cpu_single_env; |
| kvm_context->dirty_pages_log_all = 0; |
| kvm_context->no_irqchip_creation = 0; |
| kvm_context->no_pit_creation = 0; |
| |
| #ifdef KVM_CAP_SET_GUEST_DEBUG |
| TAILQ_INIT(&kvm_state->kvm_sw_breakpoints); |
| #endif |
| |
| gsi_count = kvm_get_gsi_count(kvm_context); |
| if (gsi_count > 0) { |
| int gsi_bits, i; |
| |
| /* Round up so we can search ints using ffs */ |
| gsi_bits = ALIGN(gsi_count, 32); |
| kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8); |
| kvm_context->max_gsi = gsi_bits; |
| |
| /* Mark any over-allocated bits as already in use */ |
| for (i = gsi_count; i < gsi_bits; i++) |
| set_gsi(kvm_context, i); |
| } |
| |
| pthread_mutex_lock(&qemu_mutex); |
| return kvm_create_context(); |
| |
| out_close: |
| close(fd); |
| return -1; |
| } |
| |
| static void kvm_finalize(KVMState *s) |
| { |
| /* FIXME |
| if (kvm->vcpu_fd[0] != -1) |
| close(kvm->vcpu_fd[0]); |
| if (kvm->vm_fd != -1) |
| close(kvm->vm_fd); |
| */ |
| close(s->fd); |
| free(s); |
| } |
| |
| void kvm_disable_irqchip_creation(kvm_context_t kvm) |
| { |
| kvm->no_irqchip_creation = 1; |
| } |
| |
| void kvm_disable_pit_creation(kvm_context_t kvm) |
| { |
| kvm->no_pit_creation = 1; |
| } |
| |
| kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id) |
| { |
| long mmap_size; |
| int r; |
| kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context)); |
| kvm_context_t kvm = kvm_context; |
| |
| vcpu_ctx->kvm = kvm; |
| vcpu_ctx->id = id; |
| |
| r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id); |
| if (r < 0) { |
| fprintf(stderr, "kvm_create_vcpu: %m\n"); |
| goto err; |
| } |
| vcpu_ctx->fd = r; |
| |
| env->kvm_fd = r; |
| env->kvm_state = kvm_state; |
| |
| mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0); |
| if (mmap_size < 0) { |
| fprintf(stderr, "get vcpu mmap size: %m\n"); |
| goto err_fd; |
| } |
| vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, |
| vcpu_ctx->fd, 0); |
| if (vcpu_ctx->run == MAP_FAILED) { |
| fprintf(stderr, "mmap vcpu area: %m\n"); |
| goto err_fd; |
| } |
| return vcpu_ctx; |
| err_fd: |
| close(vcpu_ctx->fd); |
| err: |
| free(vcpu_ctx); |
| return NULL; |
| } |
| |
| static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id) |
| { |
| #ifdef KVM_CAP_SET_BOOT_CPU_ID |
| int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID); |
| if (r > 0) |
| return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id); |
| return -ENOSYS; |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_create_vm(kvm_context_t kvm) |
| { |
| int fd; |
| #ifdef KVM_CAP_IRQ_ROUTING |
| kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes)); |
| kvm->nr_allocated_irq_routes = 0; |
| #endif |
| |
| fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0); |
| if (fd < 0) { |
| fprintf(stderr, "kvm_create_vm: %m\n"); |
| return -1; |
| } |
| kvm_state->vmfd = fd; |
| return 0; |
| } |
| |
| static int kvm_create_default_phys_mem(kvm_context_t kvm, |
| unsigned long phys_mem_bytes, |
| void **vm_mem) |
| { |
| #ifdef KVM_CAP_USER_MEMORY |
| int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY); |
| if (r > 0) |
| return 0; |
| fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n"); |
| #else |
| #error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported |
| #endif |
| return -1; |
| } |
| |
| void kvm_create_irqchip(kvm_context_t kvm) |
| { |
| int r; |
| |
| kvm->irqchip_in_kernel = 0; |
| #ifdef KVM_CAP_IRQCHIP |
| if (!kvm->no_irqchip_creation) { |
| r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP); |
| if (r > 0) { /* kernel irqchip supported */ |
| r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP); |
| if (r >= 0) { |
| kvm->irqchip_inject_ioctl = KVM_IRQ_LINE; |
| #if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS) |
| r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, |
| KVM_CAP_IRQ_INJECT_STATUS); |
| if (r > 0) |
| kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS; |
| #endif |
| kvm->irqchip_in_kernel = 1; |
| } |
| else |
| fprintf(stderr, "Create kernel PIC irqchip failed\n"); |
| } |
| } |
| #endif |
| } |
| |
| int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem) |
| { |
| int r; |
| |
| r = kvm_create_vm(kvm); |
| if (r < 0) |
| return r; |
| r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem); |
| if (r < 0) |
| return r; |
| init_slots(); |
| r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem); |
| if (r < 0) |
| return r; |
| kvm_create_irqchip(kvm); |
| |
| return 0; |
| } |
| |
| |
| int kvm_register_phys_mem(kvm_context_t kvm, |
| unsigned long phys_start, void *userspace_addr, |
| unsigned long len, int log) |
| { |
| |
| struct kvm_userspace_memory_region memory = { |
| .memory_size = len, |
| .guest_phys_addr = phys_start, |
| .userspace_addr = (unsigned long)(intptr_t)userspace_addr, |
| .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0, |
| }; |
| int r; |
| |
| memory.slot = get_free_slot(kvm); |
| DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n", |
| memory.guest_phys_addr, memory.memory_size, |
| memory.userspace_addr, memory.slot, memory.flags); |
| r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory); |
| if (r < 0) { |
| fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(-r)); |
| return -1; |
| } |
| register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size, |
| memory.userspace_addr, memory.flags); |
| return 0; |
| } |
| |
| |
| /* destroy/free a whole slot. |
| * phys_start, len and slot are the params passed to kvm_create_phys_mem() |
| */ |
| void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start, |
| unsigned long len) |
| { |
| int slot; |
| int r; |
| struct kvm_userspace_memory_region memory = { |
| .memory_size = 0, |
| .guest_phys_addr = phys_start, |
| .userspace_addr = 0, |
| .flags = 0, |
| }; |
| |
| slot = get_slot(phys_start); |
| |
| if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) { |
| fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", |
| __FUNCTION__, slot); |
| return; |
| } |
| if (phys_start != slots[slot].phys_addr) { |
| fprintf(stderr, |
| "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n", |
| __FUNCTION__, phys_start, slots[slot].phys_addr); |
| phys_start = slots[slot].phys_addr; |
| } |
| |
| memory.slot = slot; |
| DPRINTF("slot %d start %llx len %llx flags %x\n", |
| memory.slot, |
| memory.guest_phys_addr, |
| memory.memory_size, |
| memory.flags); |
| r = kvm_vm_ioctl(kvm_state, KVM_SET_USER_MEMORY_REGION, &memory); |
| if (r < 0) { |
| fprintf(stderr, "destroy_userspace_phys_mem: %s", |
| strerror(-r)); |
| return; |
| } |
| |
| free_slot(memory.slot); |
| } |
| |
| void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size) |
| { |
| |
| int slot = get_container_slot(phys_addr, size); |
| |
| if (slot != -1) { |
| DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size); |
| kvm_destroy_phys_mem(kvm, phys_addr, size); |
| return; |
| } |
| } |
| |
| static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf) |
| { |
| int r; |
| struct kvm_dirty_log log = { |
| .slot = slot, |
| }; |
| |
| log.dirty_bitmap = buf; |
| |
| r = kvm_vm_ioctl(kvm_state, ioctl_num, &log); |
| if (r < 0) |
| return r; |
| return 0; |
| } |
| |
| int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf) |
| { |
| int slot; |
| |
| slot = get_slot(phys_addr); |
| return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf); |
| } |
| |
| int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr, |
| unsigned long len, void *opaque, |
| int (*cb)(unsigned long start, unsigned long len, |
| void*bitmap, void *opaque)) |
| { |
| int i; |
| int r; |
| unsigned long end_addr = phys_addr + len; |
| void *buf; |
| |
| for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) { |
| if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr) |
| && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) { |
| buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2); |
| r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf); |
| if (r) { |
| qemu_free(buf); |
| return r; |
| } |
| r = cb(slots[i].phys_addr, slots[i].len, buf, opaque); |
| qemu_free(buf); |
| if (r) |
| return r; |
| } |
| } |
| return 0; |
| } |
| |
| #ifdef KVM_CAP_IRQCHIP |
| |
| int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status) |
| { |
| struct kvm_irq_level event; |
| int r; |
| |
| if (!kvm->irqchip_in_kernel) |
| return 0; |
| event.level = level; |
| event.irq = irq; |
| r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event); |
| if (r < 0) |
| perror("kvm_set_irq_level"); |
| |
| if (status) { |
| #ifdef KVM_CAP_IRQ_INJECT_STATUS |
| *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? |
| 1 : event.status; |
| #else |
| *status = 1; |
| #endif |
| } |
| |
| return 1; |
| } |
| |
| int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) |
| { |
| int r; |
| |
| if (!kvm->irqchip_in_kernel) |
| return 0; |
| r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip); |
| if (r < 0) { |
| perror("kvm_get_irqchip\n"); |
| } |
| return r; |
| } |
| |
| int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) |
| { |
| int r; |
| |
| if (!kvm->irqchip_in_kernel) |
| return 0; |
| r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip); |
| if (r < 0) { |
| perror("kvm_set_irqchip\n"); |
| } |
| return r; |
| } |
| |
| #endif |
| |
| static int handle_io(kvm_vcpu_context_t vcpu) |
| { |
| struct kvm_run *run = vcpu->run; |
| kvm_context_t kvm = vcpu->kvm; |
| uint16_t addr = run->io.port; |
| int i; |
| void *p = (void *)run + run->io.data_offset; |
| |
| for (i = 0; i < run->io.count; ++i) { |
| switch (run->io.direction) { |
| case KVM_EXIT_IO_IN: |
| switch (run->io.size) { |
| case 1: |
| *(uint8_t *)p = cpu_inb(kvm->opaque, addr); |
| break; |
| case 2: |
| *(uint16_t *)p = cpu_inw(kvm->opaque, addr); |
| break; |
| case 4: |
| *(uint32_t *)p = cpu_inl(kvm->opaque, addr); |
| break; |
| default: |
| fprintf(stderr, "bad I/O size %d\n", run->io.size); |
| return -EMSGSIZE; |
| } |
| break; |
| case KVM_EXIT_IO_OUT: |
| switch (run->io.size) { |
| case 1: |
| cpu_outb(kvm->opaque, addr, *(uint8_t *)p); |
| break; |
| case 2: |
| cpu_outw(kvm->opaque, addr, *(uint16_t *)p); |
| break; |
| case 4: |
| cpu_outl(kvm->opaque, addr, *(uint32_t *)p); |
| break; |
| default: |
| fprintf(stderr, "bad I/O size %d\n", run->io.size); |
| return -EMSGSIZE; |
| } |
| break; |
| default: |
| fprintf(stderr, "bad I/O direction %d\n", run->io.direction); |
| return -EPROTO; |
| } |
| |
| p += run->io.size; |
| } |
| |
| return 0; |
| } |
| |
| int handle_debug(kvm_vcpu_context_t vcpu, void *env) |
| { |
| #ifdef KVM_CAP_SET_GUEST_DEBUG |
| struct kvm_run *run = vcpu->run; |
| kvm_context_t kvm = vcpu->kvm; |
| |
| return kvm_debug(kvm->opaque, env, &run->debug.arch); |
| #else |
| return 0; |
| #endif |
| } |
| |
| int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs) |
| { |
| return ioctl(vcpu->fd, KVM_GET_REGS, regs); |
| } |
| |
| int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs) |
| { |
| return ioctl(vcpu->fd, KVM_SET_REGS, regs); |
| } |
| |
| int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu) |
| { |
| return ioctl(vcpu->fd, KVM_GET_FPU, fpu); |
| } |
| |
| int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu) |
| { |
| return ioctl(vcpu->fd, KVM_SET_FPU, fpu); |
| } |
| |
| int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs) |
| { |
| return ioctl(vcpu->fd, KVM_GET_SREGS, sregs); |
| } |
| |
| int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs) |
| { |
| return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); |
| } |
| |
| #ifdef KVM_CAP_MP_STATE |
| int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state) |
| { |
| int r; |
| |
| r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); |
| if (r > 0) |
| return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state); |
| return -ENOSYS; |
| } |
| |
| int kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state) |
| { |
| int r; |
| |
| r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); |
| if (r > 0) |
| return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); |
| return -ENOSYS; |
| } |
| #endif |
| |
| static int handle_mmio(kvm_vcpu_context_t vcpu) |
| { |
| unsigned long addr = vcpu->run->mmio.phys_addr; |
| kvm_context_t kvm = vcpu->kvm; |
| struct kvm_run *kvm_run = vcpu->run; |
| void *data = kvm_run->mmio.data; |
| |
| /* hack: Red Hat 7.1 generates these weird accesses. */ |
| if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3) |
| return 0; |
| |
| if (kvm_run->mmio.is_write) |
| return kvm_mmio_write(kvm->opaque, addr, data, |
| kvm_run->mmio.len); |
| else |
| return kvm_mmio_read(kvm->opaque, addr, data, |
| kvm_run->mmio.len); |
| } |
| |
| int handle_io_window(kvm_context_t kvm) |
| { |
| return 1; |
| } |
| |
| int handle_halt(kvm_vcpu_context_t vcpu) |
| { |
| return kvm_arch_halt(vcpu->kvm->opaque, vcpu); |
| } |
| |
| int handle_shutdown(kvm_context_t kvm, CPUState *env) |
| { |
| /* stop the current vcpu from going back to guest mode */ |
| env->stopped = 1; |
| |
| qemu_system_reset_request(); |
| return 1; |
| } |
| |
| static inline void push_nmi(kvm_context_t kvm) |
| { |
| #ifdef KVM_CAP_USER_NMI |
| kvm_arch_push_nmi(kvm->opaque); |
| #endif /* KVM_CAP_USER_NMI */ |
| } |
| |
| void post_kvm_run(kvm_context_t kvm, CPUState *env) |
| { |
| pthread_mutex_lock(&qemu_mutex); |
| kvm_arch_post_kvm_run(kvm->opaque, env); |
| } |
| |
| int pre_kvm_run(kvm_context_t kvm, CPUState *env) |
| { |
| kvm_arch_pre_kvm_run(kvm->opaque, env); |
| |
| pthread_mutex_unlock(&qemu_mutex); |
| return 0; |
| } |
| |
| int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu) |
| { |
| return vcpu->run->if_flag; |
| } |
| |
| int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu) |
| { |
| return vcpu->run->ready_for_interrupt_injection; |
| } |
| |
| int kvm_run(kvm_vcpu_context_t vcpu, void *env) |
| { |
| int r; |
| int fd = vcpu->fd; |
| struct kvm_run *run = vcpu->run; |
| kvm_context_t kvm = vcpu->kvm; |
| |
| again: |
| push_nmi(kvm); |
| #if !defined(__s390__) |
| if (!kvm->irqchip_in_kernel) |
| run->request_interrupt_window = kvm_arch_try_push_interrupts(env); |
| #endif |
| r = pre_kvm_run(kvm, env); |
| if (r) |
| return r; |
| r = ioctl(fd, KVM_RUN, 0); |
| |
| if (r == -1 && errno != EINTR && errno != EAGAIN) { |
| r = -errno; |
| post_kvm_run(kvm, env); |
| fprintf(stderr, "kvm_run: %s\n", strerror(-r)); |
| return r; |
| } |
| |
| post_kvm_run(kvm, env); |
| |
| #if defined(KVM_CAP_COALESCED_MMIO) |
| if (kvm->coalesced_mmio) { |
| struct kvm_coalesced_mmio_ring *ring = (void *)run + |
| kvm->coalesced_mmio * PAGE_SIZE; |
| while (ring->first != ring->last) { |
| kvm_mmio_write(kvm->opaque, |
| ring->coalesced_mmio[ring->first].phys_addr, |
| &ring->coalesced_mmio[ring->first].data[0], |
| ring->coalesced_mmio[ring->first].len); |
| smp_wmb(); |
| ring->first = (ring->first + 1) % |
| KVM_COALESCED_MMIO_MAX; |
| } |
| } |
| #endif |
| |
| #if !defined(__s390__) |
| if (r == -1) { |
| r = handle_io_window(kvm); |
| goto more; |
| } |
| #endif |
| if (1) { |
| switch (run->exit_reason) { |
| case KVM_EXIT_UNKNOWN: |
| r = handle_unhandled(run->hw.hardware_exit_reason); |
| break; |
| case KVM_EXIT_FAIL_ENTRY: |
| r = handle_unhandled(run->fail_entry.hardware_entry_failure_reason); |
| break; |
| case KVM_EXIT_EXCEPTION: |
| fprintf(stderr, "exception %d (%x)\n", |
| run->ex.exception, |
| run->ex.error_code); |
| kvm_show_regs(vcpu); |
| kvm_show_code(vcpu); |
| abort(); |
| break; |
| case KVM_EXIT_IO: |
| r = handle_io(vcpu); |
| break; |
| case KVM_EXIT_DEBUG: |
| r = handle_debug(vcpu, env); |
| break; |
| case KVM_EXIT_MMIO: |
| r = handle_mmio(vcpu); |
| break; |
| case KVM_EXIT_HLT: |
| r = handle_halt(vcpu); |
| break; |
| case KVM_EXIT_IRQ_WINDOW_OPEN: |
| break; |
| case KVM_EXIT_SHUTDOWN: |
| r = handle_shutdown(kvm, env); |
| break; |
| #if defined(__s390__) |
| case KVM_EXIT_S390_SIEIC: |
| r = kvm_s390_handle_intercept(kvm, vcpu, |
| run); |
| break; |
| case KVM_EXIT_S390_RESET: |
| r = kvm_s390_handle_reset(kvm, vcpu, run); |
| break; |
| #endif |
| default: |
| if (kvm_arch_run(vcpu)) { |
| fprintf(stderr, "unhandled vm exit: 0x%x\n", |
| run->exit_reason); |
| kvm_show_regs(vcpu); |
| abort(); |
| } |
| break; |
| } |
| } |
| more: |
| if (!r) |
| goto again; |
| return r; |
| } |
| |
| int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq) |
| { |
| struct kvm_interrupt intr; |
| |
| intr.irq = irq; |
| return ioctl(vcpu->fd, KVM_INTERRUPT, &intr); |
| } |
| |
| #ifdef KVM_CAP_SET_GUEST_DEBUG |
| int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg) |
| { |
| return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg); |
| } |
| #endif |
| |
| int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset) |
| { |
| struct kvm_signal_mask *sigmask; |
| int r; |
| |
| if (!sigset) { |
| r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL); |
| if (r == -1) |
| r = -errno; |
| return r; |
| } |
| sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset)); |
| |
| sigmask->len = 8; |
| memcpy(sigmask->sigset, sigset, sizeof(*sigset)); |
| r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask); |
| if (r == -1) |
| r = -errno; |
| free(sigmask); |
| return r; |
| } |
| |
| int kvm_irqchip_in_kernel(kvm_context_t kvm) |
| { |
| return kvm->irqchip_in_kernel; |
| } |
| |
| int kvm_pit_in_kernel(kvm_context_t kvm) |
| { |
| return kvm->pit_in_kernel; |
| } |
| |
| int kvm_has_sync_mmu(void) |
| { |
| int r = 0; |
| #ifdef KVM_CAP_SYNC_MMU |
| r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU); |
| #endif |
| return r; |
| } |
| |
| int kvm_inject_nmi(kvm_vcpu_context_t vcpu) |
| { |
| #ifdef KVM_CAP_USER_NMI |
| return ioctl(vcpu->fd, KVM_NMI); |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_init_coalesced_mmio(kvm_context_t kvm) |
| { |
| int r = 0; |
| kvm->coalesced_mmio = 0; |
| #ifdef KVM_CAP_COALESCED_MMIO |
| r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); |
| if (r > 0) { |
| kvm->coalesced_mmio = r; |
| return 0; |
| } |
| #endif |
| return r; |
| } |
| |
| int kvm_coalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size) |
| { |
| #ifdef KVM_CAP_COALESCED_MMIO |
| kvm_context_t kvm = kvm_context; |
| struct kvm_coalesced_mmio_zone zone; |
| int r; |
| |
| if (kvm->coalesced_mmio) { |
| |
| zone.addr = addr; |
| zone.size = size; |
| |
| r = kvm_vm_ioctl(kvm_state, KVM_REGISTER_COALESCED_MMIO, &zone); |
| if (r < 0) { |
| perror("kvm_register_coalesced_mmio_zone"); |
| return r; |
| } |
| return 0; |
| } |
| #endif |
| return -ENOSYS; |
| } |
| |
| int kvm_uncoalesce_mmio_region(target_phys_addr_t addr, ram_addr_t size) |
| { |
| #ifdef KVM_CAP_COALESCED_MMIO |
| kvm_context_t kvm = kvm_context; |
| struct kvm_coalesced_mmio_zone zone; |
| int r; |
| |
| if (kvm->coalesced_mmio) { |
| |
| zone.addr = addr; |
| zone.size = size; |
| |
| r = kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_COALESCED_MMIO, &zone); |
| if (r < 0) { |
| perror("kvm_unregister_coalesced_mmio_zone"); |
| return r; |
| } |
| DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size); |
| return 0; |
| } |
| #endif |
| return -ENOSYS; |
| } |
| |
| #ifdef KVM_CAP_DEVICE_ASSIGNMENT |
| int kvm_assign_pci_device(kvm_context_t kvm, |
| struct kvm_assigned_pci_dev *assigned_dev) |
| { |
| return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev); |
| } |
| |
| static int kvm_old_assign_irq(kvm_context_t kvm, |
| struct kvm_assigned_irq *assigned_irq) |
| { |
| return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq); |
| } |
| |
| #ifdef KVM_CAP_ASSIGN_DEV_IRQ |
| int kvm_assign_irq(kvm_context_t kvm, |
| struct kvm_assigned_irq *assigned_irq) |
| { |
| int ret; |
| |
| ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ); |
| if (ret > 0) { |
| return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq); |
| } |
| |
| return kvm_old_assign_irq(kvm, assigned_irq); |
| } |
| |
| int kvm_deassign_irq(kvm_context_t kvm, |
| struct kvm_assigned_irq *assigned_irq) |
| { |
| return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq); |
| } |
| #else |
| int kvm_assign_irq(kvm_context_t kvm, |
| struct kvm_assigned_irq *assigned_irq) |
| { |
| return kvm_old_assign_irq(kvm, assigned_irq); |
| } |
| #endif |
| #endif |
| |
| #ifdef KVM_CAP_DEVICE_DEASSIGNMENT |
| int kvm_deassign_pci_device(kvm_context_t kvm, |
| struct kvm_assigned_pci_dev *assigned_dev) |
| { |
| return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev); |
| } |
| #endif |
| |
| int kvm_destroy_memory_region_works(kvm_context_t kvm) |
| { |
| int ret = 0; |
| |
| #ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS |
| ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, |
| KVM_CAP_DESTROY_MEMORY_REGION_WORKS); |
| if (ret <= 0) |
| ret = 0; |
| #endif |
| return ret; |
| } |
| |
| int kvm_reinject_control(kvm_context_t kvm, int pit_reinject) |
| { |
| #ifdef KVM_CAP_REINJECT_CONTROL |
| int r; |
| struct kvm_reinject_control control; |
| |
| control.pit_reinject = pit_reinject; |
| |
| r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL); |
| if (r > 0) { |
| return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control); |
| } |
| #endif |
| return -ENOSYS; |
| } |
| |
| int kvm_has_gsi_routing(kvm_context_t kvm) |
| { |
| int r = 0; |
| |
| #ifdef KVM_CAP_IRQ_ROUTING |
| r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING); |
| #endif |
| return r; |
| } |
| |
| int kvm_get_gsi_count(kvm_context_t kvm) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING); |
| #else |
| return -EINVAL; |
| #endif |
| } |
| |
| int kvm_clear_gsi_routes(kvm_context_t kvm) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| kvm->irq_routes->nr = 0; |
| return 0; |
| #else |
| return -EINVAL; |
| #endif |
| } |
| |
| int kvm_add_routing_entry(kvm_context_t kvm, |
| struct kvm_irq_routing_entry* entry) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| struct kvm_irq_routing *z; |
| struct kvm_irq_routing_entry *new; |
| int n, size; |
| |
| if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) { |
| n = kvm->nr_allocated_irq_routes * 2; |
| if (n < 64) |
| n = 64; |
| size = sizeof(struct kvm_irq_routing); |
| size += n * sizeof(*new); |
| z = realloc(kvm->irq_routes, size); |
| if (!z) |
| return -ENOMEM; |
| kvm->nr_allocated_irq_routes = n; |
| kvm->irq_routes = z; |
| } |
| n = kvm->irq_routes->nr++; |
| new = &kvm->irq_routes->entries[n]; |
| memset(new, 0, sizeof(*new)); |
| new->gsi = entry->gsi; |
| new->type = entry->type; |
| new->flags = entry->flags; |
| new->u = entry->u; |
| |
| set_gsi(kvm, entry->gsi); |
| |
| return 0; |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| struct kvm_irq_routing_entry e; |
| |
| e.gsi = gsi; |
| e.type = KVM_IRQ_ROUTING_IRQCHIP; |
| e.flags = 0; |
| e.u.irqchip.irqchip = irqchip; |
| e.u.irqchip.pin = pin; |
| return kvm_add_routing_entry(kvm, &e); |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_del_routing_entry(kvm_context_t kvm, |
| struct kvm_irq_routing_entry* entry) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| struct kvm_irq_routing_entry *e, *p; |
| int i, gsi, found = 0; |
| |
| gsi = entry->gsi; |
| |
| for (i = 0; i < kvm->irq_routes->nr; ++i) { |
| e = &kvm->irq_routes->entries[i]; |
| if (e->type == entry->type |
| && e->gsi == gsi) { |
| switch (e->type) |
| { |
| case KVM_IRQ_ROUTING_IRQCHIP: { |
| if (e->u.irqchip.irqchip == |
| entry->u.irqchip.irqchip |
| && e->u.irqchip.pin == |
| entry->u.irqchip.pin) { |
| p = &kvm->irq_routes-> |
| entries[--kvm->irq_routes->nr]; |
| *e = *p; |
| found = 1; |
| } |
| break; |
| } |
| case KVM_IRQ_ROUTING_MSI: { |
| if (e->u.msi.address_lo == |
| entry->u.msi.address_lo |
| && e->u.msi.address_hi == |
| entry->u.msi.address_hi |
| && e->u.msi.data == entry->u.msi.data) { |
| p = &kvm->irq_routes-> |
| entries[--kvm->irq_routes->nr]; |
| *e = *p; |
| found = 1; |
| } |
| break; |
| } |
| default: |
| break; |
| } |
| if (found) { |
| /* If there are no other users of this GSI |
| * mark it available in the bitmap */ |
| for (i = 0; i < kvm->irq_routes->nr; i++) { |
| e = &kvm->irq_routes->entries[i]; |
| if (e->gsi == gsi) |
| break; |
| } |
| if (i == kvm->irq_routes->nr) |
| clear_gsi(kvm, gsi); |
| |
| return 0; |
| } |
| } |
| } |
| return -ESRCH; |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_update_routing_entry(kvm_context_t kvm, |
| struct kvm_irq_routing_entry* entry, |
| struct kvm_irq_routing_entry* newentry) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| struct kvm_irq_routing_entry *e; |
| int i; |
| |
| if (entry->gsi != newentry->gsi || |
| entry->type != newentry->type) { |
| return -EINVAL; |
| } |
| |
| for (i = 0; i < kvm->irq_routes->nr; ++i) { |
| e = &kvm->irq_routes->entries[i]; |
| if (e->type != entry->type || e->gsi != entry->gsi) { |
| continue; |
| } |
| switch (e->type) { |
| case KVM_IRQ_ROUTING_IRQCHIP: |
| if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip && |
| e->u.irqchip.pin == entry->u.irqchip.pin) { |
| memcpy(&e->u.irqchip, &newentry->u.irqchip, sizeof e->u.irqchip); |
| return 0; |
| } |
| break; |
| case KVM_IRQ_ROUTING_MSI: |
| if (e->u.msi.address_lo == entry->u.msi.address_lo && |
| e->u.msi.address_hi == entry->u.msi.address_hi && |
| e->u.msi.data == entry->u.msi.data) { |
| memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi); |
| return 0; |
| } |
| break; |
| default: |
| break; |
| } |
| } |
| return -ESRCH; |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| struct kvm_irq_routing_entry e; |
| |
| e.gsi = gsi; |
| e.type = KVM_IRQ_ROUTING_IRQCHIP; |
| e.flags = 0; |
| e.u.irqchip.irqchip = irqchip; |
| e.u.irqchip.pin = pin; |
| return kvm_del_routing_entry(kvm, &e); |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_commit_irq_routes(kvm_context_t kvm) |
| { |
| #ifdef KVM_CAP_IRQ_ROUTING |
| kvm->irq_routes->flags = 0; |
| return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes); |
| #else |
| return -ENOSYS; |
| #endif |
| } |
| |
| int kvm_get_irq_route_gsi(kvm_context_t kvm) |
| { |
| int i, bit; |
| uint32_t *buf = kvm->used_gsi_bitmap; |
| |
| /* Return the lowest unused GSI in the bitmap */ |
| for (i = 0; i < kvm->max_gsi / 32; i++) { |
| bit = ffs(~buf[i]); |
| if (!bit) |
| continue; |
| |
| return bit - 1 + i * 32; |
| } |
| |
| return -ENOSPC; |
| } |
| |
| #ifdef KVM_CAP_DEVICE_MSIX |
| int kvm_assign_set_msix_nr(kvm_context_t kvm, |
| struct kvm_assigned_msix_nr *msix_nr) |
| { |
| return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr); |
| } |
| |
| int kvm_assign_set_msix_entry(kvm_context_t kvm, |
| struct kvm_assigned_msix_entry *entry) |
| { |
| return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry); |
| } |
| #endif |
| |
| #if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd) |
| |
| #include <sys/eventfd.h> |
| |
| static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags) |
| { |
| struct kvm_irqfd data = { |
| .fd = fd, |
| .gsi = gsi, |
| .flags = flags, |
| }; |
| |
| return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data); |
| } |
| |
| int kvm_irqfd(kvm_context_t kvm, int gsi, int flags) |
| { |
| int r; |
| int fd; |
| |
| if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD)) |
| return -ENOENT; |
| |
| fd = eventfd(0, 0); |
| if (fd < 0) |
| return -errno; |
| |
| r = _kvm_irqfd(kvm, fd, gsi, 0); |
| if (r < 0) { |
| close(fd); |
| return -errno; |
| } |
| |
| return fd; |
| } |
| |
| #else /* KVM_CAP_IRQFD */ |
| |
| int kvm_irqfd(kvm_context_t kvm, int gsi, int flags) |
| { |
| return -ENOSYS; |
| } |
| |
| #endif /* KVM_CAP_IRQFD */ |
| static inline unsigned long kvm_get_thread_id(void) |
| { |
| return syscall(SYS_gettid); |
| } |
| |
| static void qemu_cond_wait(pthread_cond_t *cond) |
| { |
| CPUState *env = cpu_single_env; |
| static const struct timespec ts = { |
| .tv_sec = 0, |
| .tv_nsec = 100000, |
| }; |
| |
| pthread_cond_timedwait(cond, &qemu_mutex, &ts); |
| cpu_single_env = env; |
| } |
| |
| static void sig_ipi_handler(int n) |
| { |
| } |
| |
| static void on_vcpu(CPUState *env, void (*func)(void *data), void *data) |
| { |
| struct qemu_work_item wi; |
| |
| if (env == current_env) { |
| func(data); |
| return; |
| } |
| |
| wi.func = func; |
| wi.data = data; |
| if (!env->kvm_cpu_state.queued_work_first) |
| env->kvm_cpu_state.queued_work_first = &wi; |
| else |
| env->kvm_cpu_state.queued_work_last->next = &wi; |
| env->kvm_cpu_state.queued_work_last = &wi; |
| wi.next = NULL; |
| wi.done = false; |
| |
| pthread_kill(env->kvm_cpu_state.thread, SIG_IPI); |
| while (!wi.done) |
| qemu_cond_wait(&qemu_work_cond); |
| } |
| |
| static void inject_interrupt(void *data) |
| { |
| cpu_interrupt(current_env, (long)data); |
| } |
| |
| void kvm_inject_interrupt(CPUState *env, int mask) |
| { |
| on_vcpu(env, inject_interrupt, (void *)(long)mask); |
| } |
| |
| void kvm_update_interrupt_request(CPUState *env) |
| { |
| int signal = 0; |
| |
| if (env) { |
| if (!current_env || !current_env->created) |
| signal = 1; |
| /* |
| * Testing for created here is really redundant |
| */ |
| if (current_env && current_env->created && |
| env != current_env && !env->kvm_cpu_state.signalled) |
| signal = 1; |
| |
| if (signal) { |
| env->kvm_cpu_state.signalled = 1; |
| if (env->kvm_cpu_state.thread) |
| pthread_kill(env->kvm_cpu_state.thread, SIG_IPI); |
| } |
| } |
| } |
| |
| static void kvm_do_load_registers(void *_env) |
| { |
| CPUState *env = _env; |
| |
| kvm_arch_load_regs(env); |
| } |
| |
| void kvm_load_registers(CPUState *env) |
| { |
| if (kvm_enabled() && qemu_system_ready) |
| on_vcpu(env, kvm_do_load_registers, env); |
| } |
| |
| static void kvm_do_save_registers(void *_env) |
| { |
| CPUState *env = _env; |
| |
| kvm_arch_save_regs(env); |
| } |
| |
| void kvm_save_registers(CPUState *env) |
| { |
| if (kvm_enabled()) |
| on_vcpu(env, kvm_do_save_registers, env); |
| } |
| |
| static void kvm_do_load_mpstate(void *_env) |
| { |
| CPUState *env = _env; |
| |
| kvm_arch_load_mpstate(env); |
| } |
| |
| void kvm_load_mpstate(CPUState *env) |
| { |
| if (kvm_enabled() && qemu_system_ready) |
| on_vcpu(env, kvm_do_load_mpstate, env); |
| } |
| |
| static void kvm_do_save_mpstate(void *_env) |
| { |
| CPUState *env = _env; |
| |
| kvm_arch_save_mpstate(env); |
| env->halted = (env->mp_state == KVM_MP_STATE_HALTED); |
| } |
| |
| void kvm_save_mpstate(CPUState *env) |
| { |
| if (kvm_enabled()) |
| on_vcpu(env, kvm_do_save_mpstate, env); |
| } |
| |
| int kvm_cpu_exec(CPUState *env) |
| { |
| int r; |
| |
| r = kvm_run(env->kvm_cpu_state.vcpu_ctx, env); |
| if (r < 0) { |
| printf("kvm_run returned %d\n", r); |
| vm_stop(0); |
| } |
| |
| return 0; |
| } |
| |
| static int is_cpu_stopped(CPUState *env) |
| { |
| return !vm_running || env->stopped; |
| } |
| |
| static void flush_queued_work(CPUState *env) |
| { |
| struct qemu_work_item *wi; |
| |
| if (!env->kvm_cpu_state.queued_work_first) |
| return; |
| |
| while ((wi = env->kvm_cpu_state.queued_work_first)) { |
| env->kvm_cpu_state.queued_work_first = wi->next; |
| wi->func(wi->data); |
| wi->done = true; |
| } |
| env->kvm_cpu_state.queued_work_last = NULL; |
| pthread_cond_broadcast(&qemu_work_cond); |
| } |
| |
| static void kvm_main_loop_wait(CPUState *env, int timeout) |
| { |
| struct timespec ts; |
| int r, e; |
| siginfo_t siginfo; |
| sigset_t waitset; |
| |
| pthread_mutex_unlock(&qemu_mutex); |
| |
| ts.tv_sec = timeout / 1000; |
| ts.tv_nsec = (timeout % 1000) * 1000000; |
| sigemptyset(&waitset); |
| sigaddset(&waitset, SIG_IPI); |
| |
| r = sigtimedwait(&waitset, &siginfo, &ts); |
| e = errno; |
| |
| pthread_mutex_lock(&qemu_mutex); |
| |
| if (r == -1 && !(e == EAGAIN || e == EINTR)) { |
| printf("sigtimedwait: %s\n", strerror(e)); |
| exit(1); |
| } |
| |
| cpu_single_env = env; |
| flush_queued_work(env); |
| |
| if (env->stop) { |
| env->stop = 0; |
| env->stopped = 1; |
| pthread_cond_signal(&qemu_pause_cond); |
| } |
| |
| env->kvm_cpu_state.signalled = 0; |
| } |
| |
| static int all_threads_paused(void) |
| { |
| CPUState *penv = first_cpu; |
| |
| while (penv) { |
| if (penv->stop) |
| return 0; |
| penv = (CPUState *)penv->next_cpu; |
| } |
| |
| return 1; |
| } |
| |
| static void pause_all_threads(void) |
| { |
| CPUState *penv = first_cpu; |
| |
| while (penv) { |
| if (penv != cpu_single_env) { |
| penv->stop = 1; |
| pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI); |
| } else { |
| penv->stop = 0; |
| penv->stopped = 1; |
| cpu_exit(penv); |
| } |
| penv = (CPUState *)penv->next_cpu; |
| } |
| |
| while (!all_threads_paused()) |
| qemu_cond_wait(&qemu_pause_cond); |
| } |
| |
| static void resume_all_threads(void) |
| { |
| CPUState *penv = first_cpu; |
| |
| assert(!cpu_single_env); |
| |
| while (penv) { |
| penv->stop = 0; |
| penv->stopped = 0; |
| pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI); |
| penv = (CPUState *)penv->next_cpu; |
| } |
| } |
| |
| static void kvm_vm_state_change_handler(void *context, int running, int reason) |
| { |
| if (running) |
| resume_all_threads(); |
| else |
| pause_all_threads(); |
| } |
| |
| static void setup_kernel_sigmask(CPUState *env) |
| { |
| sigset_t set; |
| |
| sigemptyset(&set); |
| sigaddset(&set, SIGUSR2); |
| sigaddset(&set, SIGIO); |
| sigaddset(&set, SIGALRM); |
| sigprocmask(SIG_BLOCK, &set, NULL); |
| |
| sigprocmask(SIG_BLOCK, NULL, &set); |
| sigdelset(&set, SIG_IPI); |
| |
| kvm_set_signal_mask(env->kvm_cpu_state.vcpu_ctx, &set); |
| } |
| |
| static void qemu_kvm_system_reset(void) |
| { |
| CPUState *penv = first_cpu; |
| |
| pause_all_threads(); |
| |
| qemu_system_reset(); |
| |
| while (penv) { |
| kvm_arch_cpu_reset(penv); |
| penv = (CPUState *)penv->next_cpu; |
| } |
| |
| resume_all_threads(); |
| } |
| |
| static void process_irqchip_events(CPUState *env) |
| { |
| kvm_arch_process_irqchip_events(env); |
| if (kvm_arch_has_work(env)) |
| env->halted = 0; |
| } |
| |
| static int kvm_main_loop_cpu(CPUState *env) |
| { |
| setup_kernel_sigmask(env); |
| |
| pthread_mutex_lock(&qemu_mutex); |
| |
| kvm_qemu_init_env(env); |
| #ifdef TARGET_I386 |
| kvm_tpr_vcpu_start(env); |
| #endif |
| |
| cpu_single_env = env; |
| kvm_arch_load_regs(env); |
| |
| while (1) { |
| int run_cpu = !is_cpu_stopped(env); |
| if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) { |
| process_irqchip_events(env); |
| run_cpu = !env->halted; |
| } |
| if (run_cpu) { |
| kvm_main_loop_wait(env, 0); |
| kvm_cpu_exec(env); |
| } else { |
| kvm_main_loop_wait(env, 1000); |
| } |
| } |
| pthread_mutex_unlock(&qemu_mutex); |
| return 0; |
| } |
| |
| static void *ap_main_loop(void *_env) |
| { |
| CPUState *env = _env; |
| sigset_t signals; |
| struct ioperm_data *data = NULL; |
| |
| current_env = env; |
| env->thread_id = kvm_get_thread_id(); |
| sigfillset(&signals); |
| sigprocmask(SIG_BLOCK, &signals, NULL); |
| env->kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env->cpu_index); |
| |
| #ifdef USE_KVM_DEVICE_ASSIGNMENT |
| /* do ioperm for io ports of assigned devices */ |
| LIST_FOREACH(data, &ioperm_head, entries) |
| on_vcpu(env, kvm_arch_do_ioperm, data); |
| #endif |
| |
| /* signal VCPU creation */ |
| pthread_mutex_lock(&qemu_mutex); |
| current_env->created = 1; |
| pthread_cond_signal(&qemu_vcpu_cond); |
| |
| /* and wait for machine initialization */ |
| while (!qemu_system_ready) |
| qemu_cond_wait(&qemu_system_cond); |
| pthread_mutex_unlock(&qemu_mutex); |
| |
| kvm_main_loop_cpu(env); |
| return NULL; |
| } |
| |
| void kvm_init_vcpu(CPUState *env) |
| { |
| pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env); |
| |
| while (env->created == 0) |
| qemu_cond_wait(&qemu_vcpu_cond); |
| } |
| |
| int kvm_vcpu_inited(CPUState *env) |
| { |
| return env->created; |
| } |
| |
| #ifdef TARGET_I386 |
| void kvm_hpet_disable_kpit(void) |
| { |
| struct kvm_pit_state2 ps2; |
| |
| kvm_get_pit2(kvm_context, &ps2); |
| ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY; |
| kvm_set_pit2(kvm_context, &ps2); |
| } |
| |
| void kvm_hpet_enable_kpit(void) |
| { |
| struct kvm_pit_state2 ps2; |
| |
| kvm_get_pit2(kvm_context, &ps2); |
| ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY; |
| kvm_set_pit2(kvm_context, &ps2); |
| } |
| #endif |
| |
| int kvm_init_ap(void) |
| { |
| #ifdef TARGET_I386 |
| kvm_tpr_opt_setup(); |
| #endif |
| qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL); |
| |
| signal(SIG_IPI, sig_ipi_handler); |
| return 0; |
| } |
| |
| void qemu_kvm_notify_work(void) |
| { |
| uint64_t value = 1; |
| char buffer[8]; |
| size_t offset = 0; |
| |
| if (io_thread_fd == -1) |
| return; |
| |
| memcpy(buffer, &value, sizeof(value)); |
| |
| while (offset < 8) { |
| ssize_t len; |
| |
| len = write(io_thread_fd, buffer + offset, 8 - offset); |
| if (len == -1 && errno == EINTR) |
| continue; |
| |
| /* In case we have a pipe, there is not reason to insist writing |
| * 8 bytes |
| */ |
| if (len == -1 && errno == EAGAIN) |
| break; |
| |
| if (len <= 0) |
| break; |
| |
| offset += len; |
| } |
| } |
| |
| /* If we have signalfd, we mask out the signals we want to handle and then |
| * use signalfd to listen for them. We rely on whatever the current signal |
| * handler is to dispatch the signals when we receive them. |
| */ |
| |
| static void sigfd_handler(void *opaque) |
| { |
| int fd = (unsigned long)opaque; |
| struct qemu_signalfd_siginfo info; |
| struct sigaction action; |
| ssize_t len; |
| |
| while (1) { |
| do { |
| len = read(fd, &info, sizeof(info)); |
| } while (len == -1 && errno == EINTR); |
| |
| if (len == -1 && errno == EAGAIN) |
| break; |
| |
| if (len != sizeof(info)) { |
| printf("read from sigfd returned %zd: %m\n", len); |
| return; |
| } |
| |
| sigaction(info.ssi_signo, NULL, &action); |
| if (action.sa_handler) |
| action.sa_handler(info.ssi_signo); |
| |
| } |
| } |
| |
| /* Used to break IO thread out of select */ |
| static void io_thread_wakeup(void *opaque) |
| { |
| int fd = (unsigned long)opaque; |
| char buffer[4096]; |
| |
| /* Drain the pipe/(eventfd) */ |
| while (1) { |
| ssize_t len; |
| |
| len = read(fd, buffer, sizeof(buffer)); |
| if (len == -1 && errno == EINTR) |
| continue; |
| |
| if (len <= 0) |
| break; |
| } |
| } |
| |
| int kvm_main_loop(void) |
| { |
| int fds[2]; |
| sigset_t mask; |
| int sigfd; |
| |
| io_thread = pthread_self(); |
| qemu_system_ready = 1; |
| |
| if (qemu_eventfd(fds) == -1) { |
| fprintf(stderr, "failed to create eventfd\n"); |
| return -errno; |
| } |
| |
| fcntl(fds[0], F_SETFL, O_NONBLOCK); |
| fcntl(fds[1], F_SETFL, O_NONBLOCK); |
| |
| qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL, |
| (void *)(unsigned long)fds[0]); |
| |
| io_thread_fd = fds[1]; |
| |
| sigemptyset(&mask); |
| sigaddset(&mask, SIGIO); |
| sigaddset(&mask, SIGALRM); |
| sigprocmask(SIG_BLOCK, &mask, NULL); |
| |
| sigfd = qemu_signalfd(&mask); |
| if (sigfd == -1) { |
| fprintf(stderr, "failed to create signalfd\n"); |
| return -errno; |
| } |
| |
| fcntl(sigfd, F_SETFL, O_NONBLOCK); |
| |
| qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL, |
| (void *)(unsigned long)sigfd); |
| |
| pthread_cond_broadcast(&qemu_system_cond); |
| |
| io_thread_sigfd = sigfd; |
| cpu_single_env = NULL; |
| |
| while (1) { |
| main_loop_wait(1000); |
| if (qemu_shutdown_requested()) { |
| if (qemu_no_shutdown()) { |
| vm_stop(0); |
| } else |
| break; |
| } else if (qemu_powerdown_requested()) |
| qemu_system_powerdown(); |
| else if (qemu_reset_requested()) |
| qemu_kvm_system_reset(); |
| else if (kvm_debug_cpu_requested) { |
| gdb_set_stop_cpu(kvm_debug_cpu_requested); |
| vm_stop(EXCP_DEBUG); |
| kvm_debug_cpu_requested = NULL; |
| } |
| } |
| |
| pause_all_threads(); |
| pthread_mutex_unlock(&qemu_mutex); |
| |
| return 0; |
| } |
| |
| #ifdef TARGET_I386 |
| static int destroy_region_works = 0; |
| #endif |
| |
| |
| #if !defined(TARGET_I386) |
| int kvm_arch_init_irq_routing(void) |
| { |
| return 0; |
| } |
| #endif |
| |
| extern int no_hpet; |
| |
| static int kvm_create_context() |
| { |
| int r; |
| |
| if (!kvm_irqchip) { |
| kvm_disable_irqchip_creation(kvm_context); |
| } |
| if (!kvm_pit) { |
| kvm_disable_pit_creation(kvm_context); |
| } |
| if (kvm_create(kvm_context, 0, NULL) < 0) { |
| kvm_finalize(kvm_state); |
| return -1; |
| } |
| r = kvm_arch_qemu_create_context(); |
| if(r <0) |
| kvm_finalize(kvm_state); |
| if (kvm_pit && !kvm_pit_reinject) { |
| if (kvm_reinject_control(kvm_context, 0)) { |
| fprintf(stderr, "failure to disable in-kernel PIT reinjection\n"); |
| return -1; |
| } |
| } |
| #ifdef TARGET_I386 |
| destroy_region_works = kvm_destroy_memory_region_works(kvm_context); |
| #endif |
| |
| r = kvm_arch_init_irq_routing(); |
| if (r < 0) { |
| return r; |
| } |
| |
| kvm_init_ap(); |
| if (kvm_irqchip) { |
| if (!qemu_kvm_has_gsi_routing()) { |
| irq0override = 0; |
| #ifdef TARGET_I386 |
| /* if kernel can't do irq routing, interrupt source |
| * override 0->2 can not be set up as required by hpet, |
| * so disable hpet. |
| */ |
| no_hpet=1; |
| } else if (!qemu_kvm_has_pit_state2()) { |
| no_hpet=1; |
| } |
| #else |
| } |
| #endif |
| } |
| |
| return 0; |
| } |
| |
| #ifdef TARGET_I386 |
| static int must_use_aliases_source(target_phys_addr_t addr) |
| { |
| if (destroy_region_works) |
| return false; |
| if (addr == 0xa0000 || addr == 0xa8000) |
| return true; |
| return false; |
| } |
| |
| static int must_use_aliases_target(target_phys_addr_t addr) |
| { |
| if (destroy_region_works) |
| return false; |
| if (addr >= 0xe0000000 && addr < 0x100000000ull) |
| return true; |
| return false; |
| } |
| |
| static struct mapping { |
| target_phys_addr_t phys; |
| ram_addr_t ram; |
| ram_addr_t len; |
| } mappings[50]; |
| static int nr_mappings; |
| |
| static struct mapping *find_ram_mapping(ram_addr_t ram_addr) |
| { |
| struct mapping *p; |
| |
| for (p = mappings; p < mappings + nr_mappings; ++p) { |
| if (p->ram <= ram_addr && ram_addr < p->ram + p->len) { |
| return p; |
| } |
| } |
| return NULL; |
| } |
| |
| static struct mapping *find_mapping(target_phys_addr_t start_addr) |
| { |
| struct mapping *p; |
| |
| for (p = mappings; p < mappings + nr_mappings; ++p) { |
| if (p->phys <= start_addr && start_addr < p->phys + p->len) { |
| return p; |
| } |
| } |
| return NULL; |
| } |
| |
| static void drop_mapping(target_phys_addr_t start_addr) |
| { |
| struct mapping *p = find_mapping(start_addr); |
| |
| if (p) |
| *p = mappings[--nr_mappings]; |
| } |
| #endif |
| |
| void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, |
| ram_addr_t phys_offset) |
| { |
| int r = 0; |
| unsigned long area_flags; |
| #ifdef TARGET_I386 |
| struct mapping *p; |
| #endif |
| |
| if (start_addr + size > phys_ram_size) { |
| phys_ram_size = start_addr + size; |
| } |
| |
| phys_offset &= ~IO_MEM_ROM; |
| area_flags = phys_offset & ~TARGET_PAGE_MASK; |
| |
| if (area_flags != IO_MEM_RAM) { |
| #ifdef TARGET_I386 |
| if (must_use_aliases_source(start_addr)) { |
| kvm_destroy_memory_alias(kvm_context, start_addr); |
| return; |
| } |
| if (must_use_aliases_target(start_addr)) |
| return; |
| #endif |
| while (size > 0) { |
| p = find_mapping(start_addr); |
| if (p) { |
| kvm_unregister_memory_area(kvm_context, p->phys, p->len); |
| drop_mapping(p->phys); |
| } |
| start_addr += TARGET_PAGE_SIZE; |
| if (size > TARGET_PAGE_SIZE) { |
| size -= TARGET_PAGE_SIZE; |
| } else { |
| size = 0; |
| } |
| } |
| return; |
| } |
| |
| r = kvm_is_containing_region(kvm_context, start_addr, size); |
| if (r) |
| return; |
| |
| if (area_flags >= TLB_MMIO) |
| return; |
| |
| #ifdef TARGET_I386 |
| if (must_use_aliases_source(start_addr)) { |
| p = find_ram_mapping(phys_offset); |
| if (p) { |
| kvm_create_memory_alias(kvm_context, start_addr, size, |
| p->phys + (phys_offset - p->ram)); |
| } |
| return; |
| } |
| #endif |
| |
| r = kvm_register_phys_mem(kvm_context, start_addr, |
| qemu_get_ram_ptr(phys_offset), |
| size, 0); |
| if (r < 0) { |
| printf("kvm_cpu_register_physical_memory: failed\n"); |
| exit(1); |
| } |
| |
| #ifdef TARGET_I386 |
| drop_mapping(start_addr); |
| p = &mappings[nr_mappings++]; |
| p->phys = start_addr; |
| p->ram = phys_offset; |
| p->len = size; |
| #endif |
| |
| return; |
| } |
| |
| int kvm_setup_guest_memory(void *area, unsigned long size) |
| { |
| int ret = 0; |
| |
| #ifdef MADV_DONTFORK |
| if (kvm_enabled() && !kvm_has_sync_mmu()) |
| ret = madvise(area, size, MADV_DONTFORK); |
| #endif |
| |
| if (ret) |
| perror ("madvise"); |
| |
| return ret; |
| } |
| |
| int kvm_qemu_check_extension(int ext) |
| { |
| return kvm_check_extension(kvm_state, ext); |
| } |
| |
| int kvm_qemu_init_env(CPUState *cenv) |
| { |
| return kvm_arch_qemu_init_env(cenv); |
| } |
| |
| #ifdef KVM_CAP_SET_GUEST_DEBUG |
| |
| struct kvm_set_guest_debug_data { |
| struct kvm_guest_debug dbg; |
| int err; |
| }; |
| |
| static void kvm_invoke_set_guest_debug(void *data) |
| { |
| struct kvm_set_guest_debug_data *dbg_data = data; |
| |
| dbg_data->err = kvm_set_guest_debug(cpu_single_env->kvm_cpu_state.vcpu_ctx, |
| &dbg_data->dbg); |
| } |
| |
| int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap) |
| { |
| struct kvm_set_guest_debug_data data; |
| |
| data.dbg.control = 0; |
| if (env->singlestep_enabled) |
| data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; |
| |
| kvm_arch_update_guest_debug(env, &data.dbg); |
| data.dbg.control |= reinject_trap; |
| |
| on_vcpu(env, kvm_invoke_set_guest_debug, &data); |
| return data.err; |
| } |
| |
| #endif |
| |
| /* |
| * dirty pages logging |
| */ |
| /* FIXME: use unsigned long pointer instead of unsigned char */ |
| unsigned char *kvm_dirty_bitmap = NULL; |
| int kvm_physical_memory_set_dirty_tracking(int enable) |
| { |
| int r = 0; |
| |
| if (!kvm_enabled()) |
| return 0; |
| |
| if (enable) { |
| if (!kvm_dirty_bitmap) { |
| unsigned bitmap_size = BITMAP_SIZE(phys_ram_size); |
| kvm_dirty_bitmap = qemu_malloc(bitmap_size); |
| if (kvm_dirty_bitmap == NULL) { |
| perror("Failed to allocate dirty pages bitmap"); |
| r=-1; |
| } |
| else { |
| r = kvm_dirty_pages_log_enable_all(kvm_context); |
| } |
| } |
| } |
| else { |
| if (kvm_dirty_bitmap) { |
| r = kvm_dirty_pages_log_reset(kvm_context); |
| qemu_free(kvm_dirty_bitmap); |
| kvm_dirty_bitmap = NULL; |
| } |
| } |
| return r; |
| } |
| |
| /* get kvm's dirty pages bitmap and update qemu's */ |
| static int kvm_get_dirty_pages_log_range(unsigned long start_addr, |
| unsigned char *bitmap, |
| unsigned long offset, |
| unsigned long mem_size) |
| { |
| unsigned int i, j, n=0; |
| unsigned char c; |
| unsigned long page_number, addr, addr1; |
| ram_addr_t ram_addr; |
| unsigned int len = ((mem_size/TARGET_PAGE_SIZE) + 7) / 8; |
| |
| /* |
| * bitmap-traveling is faster than memory-traveling (for addr...) |
| * especially when most of the memory is not dirty. |
| */ |
| for (i=0; i<len; i++) { |
| c = bitmap[i]; |
| while (c>0) { |
| j = ffsl(c) - 1; |
| c &= ~(1u<<j); |
| page_number = i * 8 + j; |
| addr1 = page_number * TARGET_PAGE_SIZE; |
| addr = offset + addr1; |
| ram_addr = cpu_get_physical_page_desc(addr); |
| cpu_physical_memory_set_dirty(ram_addr); |
| n++; |
| } |
| } |
| return 0; |
| } |
| static int kvm_get_dirty_bitmap_cb(unsigned long start, unsigned long len, |
| void *bitmap, void *opaque) |
| { |
| return kvm_get_dirty_pages_log_range(start, bitmap, start, len); |
| } |
| |
| /* |
| * get kvm's dirty pages bitmap and update qemu's |
| * we only care about physical ram, which resides in slots 0 and 3 |
| */ |
| int kvm_update_dirty_pages_log(void) |
| { |
| int r = 0; |
| |
| |
| r = kvm_get_dirty_pages_range(kvm_context, 0, -1UL, |
| NULL, |
| kvm_get_dirty_bitmap_cb); |
| return r; |
| } |
| |
| void kvm_qemu_log_memory(target_phys_addr_t start, target_phys_addr_t size, |
| int log) |
| { |
| if (log) |
| kvm_dirty_pages_log_enable_slot(kvm_context, start, size); |
| else { |
| #ifdef TARGET_I386 |
| if (must_use_aliases_target(start)) |
| return; |
| #endif |
| kvm_dirty_pages_log_disable_slot(kvm_context, start, size); |
| } |
| } |
| |
| int kvm_get_phys_ram_page_bitmap(unsigned char *bitmap) |
| { |
| unsigned int bsize = BITMAP_SIZE(phys_ram_size); |
| unsigned int brsize = BITMAP_SIZE(ram_size); |
| unsigned int extra_pages = (phys_ram_size - ram_size) / TARGET_PAGE_SIZE; |
| unsigned int extra_bytes = (extra_pages +7)/8; |
| unsigned int hole_start = BITMAP_SIZE(0xa0000); |
| unsigned int hole_end = BITMAP_SIZE(0xc0000); |
| |
| memset(bitmap, 0xFF, brsize + extra_bytes); |
| memset(bitmap + hole_start, 0, hole_end - hole_start); |
| memset(bitmap + brsize + extra_bytes, 0, bsize - brsize - extra_bytes); |
| |
| return 0; |
| } |
| |
| #ifdef KVM_CAP_IRQCHIP |
| |
| int kvm_set_irq(int irq, int level, int *status) |
| { |
| return kvm_set_irq_level(kvm_context, irq, level, status); |
| } |
| |
| #endif |
| |
| int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf) |
| { |
| return kvm_get_dirty_pages(kvm_context, phys_addr, buf); |
| } |
| |
| void kvm_mutex_unlock(void) |
| { |
| assert(!cpu_single_env); |
| pthread_mutex_unlock(&qemu_mutex); |
| } |
| |
| void kvm_mutex_lock(void) |
| { |
| pthread_mutex_lock(&qemu_mutex); |
| cpu_single_env = NULL; |
| } |
| |
| #ifdef USE_KVM_DEVICE_ASSIGNMENT |
| void kvm_add_ioperm_data(struct ioperm_data *data) |
| { |
| LIST_INSERT_HEAD(&ioperm_head, data, entries); |
| } |
| |
| void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num) |
| { |
| struct ioperm_data *data; |
| |
| data = LIST_FIRST(&ioperm_head); |
| while (data) { |
| struct ioperm_data *next = LIST_NEXT(data, entries); |
| |
| if (data->start_port == start_port && data->num == num) { |
| LIST_REMOVE(data, entries); |
| qemu_free(data); |
| } |
| |
| data = next; |
| } |
| } |
| |
| void kvm_ioperm(CPUState *env, void *data) |
| { |
| if (kvm_enabled() && qemu_system_ready) |
| on_vcpu(env, kvm_arch_do_ioperm, data); |
| } |
| |
| #endif |
| |
| int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr) |
| { |
| #ifndef TARGET_IA64 |
| |
| #ifdef TARGET_I386 |
| if (must_use_aliases_source(start_addr)) |
| return 0; |
| #endif |
| |
| kvm_get_dirty_pages_range(kvm_context, start_addr, end_addr - start_addr, |
| NULL, kvm_get_dirty_bitmap_cb); |
| #endif |
| return 0; |
| } |
| |
| int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len) |
| { |
| #ifdef TARGET_I386 |
| if (must_use_aliases_source(phys_addr)) |
| return 0; |
| #endif |
| |
| #ifndef TARGET_IA64 |
| kvm_qemu_log_memory(phys_addr, len, 1); |
| #endif |
| return 0; |
| } |
| |
| int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len) |
| { |
| #ifdef TARGET_I386 |
| if (must_use_aliases_source(phys_addr)) |
| return 0; |
| #endif |
| |
| #ifndef TARGET_IA64 |
| kvm_qemu_log_memory(phys_addr, len, 0); |
| #endif |
| return 0; |
| } |
| |
| int kvm_set_boot_cpu_id(uint32_t id) |
| { |
| return kvm_set_boot_vcpu_id(kvm_context, id); |
| } |
| |
| #ifdef TARGET_I386 |
| #ifdef KVM_CAP_MCE |
| struct kvm_x86_mce_data |
| { |
| CPUState *env; |
| struct kvm_x86_mce *mce; |
| }; |
| |
| static void kvm_do_inject_x86_mce(void *_data) |
| { |
| struct kvm_x86_mce_data *data = _data; |
| int r; |
| |
| r = kvm_set_mce(data->env->kvm_cpu_state.vcpu_ctx, data->mce); |
| if (r < 0) |
| perror("kvm_set_mce FAILED"); |
| } |
| #endif |
| |
| void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, |
| uint64_t mcg_status, uint64_t addr, uint64_t misc) |
| { |
| #ifdef KVM_CAP_MCE |
| struct kvm_x86_mce mce = { |
| .bank = bank, |
| .status = status, |
| .mcg_status = mcg_status, |
| .addr = addr, |
| .misc = misc, |
| }; |
| struct kvm_x86_mce_data data = { |
| .env = cenv, |
| .mce = &mce, |
| }; |
| |
| on_vcpu(cenv, kvm_do_inject_x86_mce, &data); |
| #endif |
| } |
| #endif |