// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

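/*
 * Comparator used to keep the copied memblock regions sorted by ascending
 * base address, so that the EL2 side can do an ordered lookup over them when
 * resolving physical addresses.
 */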
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

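/*
 * Snapshot the host's memblock regions into the hyp_memory array shared with
 * the nVHE hypervisor. The array is statically sized, so bail out with
 * -ENOMEM if there are more regions than HYP_MEMBLOCK_REGIONS.
 */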
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

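/*
 * Reserve the physical memory needed by the nVHE hypervisor: EL2 stage-1
 * page tables, host stage-2 page tables, the VM table, the hyp vmemmap and
 * the FF-A proxy buffers. This runs early from the arch boot code, before
 * the page allocator is up, hence the memblock allocations below.
 */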
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

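/*
 * Tear down the EL2 copy of the VM (if one was ever created) and release the
 * memory that hyp handed back through the teardown memcaches. Called with
 * the VM's config_lock held.
 */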
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
}

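/*
 * Allocate and donate memory for the EL2 view of a vCPU. On success the vCPU
 * is marked as finalized so this is only ever done once; on failure the pages
 * are freed by the host, as hyp never took ownership of them.
 */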
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	host_kvm->arch.pkvm.handle = ret;
	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

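/*
 * pkvm_create_hyp_vm() and pkvm_create_hyp_vcpu() create the EL2 counterparts
 * lazily, on first use. The VM's config_lock serialises racing callers, so
 * each donation only ever happens once.
 */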
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

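/*
 * Run on every CPU via on_each_cpu(): ask the hypervisor to finalize host
 * protection for this CPU (installing the host stage-2). Any failure is
 * recorded in the shared error pointer.
 */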
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

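/*
 * The host cannot walk the stage-2 page tables of a protected guest (they are
 * owned by the hypervisor), so every guest stage-2 mapping created on its
 * behalf is mirrored here in an rb-tree of struct pkvm_mapping, keyed by gfn.
 */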
static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
{
	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);

	if (a->gfn < b->gfn)
		return -1;
	if (a->gfn > b->gfn)
		return 1;
	return 0;
}

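/*
 * Return the node matching @gfn if it exists, otherwise the last node visited
 * on the way down (an approximate neighbour of @gfn). Callers such as
 * for_each_mapping_in_range_safe() then skip any leading entries that fall
 * below the start of the range.
 */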
static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
{
	struct rb_node *node = root->rb_node, *prev = NULL;
	struct pkvm_mapping *mapping;

	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		if (mapping->gfn == gfn)
			return node;
		prev = node;
		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
	}

	return prev;
}

/*
 * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
 * of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)			\
	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,	\
							     ((__start) >> PAGE_SHIFT));\
	     __tmp && ({								\
			__map = rb_entry(__tmp, struct pkvm_mapping, node);		\
			__tmp = rb_next(__tmp);						\
			true;								\
		       });								\
	    )										\
		if (__map->gfn < ((__start) >> PAGE_SHIFT))				\
			continue;							\
		else if (__map->gfn >= ((__end) >> PAGE_SHIFT))				\
			break;								\
		else
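
/*
 * Example usage (sketch): drop every tracked mapping overlapping
 * [addr, addr + size), freeing each entry as we go. This mirrors the teardown
 * loop in pkvm_pgtable_stage2_unmap() below, minus the unshare hypercall.
 *
 *	struct pkvm_mapping *mapping;
 *
 *	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
 *		rb_erase(&mapping->node, &pgt->pkvm_mappings);
 *		kfree(mapping);
 *	}
 */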

int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings = RB_ROOT;
	pgt->mmu = mmu;

	return 0;
}

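/*
 * Unshare every page still mapped into the guest and free the tracking
 * structures. With no hyp VM handle there is nothing to undo.
 */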
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	struct rb_node *node;

	if (!handle)
		return;

	node = rb_first(&pgt->pkvm_mappings);
	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		node = rb_next(node);
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}
}

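/*
 * Map a single page into the guest by sharing it with the hypervisor, then
 * record the new gfn->pfn mapping in the rb-tree. The struct pkvm_mapping is
 * expected to have been preallocated by the caller in the memcache
 * (cache->mapping), so this path does not allocate memory itself.
 */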
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
	if (ret) {
		/* Is the gfn already mapped due to a racing vCPU? */
		if (ret == -EPERM)
			return -EAGAIN;
	}

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));

	return ret;
}

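/*
 * Unshare from the guest and forget every tracked mapping in the range. A
 * failed unshare hypercall leaves the remaining mappings in place and is
 * reported to the caller.
 */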
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held_write(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return ret;
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);

	return 0;
}

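/*
 * Accumulate the "young" state of every mapping in the range, optionally
 * clearing the access flag as we go (mkold).
 */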
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mkold);

	return young;
}

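/*
 * Permission relaxation and access-flag updates operate on a single page and
 * are handled entirely by the hypervisor from the gfn, so there is no need to
 * consult the host-side rb-tree here.
 */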
int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

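/*
 * The remaining kvm_pgtable operations are not supported for protected
 * guests: the host never manipulates a guest's stage-2 tables directly, so
 * reaching any of these is a bug.
 */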
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}