diff options
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 58 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 31 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/pkvm.c | 380 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 8 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 9 |
7 files changed, 503 insertions, 0 deletions
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0a6d3e7f2a43..ce9a796a85ee 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -11,6 +11,7 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_pgtable.h> #include <asm/virt.h> +#include <nvhe/pkvm.h> #include <nvhe/spinlock.h> /* @@ -68,10 +69,12 @@ bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); +void reclaim_guest_pages(struct pkvm_hyp_vm *vm); static __always_inline void __load_host_stage2(void) { diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h new file mode 100644 index 000000000000..8c653a3b9501 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba <tabba@google.com> + */ + +#ifndef __ARM64_KVM_NVHE_PKVM_H__ +#define __ARM64_KVM_NVHE_PKVM_H__ + +#include <asm/kvm_pkvm.h> + +/* + * Holds the relevant data for maintaining the vcpu state completely at hyp. + */ +struct pkvm_hyp_vcpu { + struct kvm_vcpu vcpu; + + /* Backpointer to the host's (untrusted) vCPU instance. */ + struct kvm_vcpu *host_vcpu; +}; + +/* + * Holds the relevant data for running a protected vm. + */ +struct pkvm_hyp_vm { + struct kvm kvm; + + /* Backpointer to the host's (untrusted) KVM instance. */ + struct kvm *host_kvm; + + /* The guest's stage-2 page-table managed by the hypervisor. */ + struct kvm_pgtable pgt; + + /* + * The number of vcpus initialized and ready to run. + * Modifying this is protected by 'vm_table_lock'. + */ + unsigned int nr_vcpus; + + /* Array of the hyp vCPU structures for this VM. */ + struct pkvm_hyp_vcpu *vcpus[]; +}; + +static inline struct pkvm_hyp_vm * +pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm); +} + +void pkvm_hyp_vm_table_init(void *tbl); + +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva); +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva); +int __pkvm_teardown_vm(pkvm_handle_t handle); + +#endif /* __ARM64_KVM_NVHE_PKVM_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 3cea4b6ac23e..b5f3fcfe9135 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -15,6 +15,7 @@ #include <nvhe/mem_protect.h> #include <nvhe/mm.h> +#include <nvhe/pkvm.h> #include <nvhe/trap_handler.h> DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); @@ -191,6 +192,33 @@ static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt) __pkvm_vcpu_init_traps(kern_hyp_va(vcpu)); } +static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); + DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2); + DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3); + + host_kvm = kern_hyp_va(host_kvm); + cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva); +} + +static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2); + DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3); + + host_vcpu = kern_hyp_va(host_vcpu); + cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva); +} + +static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -220,6 +248,9 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), + HANDLE_FUNC(__pkvm_init_vm), + HANDLE_FUNC(__pkvm_init_vcpu), + HANDLE_FUNC(__pkvm_teardown_vm), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 06c6a24c0eae..459957b3082e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -141,6 +141,20 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) +{ + vm->pgt.pgd = pgd; + return 0; +} + +void reclaim_guest_pages(struct pkvm_hyp_vm *vm) +{ + unsigned long nr_pages; + + nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages)); +} + int __pkvm_prot_finalize(void) { struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 85d3b7ae720f..135c9a095eca 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -7,6 +7,9 @@ #include <linux/kvm_host.h> #include <linux/mm.h> #include <nvhe/fixed_config.h> +#include <nvhe/mem_protect.h> +#include <nvhe/memory.h> +#include <nvhe/pkvm.h> #include <nvhe/trap_handler.h> /* @@ -183,3 +186,380 @@ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) pvm_init_traps_aa64mmfr0(vcpu); pvm_init_traps_aa64mmfr1(vcpu); } + +/* + * Start the VM table handle at the offset defined instead of at 0. + * Mainly for sanity checking and debugging. + */ +#define HANDLE_OFFSET 0x1000 + +static unsigned int vm_handle_to_idx(pkvm_handle_t handle) +{ + return handle - HANDLE_OFFSET; +} + +static pkvm_handle_t idx_to_vm_handle(unsigned int idx) +{ + return idx + HANDLE_OFFSET; +} + +/* + * Spinlock for protecting state related to the VM table. Protects writes + * to 'vm_table' and 'nr_table_entries' as well as reads and writes to + * 'last_hyp_vcpu_lookup'. + */ +static DEFINE_HYP_SPINLOCK(vm_table_lock); + +/* + * The table of VM entries for protected VMs in hyp. + * Allocated at hyp initialization and setup. + */ +static struct pkvm_hyp_vm **vm_table; + +void pkvm_hyp_vm_table_init(void *tbl) +{ + WARN_ON(vm_table); + vm_table = tbl; +} + +/* + * Return the hyp vm structure corresponding to the handle. + */ +static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) +{ + unsigned int idx = vm_handle_to_idx(handle); + + if (unlikely(idx >= KVM_MAX_PVMS)) + return NULL; + + return vm_table[idx]; +} + +static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) +{ + if (host_vcpu) + hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1); +} + +static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], + unsigned int nr_vcpus) +{ + int i; + + for (i = 0; i < nr_vcpus; i++) + unpin_host_vcpu(hyp_vcpus[i]->host_vcpu); +} + +static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, + unsigned int nr_vcpus) +{ + hyp_vm->host_kvm = host_kvm; + hyp_vm->kvm.created_vcpus = nr_vcpus; + hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr; +} + +static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, + struct pkvm_hyp_vm *hyp_vm, + struct kvm_vcpu *host_vcpu, + unsigned int vcpu_idx) +{ + int ret = 0; + + if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) + return -EBUSY; + + if (host_vcpu->vcpu_idx != vcpu_idx) { + ret = -EINVAL; + goto done; + } + + hyp_vcpu->host_vcpu = host_vcpu; + + hyp_vcpu->vcpu.kvm = &hyp_vm->kvm; + hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id); + hyp_vcpu->vcpu.vcpu_idx = vcpu_idx; + + hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; +done: + if (ret) + unpin_host_vcpu(host_vcpu); + return ret; +} + +static int find_free_vm_table_entry(struct kvm *host_kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_PVMS; ++i) { + if (!vm_table[i]) + return i; + } + + return -ENOMEM; +} + +/* + * Allocate a VM table entry and insert a pointer to the new vm. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, + struct pkvm_hyp_vm *hyp_vm) +{ + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + int idx; + + hyp_assert_lock_held(&vm_table_lock); + + /* + * Initializing protected state might have failed, yet a malicious + * host could trigger this function. Thus, ensure that 'vm_table' + * exists. + */ + if (unlikely(!vm_table)) + return -EINVAL; + + idx = find_free_vm_table_entry(host_kvm); + if (idx < 0) + return idx; + + hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx); + + /* VMID 0 is reserved for the host */ + atomic64_set(&mmu->vmid.id, idx + 1); + + mmu->arch = &hyp_vm->kvm.arch; + mmu->pgt = &hyp_vm->pgt; + + vm_table[idx] = hyp_vm; + return hyp_vm->kvm.arch.pkvm_handle; +} + +/* + * Deallocate and remove the VM table entry corresponding to the handle. + */ +static void remove_vm_table_entry(pkvm_handle_t handle) +{ + hyp_assert_lock_held(&vm_table_lock); + vm_table[vm_handle_to_idx(handle)] = NULL; +} + +static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus) +{ + return size_add(sizeof(struct pkvm_hyp_vm), + size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus)); +} + +static void *map_donated_memory_noclear(unsigned long host_va, size_t size) +{ + void *va = (void *)kern_hyp_va(host_va); + + if (!PAGE_ALIGNED(va)) + return NULL; + + if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)) + return NULL; + + return va; +} + +static void *map_donated_memory(unsigned long host_va, size_t size) +{ + void *va = map_donated_memory_noclear(host_va, size); + + if (va) + memset(va, 0, size); + + return va; +} + +static void __unmap_donated_memory(void *va, size_t size) +{ + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)); +} + +static void unmap_donated_memory(void *va, size_t size) +{ + if (!va) + return; + + memset(va, 0, size); + __unmap_donated_memory(va, size); +} + +static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size) +{ + if (!va) + return; + + __unmap_donated_memory(va, size); +} + +/* + * Initialize the hypervisor copy of the protected VM state using the + * memory donated by the host. + * + * Unmaps the donated memory from the host at stage 2. + * + * host_kvm: A pointer to the host's struct kvm. + * vm_hva: The host va of the area being donated for the VM state. + * Must be page aligned. + * pgd_hva: The host va of the area being donated for the stage-2 PGD for + * the VM. Must be page aligned. Its size is implied by the VM's + * VTCR. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva) +{ + struct pkvm_hyp_vm *hyp_vm = NULL; + size_t vm_size, pgd_size; + unsigned int nr_vcpus; + void *pgd = NULL; + int ret; + + ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1); + if (ret) + return ret; + + nr_vcpus = READ_ONCE(host_kvm->created_vcpus); + if (nr_vcpus < 1) { + ret = -EINVAL; + goto err_unpin_kvm; + } + + vm_size = pkvm_get_hyp_vm_size(nr_vcpus); + pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr); + + ret = -ENOMEM; + + hyp_vm = map_donated_memory(vm_hva, vm_size); + if (!hyp_vm) + goto err_remove_mappings; + + pgd = map_donated_memory_noclear(pgd_hva, pgd_size); + if (!pgd) + goto err_remove_mappings; + + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); + + hyp_spin_lock(&vm_table_lock); + ret = insert_vm_table_entry(host_kvm, hyp_vm); + if (ret < 0) + goto err_unlock; + + ret = kvm_guest_prepare_stage2(hyp_vm, pgd); + if (ret) + goto err_remove_vm_table_entry; + hyp_spin_unlock(&vm_table_lock); + + return hyp_vm->kvm.arch.pkvm_handle; + +err_remove_vm_table_entry: + remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle); +err_unlock: + hyp_spin_unlock(&vm_table_lock); +err_remove_mappings: + unmap_donated_memory(hyp_vm, vm_size); + unmap_donated_memory(pgd, pgd_size); +err_unpin_kvm: + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return ret; +} + +/* + * Initialize the hypervisor copy of the protected vCPU state using the + * memory donated by the host. + * + * handle: The handle for the protected vm. + * host_vcpu: A pointer to the corresponding host vcpu. + * vcpu_hva: The host va of the area being donated for the vcpu state. + * Must be page aligned. The size of the area must be equal to + * the page-aligned size of 'struct pkvm_hyp_vcpu'. + * Return 0 on success, negative error code on failure. + */ +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva) +{ + struct pkvm_hyp_vcpu *hyp_vcpu; + struct pkvm_hyp_vm *hyp_vm; + unsigned int idx; + int ret; + + hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); + if (!hyp_vcpu) + return -ENOMEM; + + hyp_spin_lock(&vm_table_lock); + + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + ret = -ENOENT; + goto unlock; + } + + idx = hyp_vm->nr_vcpus; + if (idx >= hyp_vm->kvm.created_vcpus) { + ret = -EINVAL; + goto unlock; + } + + ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx); + if (ret) + goto unlock; + + hyp_vm->vcpus[idx] = hyp_vcpu; + hyp_vm->nr_vcpus++; +unlock: + hyp_spin_unlock(&vm_table_lock); + + if (ret) + unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + + return ret; +} + +int __pkvm_teardown_vm(pkvm_handle_t handle) +{ + struct pkvm_hyp_vm *hyp_vm; + struct kvm *host_kvm; + size_t vm_size; + int err; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + err = -ENOENT; + goto err_unlock; + } + + if (WARN_ON(hyp_page_count(hyp_vm))) { + err = -EBUSY; + goto err_unlock; + } + + /* Ensure the VMID is clean before it can be reallocated */ + __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); + remove_vm_table_entry(handle); + hyp_spin_unlock(&vm_table_lock); + + /* Reclaim guest pages (including page-table pages) */ + reclaim_guest_pages(hyp_vm); + unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); + + /* Return the metadata pages to the host */ + host_kvm = hyp_vm->host_kvm; + vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); + unmap_donated_memory(hyp_vm, vm_size); + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return 0; + +err_unlock: + hyp_spin_unlock(&vm_table_lock); + return err; +} diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0312c9c74a5a..2be72fbe7279 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -16,6 +16,7 @@ #include <nvhe/memory.h> #include <nvhe/mem_protect.h> #include <nvhe/mm.h> +#include <nvhe/pkvm.h> #include <nvhe/trap_handler.h> unsigned long hyp_nr_cpus; @@ -24,6 +25,7 @@ unsigned long hyp_nr_cpus; (unsigned long)__per_cpu_start) static void *vmemmap_base; +static void *vm_table_base; static void *hyp_pgt_base; static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; @@ -40,6 +42,11 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!vmemmap_base) return -ENOMEM; + nr_pages = hyp_vm_table_pages(); + vm_table_base = hyp_early_alloc_contig(nr_pages); + if (!vm_table_base) + return -ENOMEM; + nr_pages = hyp_s1_pgtable_pages(); hyp_pgt_base = hyp_early_alloc_contig(nr_pages); if (!hyp_pgt_base) @@ -314,6 +321,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + pkvm_hyp_vm_table_init(vm_table_base); out: /* * We tail-called to here from handle___pkvm_init() and will not return, diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cdf8e76b0be1..a1a27f88a312 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1200,6 +1200,15 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, return 0; } +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) +{ + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; +} + static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) |