author    Marc Zyngier <maz@kernel.org>  2022-12-05 14:22:55 +0000
committer Marc Zyngier <maz@kernel.org>  2022-12-05 14:22:55 +0000
commit    fe8e3f44c58760f0779813938ed465b3297db1ac (patch)
tree      f0d529b2e7bf99b662134ac7b1dc180a21d0a3b8 /arch/arm64/kvm/mmu.c
parent    a937f37d85f3347a1b6de69990fcd214ce882886 (diff)
parent    5e806c5812e8012a83496cf96bdba266b3aec428 (diff)
Merge branch kvm-arm64/parallel-faults into kvmarm-master/next
* kvm-arm64/parallel-faults:
  : .
  : Parallel stage-2 fault handling, courtesy of Oliver Upton.
  : From the cover letter:
  :
  : "Presently KVM only takes a read lock for stage 2 faults if it believes
  : the fault can be fixed by relaxing permissions on a PTE (write unprotect
  : for dirty logging). Otherwise, stage 2 faults grab the write lock, which
  : predictably can pile up all the vCPUs in a sufficiently large VM.
  :
  : Like the TDP MMU for x86, this series loosens the locking around
  : manipulations of the stage 2 page tables to allow parallel faults. RCU
  : and atomics are exploited to safely build/destroy the stage 2 page
  : tables in light of multiple software observers."
  : .
  KVM: arm64: Reject shared table walks in the hyp code
  KVM: arm64: Don't acquire RCU read lock for exclusive table walks
  KVM: arm64: Take a pointer to walker data in kvm_dereference_pteref()
  KVM: arm64: Handle stage-2 faults in parallel
  KVM: arm64: Make table->block changes parallel-aware
  KVM: arm64: Make leaf->leaf PTE changes parallel-aware
  KVM: arm64: Make block->table PTE changes parallel-aware
  KVM: arm64: Split init and set for table PTE
  KVM: arm64: Atomically update stage 2 leaf attributes in parallel walks
  KVM: arm64: Protect stage-2 traversal with RCU
  KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make
  KVM: arm64: Use an opaque type for pteps
  KVM: arm64: Add a helper to tear down unlinked stage-2 subtrees
  KVM: arm64: Don't pass kvm_pgtable through kvm_pgtable_walk_data
  KVM: arm64: Pass mm_ops through the visitor context
  KVM: arm64: Stash observed pte value in visitor context
  KVM: arm64: Combine visitor arguments into a context structure

Signed-off-by: Marc Zyngier <maz@kernel.org>
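For readers unfamiliar with the pattern the cover letter describes, a minimal sketch of the read side follows: table walkers run inside an RCU read-side critical section, so a page-table page that is unlinked concurrently cannot be freed until every in-flight walker has finished. All names here (sketch_pte, sketch_read_pte) are illustrative, not the actual kvm_pgtable API.

#include <linux/rcupdate.h>
#include <linux/compiler.h>
#include <linux/types.h>

typedef u64 sketch_pte;

/*
 * Illustrative only: read a PTE while guaranteeing the table page
 * backing @ptep is not reclaimed underneath us. Any concurrent unlink
 * must wait for an RCU grace period before freeing the page.
 */
static sketch_pte sketch_read_pte(sketch_pte *ptep)
{
	sketch_pte pte;

	rcu_read_lock();
	pte = READ_ONCE(*ptep);
	rcu_read_unlock();

	return pte;
}

In the real series the critical section spans the whole walk, not a single load; the sketch only shows the basic discipline.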
Diffstat (limited to 'arch/arm64/kvm/mmu.c')
-rw-r--r--  arch/arm64/kvm/mmu.c  53
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1ef0704420d9..0fe5945bc4fb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -128,6 +128,25 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
free_pages_exact(virt, size);
}
+static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
+
+static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
+{
+ struct page *page = container_of(head, struct page, rcu_head);
+ void *pgtable = page_to_virt(page);
+ u32 level = page_private(page);
+
+ kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
+}
+
+static void stage2_free_removed_table(void *addr, u32 level)
+{
+ struct page *page = virt_to_page(addr);
+
+ set_page_private(page, (unsigned long)level);
+ call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
+}
+
static void kvm_host_get_page(void *addr)
{
get_page(virt_to_page(addr));
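The hunk above is the standard call_rcu() deferred-free idiom: call_rcu() hands only the rcu_head to the callback, so stage2_free_removed_table() stashes the table level in page_private() beforehand, and the callback recovers both the page and the level via container_of() and page_private(). The same idiom in isolation, with a hypothetical table_box type standing in for struct page:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct table_box {
	struct rcu_head rcu;	/* handed to call_rcu() */
	u32 level;		/* per-object state the callback needs */
};

static void table_box_free_rcu(struct rcu_head *head)
{
	/* Recover the enclosing object from the rcu_head pointer. */
	struct table_box *box = container_of(head, struct table_box, rcu);

	kfree(box);	/* safe: all pre-existing RCU readers are done */
}

static void table_box_free_deferred(struct table_box *box)
{
	call_rcu(&box->rcu, table_box_free_rcu);
}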
@@ -640,7 +659,7 @@ static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
static int get_user_mapping_size(struct kvm *kvm, u64 addr)
{
struct kvm_pgtable pgt = {
- .pgd = (kvm_pte_t *)kvm->mm->pgd,
+ .pgd = (kvm_pteref_t)kvm->mm->pgd,
.ia_bits = vabits_actual,
.start_level = (KVM_PGTABLE_MAX_LEVELS -
CONFIG_PGTABLE_LEVELS),
@@ -662,6 +681,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
.zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
.free_pages_exact = kvm_s2_free_pages_exact,
+ .free_removed_table = stage2_free_removed_table,
.get_page = kvm_host_get_page,
.put_page = kvm_s2_put_page,
.page_count = kvm_host_page_count,
@@ -841,7 +861,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
write_lock(&kvm->mmu_lock);
ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
- &cache);
+ &cache, 0);
write_unlock(&kvm->mmu_lock);
if (ret)
break;
@@ -1136,7 +1156,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
- bool use_read_lock = false;
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
unsigned long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
@@ -1171,8 +1190,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
- use_read_lock = (fault_status == FSC_PERM && write_fault &&
- fault_granule == PAGE_SIZE);
} else {
vma_shift = get_vma_page_shift(vma, hva);
}
@@ -1271,15 +1288,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault && device)
return -ENOEXEC;
- /*
- * To reduce MMU contentions and enhance concurrency during dirty
- * logging, only acquire read lock for permission
- * relaxation.
- */
- if (use_read_lock)
- read_lock(&kvm->mmu_lock);
- else
- write_lock(&kvm->mmu_lock);
+ read_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
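With the conditional write lock gone, every fault now runs under the read lock and mutual exclusion moves down to the individual PTE. A sketch of the shared-walker convention, assuming a simplified 64-bit PTE (sketch_try_set_pte is illustrative; the series implements this inside the pgtable walker itself):

#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/errno.h>

/*
 * Illustrative only: install @new over @old. Exclusive walkers (write
 * lock held) cannot race, so a plain store suffices; shared walkers
 * use cmpxchg() and report a lost race so the vCPU replays the fault.
 */
static int sketch_try_set_pte(u64 *ptep, u64 old, u64 new, bool shared)
{
	if (!shared) {
		WRITE_ONCE(*ptep, new);
		return 0;
	}

	if (cmpxchg64(ptep, old, new) != old)
		return -EAGAIN;

	return 0;
}

Note how user_mem_abort() already converts -EAGAIN into a successful return ("return ret != -EAGAIN ? ret : 0;" below), so a raced fault is simply retried by the guest rather than reported as an error.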
@@ -1323,15 +1332,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* permissions only if vma_pagesize equals fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size.
*/
- if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
+ if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
- } else {
- WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
-
+ else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
__pfn_to_phys(pfn), prot,
- memcache);
- }
+ memcache, KVM_PGTABLE_WALK_SHARED);
/* Mark the page dirty only if the fault is handled successfully */
if (writable && !ret) {
@@ -1340,10 +1346,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
out_unlock:
- if (use_read_lock)
- read_unlock(&kvm->mmu_lock);
- else
- write_unlock(&kvm->mmu_lock);
+ read_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
@@ -1549,7 +1552,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
*/
kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
PAGE_SIZE, __pfn_to_phys(pfn),
- KVM_PGTABLE_PROT_R, NULL);
+ KVM_PGTABLE_PROT_R, NULL, 0);
return false;
}
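Taken together, the call sites above establish the convention for the new flags argument to kvm_pgtable_stage2_map(): pass 0 when the caller holds mmu_lock for write (an exclusive walk, as in kvm_phys_addr_ioremap() and kvm_set_spte_gfn()), and KVM_PGTABLE_WALK_SHARED when it only holds it for read (the fault path). Both patterns, lifted straight from this diff:

/* Exclusive walk: write lock held, no concurrent table walkers. */
write_lock(&kvm->mmu_lock);
ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
			     &cache, 0);
write_unlock(&kvm->mmu_lock);

/* Shared walk: many vCPUs may fault in parallel under the read lock. */
read_lock(&kvm->mmu_lock);
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
			     __pfn_to_phys(pfn), prot,
			     memcache, KVM_PGTABLE_WALK_SHARED);
read_unlock(&kvm->mmu_lock);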