From eb82151d0b1df53d1ad8d060ecd554ca12eb552a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 30 Aug 2017 16:23:00 -0700 Subject: kaiser: enhanced by kernel and user PCIDs Merged performance improvements to Kaiser, using distinct kernel and user Process Context Identifiers to minimize the TLB flushing. Signed-off-by: Dave Hansen Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaiser.c | 7 +++++++ arch/x86/mm/tlb.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index bf48bf0df8c5..290a52e6017d 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -240,6 +240,8 @@ static void __init kaiser_init_all_pgds(void) } while (0) extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +extern unsigned long X86_CR3_PCID_KERN_VAR; +extern unsigned long X86_CR3_PCID_USER_VAR; /* * If anything in here fails, we will likely die on one of the * first kernel->user transitions and init will die. But, we @@ -290,6 +292,11 @@ void __init kaiser_init(void) kaiser_add_user_map_early(&debug_idt_table, sizeof(gate_desc) * NR_VECTORS, __PAGE_KERNEL); + + kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, + __PAGE_KERNEL); + kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, + __PAGE_KERNEL); } /* Add a mapping to the shadow mapping, and synchronize the mappings */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7a4cdb632508..aed0b704de3d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -34,6 +34,46 @@ struct flush_tlb_info { unsigned long flush_end; }; +static void load_new_mm_cr3(pgd_t *pgdir) +{ + unsigned long new_mm_cr3 = __pa(pgdir); + + /* + * KAISER, plus PCIDs needs some extra work here. But, + * if either of features is not present, we need no + * PCIDs here and just do a normal, full TLB flush with + * the write_cr3() + */ + if (!IS_ENABLED(CONFIG_KAISER) || + !cpu_feature_enabled(X86_FEATURE_PCID)) + goto out_set_cr3; + /* + * We reuse the same PCID for different tasks, so we must + * flush all the entires for the PCID out when we change + * tasks. + */ + new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); + + /* + * The flush from load_cr3() may leave old TLB entries + * for userspace in place. We must flush that context + * separately. We can theoretically delay doing this + * until we actually load up the userspace CR3, but + * that's a bit tricky. We have to have the "need to + * flush userspace PCID" bit per-cpu and check it in the + * exit-to-userspace paths. + */ + invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); + +out_set_cr3: + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + /* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. @@ -45,7 +85,7 @@ void leave_mm(int cpu) BUG(); if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); - load_cr3(swapper_pg_dir); + load_new_mm_cr3(swapper_pg_dir); /* * This gets called in the idle path where RCU * functions differently. Tracing normally @@ -105,7 +145,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * ordering guarantee we need. * */ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); @@ -152,7 +192,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * As above, load_cr3() is serializing and orders TLB * fills with respect to the mm_cpumask write. */ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); load_mm_ldt(next); -- cgit v1.2.1