diff options
author | Dave Hansen <dave.hansen@linux.intel.com> | 2017-08-30 16:23:00 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2018-01-05 15:44:25 +0100 |
commit | eb82151d0b1df53d1ad8d060ecd554ca12eb552a (patch) | |
tree | f84284e5399a5caee2e11a9d0cc4a774f3c6685f /arch/x86/mm | |
parent | 3e3d38fd9832e82a8cb1a5b1154acfa43ac08d15 (diff) | |
download | linux-rt-eb82151d0b1df53d1ad8d060ecd554ca12eb552a.tar.gz |
kaiser: enhanced by kernel and user PCIDs
Merged performance improvements to Kaiser, using distinct kernel
and user Process Context Identifiers to minimize the TLB flushing.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/kaiser.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 46 |
2 files changed, 50 insertions, 3 deletions
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index bf48bf0df8c5..290a52e6017d 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -240,6 +240,8 @@ static void __init kaiser_init_all_pgds(void) } while (0) extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +extern unsigned long X86_CR3_PCID_KERN_VAR; +extern unsigned long X86_CR3_PCID_USER_VAR; /* * If anything in here fails, we will likely die on one of the * first kernel->user transitions and init will die. But, we @@ -290,6 +292,11 @@ void __init kaiser_init(void) kaiser_add_user_map_early(&debug_idt_table, sizeof(gate_desc) * NR_VECTORS, __PAGE_KERNEL); + + kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE, + __PAGE_KERNEL); + kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE, + __PAGE_KERNEL); } /* Add a mapping to the shadow mapping, and synchronize the mappings */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7a4cdb632508..aed0b704de3d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -34,6 +34,46 @@ struct flush_tlb_info { unsigned long flush_end; }; +static void load_new_mm_cr3(pgd_t *pgdir) +{ + unsigned long new_mm_cr3 = __pa(pgdir); + + /* + * KAISER, plus PCIDs needs some extra work here. But, + * if either of features is not present, we need no + * PCIDs here and just do a normal, full TLB flush with + * the write_cr3() + */ + if (!IS_ENABLED(CONFIG_KAISER) || + !cpu_feature_enabled(X86_FEATURE_PCID)) + goto out_set_cr3; + /* + * We reuse the same PCID for different tasks, so we must + * flush all the entires for the PCID out when we change + * tasks. + */ + new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir); + + /* + * The flush from load_cr3() may leave old TLB entries + * for userspace in place. We must flush that context + * separately. We can theoretically delay doing this + * until we actually load up the userspace CR3, but + * that's a bit tricky. We have to have the "need to + * flush userspace PCID" bit per-cpu and check it in the + * exit-to-userspace paths. + */ + invpcid_flush_single_context(X86_CR3_PCID_ASID_USER); + +out_set_cr3: + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + /* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. @@ -45,7 +85,7 @@ void leave_mm(int cpu) BUG(); if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); - load_cr3(swapper_pg_dir); + load_new_mm_cr3(swapper_pg_dir); /* * This gets called in the idle path where RCU * functions differently. Tracing normally @@ -105,7 +145,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * ordering guarantee we need. * */ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); @@ -152,7 +192,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * As above, load_cr3() is serializing and orders TLB * fills with respect to the mm_cpumask write. */ - load_cr3(next->pgd); + load_new_mm_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); load_mm_ldt(next); |