summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorDave Hansen <dave.hansen@linux.intel.com>2017-08-30 16:23:00 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-01-05 15:44:25 +0100
commiteb82151d0b1df53d1ad8d060ecd554ca12eb552a (patch)
treef84284e5399a5caee2e11a9d0cc4a774f3c6685f /arch/x86/mm
parent3e3d38fd9832e82a8cb1a5b1154acfa43ac08d15 (diff)
downloadlinux-rt-eb82151d0b1df53d1ad8d060ecd554ca12eb552a.tar.gz
kaiser: enhanced by kernel and user PCIDs
Merged performance improvements to Kaiser, using distinct kernel and user Process Context Identifiers to minimize the TLB flushing. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Hugh Dickins <hughd@google.com> Acked-by: Jiri Kosina <jkosina@suse.cz> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/kaiser.c7
-rw-r--r--arch/x86/mm/tlb.c46
2 files changed, 50 insertions, 3 deletions
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index bf48bf0df8c5..290a52e6017d 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -240,6 +240,8 @@ static void __init kaiser_init_all_pgds(void)
} while (0)
extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+extern unsigned long X86_CR3_PCID_KERN_VAR;
+extern unsigned long X86_CR3_PCID_USER_VAR;
/*
* If anything in here fails, we will likely die on one of the
* first kernel->user transitions and init will die. But, we
@@ -290,6 +292,11 @@ void __init kaiser_init(void)
kaiser_add_user_map_early(&debug_idt_table,
sizeof(gate_desc) * NR_VECTORS,
__PAGE_KERNEL);
+
+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
+ __PAGE_KERNEL);
+ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
+ __PAGE_KERNEL);
}
/* Add a mapping to the shadow mapping, and synchronize the mappings */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7a4cdb632508..aed0b704de3d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -34,6 +34,46 @@ struct flush_tlb_info {
unsigned long flush_end;
};
+static void load_new_mm_cr3(pgd_t *pgdir)
+{
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+ /*
+ * KAISER, plus PCIDs needs some extra work here. But,
+ * if either of features is not present, we need no
+ * PCIDs here and just do a normal, full TLB flush with
+ * the write_cr3()
+ */
+ if (!IS_ENABLED(CONFIG_KAISER) ||
+ !cpu_feature_enabled(X86_FEATURE_PCID))
+ goto out_set_cr3;
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entires for the PCID out when we change
+ * tasks.
+ */
+ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
+
+ /*
+ * The flush from load_cr3() may leave old TLB entries
+ * for userspace in place. We must flush that context
+ * separately. We can theoretically delay doing this
+ * until we actually load up the userspace CR3, but
+ * that's a bit tricky. We have to have the "need to
+ * flush userspace PCID" bit per-cpu and check it in the
+ * exit-to-userspace paths.
+ */
+ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
+
+out_set_cr3:
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
@@ -45,7 +85,7 @@ void leave_mm(int cpu)
BUG();
if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
- load_cr3(swapper_pg_dir);
+ load_new_mm_cr3(swapper_pg_dir);
/*
* This gets called in the idle path where RCU
* functions differently. Tracing normally
@@ -105,7 +145,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* ordering guarantee we need.
*
*/
- load_cr3(next->pgd);
+ load_new_mm_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
@@ -152,7 +192,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* As above, load_cr3() is serializing and orders TLB
* fills with respect to the mm_cpumask write.
*/
- load_cr3(next->pgd);
+ load_new_mm_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_mm_ldt(next);