From a0d14b8909de55139b8702fe0c7e80b69763dcfb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jul 2019 13:40:59 +0200 Subject: x86/mm, tracing: Fix CR2 corruption Despite the current efforts to read CR2 before tracing happens there still exist a number of possible holes: idtentry page_fault do_page_fault has_error_code=1 call error_entry TRACE_IRQS_OFF call trace_hardirqs_off* #PF // modifies CR2 CALL_enter_from_user_mode __context_tracking_exit() trace_user_exit(0) #PF // modifies CR2 call do_page_fault address = read_cr2(); /* whoopsie */ And similar for i386. Fix it by pulling the CR2 read into the entry code, before any of that stuff gets a chance to run and ruin things. Reported-by: He Zhe Reported-by: Eiichi Tsukata Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Cc: bp@alien8.de Cc: rostedt@goodmis.org Cc: torvalds@linux-foundation.org Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: jgross@suse.com Cc: joel@joelfernandes.org Link: https://lkml.kernel.org/r/20190711114336.116812491@infradead.org Debugged-by: Steven Rostedt --- arch/x86/entry/entry_64.S | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'arch/x86/entry/entry_64.S') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 95ae05f0edf2..7cb2e1f1ec09 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -864,7 +864,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) -.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0 +.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0 .if \paranoid call paranoid_entry @@ -874,12 +874,21 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt .endif UNWIND_HINT_REGS - .if \paranoid + .if \read_cr2 + GET_CR2_INTO(%rdx); /* can clobber %rax */ + .endif + .if \shift_ist != -1 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ .else TRACE_IRQS_OFF .endif + + .if \paranoid == 0 + testb $3, CS(%rsp) + jz .Lfrom_kernel_no_context_tracking_\@ + CALL_enter_from_user_mode +.Lfrom_kernel_no_context_tracking_\@: .endif movq %rsp, %rdi /* pt_regs pointer */ @@ -923,6 +932,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * fresh stack. (This is for #DB, which has a nasty habit * of recursing.) * @create_gap: create a 6-word stack gap when coming from kernel mode. + * @read_cr2: load CR2 into the 3rd argument; done before calling any C code * * idtentry generates an IDT stub that sets up a usable kernel context, * creates struct pt_regs, and calls @do_sym. The stub has the following @@ -947,7 +957,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -985,7 +995,7 @@ ENTRY(\sym) .Lfrom_usermode_no_gap_\@: .endif - idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset + idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset .if \paranoid == 1 /* @@ -994,7 +1004,7 @@ ENTRY(\sym) * run in real process context if user_mode(regs). */ .Lfrom_usermode_switch_stack_\@: - idtentry_part \do_sym, \has_error_code, paranoid=0 + idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0 .endif _ASM_NOKPROBE(\sym) @@ -1006,7 +1016,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 @@ -1179,10 +1189,10 @@ idtentry xenint3 do_int3 has_error_code=0 #endif idtentry general_protection do_general_protection has_error_code=1 -idtentry page_fault do_page_fault has_error_code=1 +idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 #ifdef CONFIG_KVM_GUEST -idtentry async_page_fault do_async_page_fault has_error_code=1 +idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1 #endif #ifdef CONFIG_X86_MCE @@ -1281,18 +1291,9 @@ ENTRY(error_entry) movq %rax, %rsp /* switch stack */ ENCODE_FRAME_POINTER pushq %r12 - - /* - * We need to tell lockdep that IRQs are off. We can't do this until - * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). - */ - TRACE_IRQS_OFF - CALL_enter_from_user_mode ret .Lerror_entry_done: - TRACE_IRQS_OFF ret /* -- cgit v1.2.1