From d28c4393a7bf558538e9def269c1caeab6ec056f Mon Sep 17 00:00:00 2001 From: "Prasanna S.P" <prasanna@in.ibm.com> Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] x86: error_code is not safe for kprobes This patch moves the entry.S:error_entry to .kprobes.text section, since code marked unsafe for kprobes jumps directly to entry.S::error_entry, that must be marked unsafe as well. This patch also moves all the ".previous.text" asm directives to ".previous" for kprobes section. AK: Following a similar i386 patch from Chuck Ebbert AK: Also merged Jeremy's fix in. +From: Jeremy Fitzhardinge <jeremy@goop.org> KPROBE_ENTRY does a .section .kprobes.text, and expects its users to do a .previous at the end of the function. Unfortunately, if any code within the function switches sections, for example .fixup, then the .previous ends up putting all subsequent code into .fixup. Worse, any subsequent .fixup code gets intermingled with the code its supposed to be fixing (which is also in .fixup). It's surprising this didn't cause more havok. The fix is to use .pushsection/.popsection, so this stuff nests properly. A further cleanup would be to get rid of all .section/.previous pairs, since they're inherently fragile. +From: Chuck Ebbert <76306.1226@compuserve.com> Because code marked unsafe for kprobes jumps directly to entry.S::error_code, that must be marked unsafe as well. The easiest way to do that is to move the page fault entry point to just before error_code and let it inherit the same section. Also moved all the ".previous" asm directives for kprobes sections to column 1 and removed ".text" from them. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/entry.S | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/i386/kernel/entry.S') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 87f9f60b803b..ba22ec8fab54 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -591,11 +591,9 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -ENTRY(divide_error) - RING0_INT_FRAME - pushl $0 # no error code - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error +KPROBE_ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: @@ -645,6 +643,7 @@ error_code: call *%edi jmp ret_from_exception CFI_ENDPROC +KPROBE_END(page_fault) ENTRY(coprocessor_error) RING0_INT_FRAME @@ -720,7 +719,8 @@ debug_stack_correct: call do_debug jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(debug) + /* * NMI is doubly nasty. It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -816,7 +816,7 @@ KPROBE_ENTRY(int3) call do_int3 jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(int3) ENTRY(overflow) RING0_INT_FRAME @@ -881,7 +881,7 @@ KPROBE_ENTRY(general_protection) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text +KPROBE_END(general_protection) ENTRY(alignment_check) RING0_EC_FRAME @@ -890,13 +890,14 @@ ENTRY(alignment_check) jmp error_code CFI_ENDPROC -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault +ENTRY(divide_error) + RING0_INT_FRAME + pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_divide_error CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) -- cgit v1.2.1 From 02ba1a32dbd3d406530a17a2643a8f0f8cbf3acc Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@suse.de> Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: move kernel_thread_helper into entry.S And add proper CFI annotation to it which was previously impossible. This prevents "stuck" messages by the dwarf2 unwinder when reaching the top of a kernel stack. Includes feedback from Jan Beulich Cc: jbeulich@novell.com Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/entry.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/i386/kernel/entry.S') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index ba22ec8fab54..dede506e5bd0 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -950,6 +950,19 @@ ENTRY(arch_unwind_init_running) ENDPROC(arch_unwind_init_running) #endif +ENTRY(kernel_thread_helper) + pushl $0 # fake return address for unwinder + CFI_STARTPROC + movl %edx,%eax + push %edx + CFI_ADJUST_CFA_OFFSET 4 + call *%ebx + push %eax + CFI_ADJUST_CFA_OFFSET 4 + call do_exit + CFI_ENDPROC +ENDPROC(kernel_thread_helper) + .section .rodata,"a" #include "syscall_table.S" -- cgit v1.2.1 From 06039754d775d3e48e4a292e4f353321205eff53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=E1zquez=20Cao?= <fernando@oss.ntt.co.jp> Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: Disallow kprobes on NMI handlers A kprobe executes IRET early and that could cause NMI recursion and stack corruption. Note: This problem was originally spotted and solved by Andi Kleen in the x86_64 architecture. This patch is an adaption of his patch for i386. AK: Merged with current code which was a bit different. AK: Removed printk in nmi handler that shouldn't be there in the first time AK: Added missing include. AK: added KPROBES_END Signed-off-by: Fernando Vazquez <fernando@intellilink.co.jp> Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel/entry.S') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index dede506e5bd0..0928f70639aa 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -729,7 +729,7 @@ KPROBE_END(debug) * check whether we got an NMI on the debug path where the debug * fault happened on the sysenter path. */ -ENTRY(nmi) +KPROBE_ENTRY(nmi) RING0_INT_FRAME pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -805,6 +805,7 @@ nmi_16bit_stack: .align 4 .long 1b,iret_exc .previous +KPROBE_END(nmi) KPROBE_ENTRY(int3) RING0_INT_FRAME -- cgit v1.2.1 From a549b86dd0f3cbffcd5f9343f4ae7fcd59f7e756 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Tue, 26 Sep 2006 10:52:37 +0200 Subject: [PATCH] i386: annotate FIX_STACK() and the rest of nmi() In i386's entry.S, FIX_STACK() needs annotation because it replaces the stack pointer. And the rest of nmi() needs annotation in order to compile with these new annotations. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/entry.S | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel/entry.S') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 0928f70639aa..4b0845249222 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -701,9 +701,15 @@ device_not_available_emulate: jne ok; \ label: \ movl TSS_sysenter_esp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ pushfl; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl $__KERNEL_CS; \ - pushl $sysenter_past_esp + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $sysenter_past_esp; \ + CFI_ADJUST_CFA_OFFSET 4; \ + CFI_REL_OFFSET eip, 0 KPROBE_ENTRY(debug) RING0_INT_FRAME @@ -754,6 +760,7 @@ KPROBE_ENTRY(nmi) cmpl $sysenter_entry,12(%esp) je nmi_debug_stack_check nmi_stack_correct: + /* We have a RING0_INT_FRAME here */ pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL @@ -764,9 +771,12 @@ nmi_stack_correct: CFI_ENDPROC nmi_stack_fixup: + RING0_INT_FRAME FIX_STACK(12,nmi_stack_correct, 1) jmp nmi_stack_correct + nmi_debug_stack_check: + /* We have a RING0_INT_FRAME here */ cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct cmpl $debug,(%esp) @@ -777,8 +787,10 @@ nmi_debug_stack_check: jmp nmi_stack_correct nmi_16bit_stack: - RING0_INT_FRAME - /* create the pointer to lss back */ + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back + */ pushl %ss CFI_ADJUST_CFA_OFFSET 4 pushl %esp -- cgit v1.2.1 From 0da5db313317e3195482d3e660a1074857374a89 Mon Sep 17 00:00:00 2001 From: Rusty Russell <rusty@rustcorp.com.au> Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] i386: Abstract sensitive instructions Abstract sensitive instructions in assembler code, replacing them with macros (which currently are #defined to the native versions). We use long names: assembler is case-insensitive, so if something goes wrong and macros do not expand, it would assemble anyway. Resulting object files are exactly the same as before. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/entry.S | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'arch/i386/kernel/entry.S') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 4b0845249222..3872fca5c74a 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -76,8 +76,15 @@ DF_MASK = 0x00000400 NT_MASK = 0x00004000 VM_MASK = 0x00020000 +/* These are replaces for paravirtualization */ +#define DISABLE_INTERRUPTS cli +#define ENABLE_INTERRUPTS sti +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define INTERRUPT_RETURN iret +#define GET_CR0_INTO_EAX movl %cr0, %eax + #ifdef CONFIG_PREEMPT -#define preempt_stop cli; TRACE_IRQS_OFF +#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF #else #define preempt_stop #define resume_kernel restore_nocheck @@ -236,7 +243,7 @@ check_userspace: testl $(VM_MASK | 3), %eax jz resume_kernel ENTRY(resume_userspace) - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -247,7 +254,7 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cli + DISABLE_INTERRUPTS cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: @@ -275,7 +282,7 @@ sysenter_past_esp: * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - sti + ENABLE_INTERRUPTS pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -320,7 +327,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) - cli + DISABLE_INTERRUPTS TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx @@ -330,8 +337,7 @@ sysenter_past_esp: movl OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON - sti - sysexit + ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC @@ -356,7 +362,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) # store the return value syscall_exit: - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -381,11 +387,11 @@ restore_nocheck_notrace: RESTORE_REGS addl $4, %esp CFI_ADJUST_CFA_OFFSET -4 -1: iret +1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -409,7 +415,7 @@ ldt_ss: * dosemu and wine happy. */ subl $8, %esp # reserve space for switch16 pointer CFI_ADJUST_CFA_OFFSET 8 - cli + DISABLE_INTERRUPTS TRACE_IRQS_OFF movl %esp, %eax /* Set up the 16bit stack frame with switch32 pointer on top, @@ -419,7 +425,7 @@ ldt_ss: TRACE_IRQS_IRET RESTORE_REGS lss 20+4(%esp), %esp # switch to 16bit stack -1: iret +1: INTERRUPT_RETURN .section __ex_table,"a" .align 4 .long 1b,iret_exc @@ -434,7 +440,7 @@ work_pending: jz work_notifysig work_resched: call schedule - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -490,7 +496,7 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending TRACE_IRQS_ON - sti # could let do_syscall_trace() call + ENABLE_INTERRUPTS # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -668,7 +674,7 @@ ENTRY(device_not_available) pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - movl %cr0, %eax + GET_CR0_INTO_EAX testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate preempt_stop @@ -811,7 +817,7 @@ nmi_16bit_stack: call do_nmi RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack -1: iret +1: INTERRUPT_RETURN CFI_ENDPROC .section __ex_table,"a" .align 4 -- cgit v1.2.1 From 78be3706b21a232310590fe00258b224177ac05f Mon Sep 17 00:00:00 2001 From: Rusty Russell <rusty@rustcorp.com.au> Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] i386: Allow a kernel not to be in ring 0 We allow for the fact that the guest kernel may not run in ring 0. This requires some abstraction in a few places when setting %cs or checking privilege level (user vs kernel). This is Chris' [RFC PATCH 15/33] move segment checks to subarch, except rather than using #define USER_MODE_MASK which depends on a config option, we use Zach's more flexible approach of assuming ring 3 == userspace. I also used "get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in the code... 1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3 2) Add a get_kernel_rpl() macro, and don't assume it's zero. And: Clean up of patch for letting kernel run other than ring 0: a. Add some comments about the SEGMENT_IS_*_CODE() macros. b. Add a USER_RPL macro. (Code was comparing a value to a mask in some places and to the magic number 3 in other places.) c. Add macros for table indicator field and use them. d. Change the entry.S tests for LDT stack segment to use the macros Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/entry.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel/entry.S') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3872fca5c74a..284f2e908ad0 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -240,8 +240,9 @@ ret_from_intr: check_userspace: movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al - testl $(VM_MASK | 3), %eax - jz resume_kernel + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -377,8 +378,8 @@ restore_all: # See comments in process.c:copy_thread() for details. movb OLDSS(%esp), %ah movb CS(%esp), %al - andl $(VM_MASK | (4 << 8) | 3), %eax - cmpl $((4 << 8) | 3), %eax + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: -- cgit v1.2.1 From adf1423698f00d00b267f7dca8231340ce7d65ef Mon Sep 17 00:00:00 2001 From: Jan Beulich <jbeulich@novell.com> Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] i386/x86-64: Work around gcc bug with noreturn functions in unwinder Current gcc generates calls not jumps to noreturn functions. When that happens the return address can point to the next function, which confuses the unwinder. This patch works around it by marking asynchronous exception frames in contrast normal call frames in the unwind information. Then teach the unwinder to decode this. For normal call frames the unwinder now subtracts one from the address which avoids this problem. The standard libgcc unwinder uses the same trick. It doesn't include adjustment of the printed address (i.e. for the original example, it'd still be kernel_math_error+0 that gets displayed, but the unwinder wouldn't get confused anymore. This only works with binutils 2.6.17+ and some versions of H.J.Lu's 2.6.16 unfortunately because earlier binutils don't support .cfi_signal_frame [AK: added automatic detection of the new binutils and wrote description] Signed-off-by: Jan Beulich <jbeulich@novell.com> Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/entry.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/i386/kernel/entry.S') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 284f2e908ad0..5a63d6fdb70e 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -183,18 +183,21 @@ VM_MASK = 0x00020000 #define RING0_INT_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 3*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_EC_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 4*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_PTREGS_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, OLDESP-EBX;\ /*CFI_OFFSET cs, CS-OLDESP;*/\ CFI_OFFSET eip, EIP-OLDESP;\ @@ -275,6 +278,7 @@ need_resched: # sysenter call handler stub ENTRY(sysenter_entry) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA esp, 0 CFI_REGISTER esp, ebp movl TSS_sysenter_esp0(%esp),%esp -- cgit v1.2.1