summaryrefslogtreecommitdiff
path: root/rts/StgCRun.c
diff options
context:
space:
mode:
authorBartosz Nitka <niteria@gmail.com>2018-05-03 10:43:38 -0700
committerBartosz Nitka <niteria@gmail.com>2018-05-11 04:02:03 -0700
commitcb5c2fe875965b7aedbc189012803fc62e48fb3f (patch)
treebc5d65def3de1b2a08b569f9661a0db602a719ac /rts/StgCRun.c
parent40a76c99624ae82e960bad0f1c832ddec4aece16 (diff)
downloadhaskell-cb5c2fe875965b7aedbc189012803fc62e48fb3f.tar.gz
Fix unwinding of C -> Haskell FFI calls with -threaded
See the new note. Test Plan: manual testing with patched gdb Reviewers: bgamari, simonmar, erikd Subscribers: rwbarton, thomie, carter Differential Revision: https://phabricator.haskell.org/D4666
Diffstat (limited to 'rts/StgCRun.c')
-rw-r--r--rts/StgCRun.c175
1 files changed, 165 insertions, 10 deletions
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index ab66c649fc..82d4a4c372 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -59,8 +59,8 @@
#include "StgRun.h"
#include "Capability.h"
-#if defined(DEBUG)
#include "RtsUtils.h"
+#if defined(DEBUG)
#include "Printer.h"
#endif
@@ -90,14 +90,6 @@ StgFunPtr StgReturn(void)
#else /* !USE_MINIINTERPRETER */
-#if defined(LEADING_UNDERSCORE)
-#define STG_RUN "_StgRun"
-#define STG_RETURN "_StgReturn"
-#else
-#define STG_RUN "StgRun"
-#define STG_RETURN "StgReturn"
-#endif
-
#if defined(mingw32_HOST_OS)
/*
* Note [Windows Stack allocations]
@@ -257,6 +249,122 @@ StgRunIsImplementedInAssembler(void)
#define STG_HIDDEN ".hidden "
#endif
+/*
+Note [Unwinding foreign exports on x86-64]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For foreign exports, that is Haskell functions exported as C functions when
+we unwind we have to unwind from Haskell code into C code. The current story
+is as follows:
+
+ * The Haskell stack always has stg_stop_thread_info frame at the bottom
+ * We annotate stg_stop_thread_info to unwind the instruction pointer to a
+ label inside StgRun called StgRunJmp. It's the last instruction before the
+ code jumps into Haskell.
+ * StgRun - which is implemented in assembler is annotated with some manual
+ unwinding information. It unwinds all the registers that it has saved
+ on the stack. This is important as rsp and rbp are often required for
+ getting to the next frame and the rest of the saved registers are useful
+ when inspecting locals in gdb.
+
+
+ Example x86-64 stack for an FFI call
+ from C into a Haskell function:
+
+
+ HASKELL HEAP
+ "ADDRESS SPACE"
+
+ +--------------------+ <------ rbp
+ | |
+ | |
+ | |
+ | |
+ | Haskell |
+ | evaluation stack |
+ | |
+ | |
+ |--------------------|
+ |stg_catch_frame_info|
+ |--------------------|
+ | stg_forceIO_info |
+ |--------------------|
+ |stg_stop_thread_info| -------
+ +--------------------+ |
+ ... |
+ (other heap objects) |
+ ... |
+ |
+ |
+ |
+ C STACK "ADDRESS SPACE" |
+ v
+ +-----------------------------+ <------ rsp
+ | |
+ | RESERVED_C_STACK_BYTES ~16k |
+ | |
+ |-----------------------------|
+ | rbx ||
+ |-----------------------------| \
+ | rbp | |
+ |-----------------------------| \
+ | r12 | |
+ |-----------------------------| \
+ | r13 | | STG_RUN_STACK_FRAME_SIZE
+ |-----------------------------| /
+ | r14 | |
+ |-----------------------------| /
+ | r15 | |
+ |-----------------------------|/
+ | rip saved by call StgRun |
+ | in schedule() |
+ +-----------------------------+
+ ...
+ schedule() stack frame
+
+
+ Lower addresses on the top
+
+One little snag in this approach is that the annotations accepted by the
+assembler are surprisingly unexpressive. I had to resort to a .cfi_escape
+and hand-assemble a DWARF expression. What made it worse was that big numbers
+are LEB128 encoded, which makes them variable byte length, with length depending
+on the magnitude. I took a shortcut and assumed the magnitude of the relevant
+constant. I think it changes very rarely, so it shouldn't be a big burden.
+
+Here's an example stack generated this way:
+
+ Thread 1 "m" hit Breakpoint 1, Fib_zdfstableZZC0ZZCmainZZCFibZZCfib1_info () at Fib.hs:9
+ 9 fib a = return (a + 1)
+ #0 Fib_zdfstableZZC0ZZCmainZZCFibZZCfib1_info () at Fib.hs:9
+ #1 stg_catch_frame_info () at rts/Exception.cmm:372
+ #2 stg_forceIO_info () at rts/StgStartup.cmm:178
+ #3 stg_stop_thread_info () at rts/StgStartup.cmm:42
+ #4 0x00000000007048ab in StgRunIsImplementedInAssembler () at rts/StgCRun.c:255
+ #5 0x00000000006fcf42 in schedule (initialCapability=initialCapability@entry=0x8adac0 <MainCapability>, task=task@entry=0x8cf2a0) at rts/Schedule.c:451
+ #6 0x00000000006fe18e in scheduleWaitThread (tso=0x4200006388, ret=<optimized out>, pcap=0x7fffffffdac0) at rts/Schedule.c:2533
+ #7 0x000000000040a21e in hs_fib ()
+ #8 0x000000000040a083 in main (argc=1, argv=0x7fffffffdc48) at m.cpp:15
+
+(This is from patched gdb. See Note [Info Offset].)
+
+The previous approach was to encode the unwinding information for select
+registers in stg_stop_thread_info with Cmm annotations. The unfortunate thing
+about that approach was that it required introduction of an artificial MachSp
+register that wasn't meaningful outside unwinding. I discovered that to get
+stack unwinding working under -threaded runtime I also needed to unwind rbp
+which would require adding MachRbp. If we wanted to see saved locals in gdb,
+we'd have to add more. The core of the problem is that Cmm is architecture
+independent, while unwinding isn't.
+
+Note [Unwinding foreign imports]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For unwinding foreign imports, that is C functions exposed as Haskell functions
+no special handling is required. The C function unwinds according to the rip
+saved on the stack by the call instruction. Then we perform regular Haskell
+stack unwinding.
+*/
+
+
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
@@ -297,6 +405,28 @@ StgRunIsImplementedInAssembler(void)
"movq %%xmm14,128(%%rax)\n\t"
"movq %%xmm15,136(%%rax)\n\t"
#endif
+
+ /*
+ * Let the unwinder know where we saved the registers
+ * See Note [Unwinding foreign exports on x86-64].
+ */
+ ".cfi_def_cfa rsp, 0\n\t"
+ ".cfi_offset rbx, %c2\n\t"
+ ".cfi_offset rbp, %c3\n\t"
+ ".cfi_offset r12, %c4\n\t"
+ ".cfi_offset r13, %c5\n\t"
+ ".cfi_offset r14, %c6\n\t"
+ ".cfi_offset r15, %c7\n\t"
+ ".cfi_offset rip, %c8\n\t"
+ ".cfi_escape " // DW_CFA_val_expression is not expressible otherwise
+ "0x16, " // DW_CFA_val_expression
+ "0x07, " // register num 7 - rsp
+ "0x04, " // block length
+ "0x77, " // DW_OP_breg7 - signed LEB128 offset from rsp
+ "%c9, " // signed LEB128 encoded delta - byte 1
+ "%c10, " // signed LEB128 encoded delta - byte 2
+ "%c11\n\t" // signed LEB128 encoded delta - byte 2
+
/*
* Set BaseReg
*/
@@ -313,6 +443,12 @@ StgRunIsImplementedInAssembler(void)
#else
"movq %%rdi,%%rax\n\t"
#endif
+
+ STG_GLOBAL xstr(STG_RUN_JMP) "\n"
+#if !defined(mingw32_HOST_OS)
+ STG_HIDDEN xstr(STG_RUN_JMP) "\n"
+#endif
+ xstr(STG_RUN_JMP) ":\n\t"
"jmp *%%rax\n\t"
".globl " STG_RETURN "\n"
@@ -349,7 +485,26 @@ StgRunIsImplementedInAssembler(void)
:
: "i"(RESERVED_C_STACK_BYTES),
- "i"(STG_RUN_STACK_FRAME_SIZE /* stack frame size */)
+ "i"(STG_RUN_STACK_FRAME_SIZE /* stack frame size */),
+ "i"(RESERVED_C_STACK_BYTES /* rbx relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 8 /* rbp relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 16 /* r12 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 24 /* r13 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 32 /* r14 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + 40 /* r15 relative to cfa (rsp) */),
+ "i"(RESERVED_C_STACK_BYTES + STG_RUN_STACK_FRAME_SIZE
+ /* rip relative to cfa */),
+#define RSP_DELTA (RESERVED_C_STACK_BYTES + STG_RUN_STACK_FRAME_SIZE + 8)
+#if (RSP_DELTA < (1<<14)) || (RSP_DELTA >= (1<<21))
+#error "RSP_DELTA signed LEB128 encoding isn't 3 bytes"
+#endif
+ "i"((RSP_DELTA & 127) | 128
+ /* signed LEB128-encoded delta from rsp - byte 1 */),
+ "i"(((RSP_DELTA >> 7) & 127) | 128
+ /* signed LEB128-encoded delta from rsp - byte 2 */),
+ "i"((RSP_DELTA >> 14) & 127
+ /* signed LEB128-encoded delta from rsp - byte 3 */)
+#undef RSP_DELTA
);
/*
* See Note [Stack Alignment on X86]