summaryrefslogtreecommitdiff
path: root/src/cmd/5l
diff options
context:
space:
mode:
authorRuss Cox <rsc@golang.org>2013-06-27 11:32:01 -0400
committerRuss Cox <rsc@golang.org>2013-06-27 11:32:01 -0400
commit219336e1cce24310ad7f7f235d5e22d566d8c781 (patch)
treef3f95bd097ab60cfefd52eae7593d1b3d43124fd /src/cmd/5l
parent28ea20f10232680724a9d3711ec0936fb99ff389 (diff)
downloadgo-219336e1cce24310ad7f7f235d5e22d566d8c781.tar.gz
runtime: record proper goroutine state during stack split
Until now, the goroutine state has been scattered during the execution of newstack and oldstack. It's all there, and those routines know how to get back to a working goroutine, but other pieces of the system, like stack traces, do not. If something does interrupt the newstack or oldstack execution, the rest of the system can't understand the goroutine. For example, if newstack decides there is an overflow and calls throw, the stack tracer wouldn't dump the goroutine correctly. For newstack to save a useful state snapshot, it needs to be able to rewind the PC in the function that triggered the split back to the beginning of the function. (The PC is a few instructions in, just after the call to morestack.) To make that possible, we change the prologues to insert a jmp back to the beginning of the function after the call to morestack. That is, the prologue used to be roughly:

    TEXT myfunc
        check for split
        jmpcond nosplit
        call morestack
    nosplit:
        sub $xxx, sp

Now an extra instruction is inserted after the call:

    TEXT myfunc
    start:
        check for split
        jmpcond nosplit
        call morestack
        jmp start
    nosplit:
        sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by runtime.rewindmorestack to discover the beginning of the function, and then the call to morestack returns directly to the start label instead of to the jump instruction. So logically the jmp is still executed, just not by the cpu. The prologue thus repeats in the case of a function that needs a stack split, but against the cost of the split itself, the extra few instructions are noise. The repeated prologue has the nice effect of making a stack split double-check that the new stack is big enough: if morestack happens to return on a too-small stack, we'll now notice before corruption happens. The ability for newstack to rewind to the beginning of the function should help preemption too. 
If newstack decides that it was called for preemption instead of a stack split, it now has the goroutine state correctly paused if rescheduling is needed, and when the goroutine can run again, it can return to the start label on its original stack and re-execute the split check. Here is an example of a split stack overflow showing the full trace, without any special cases in the stack printer. (This one was triggered by making the split check incorrect.) runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0] morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0} sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700} runtime: split stack overflow: 0x6aebd0 < 0x6b0000 fatal error: runtime: split stack overflow goroutine 1 [stack split]: runtime.mallocgc(0x290, 0x100000000, 0x1) /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8 runtime.new() /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08 go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...) /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0 main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8 main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98 main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0) /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80 ----- stack segment boundary ----- main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...) /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0 main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...) /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658 main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...) 
/Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68 ----- stack segment boundary ----- main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2) /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0 main.main() /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8 And here is a seg fault during oldstack: SIGSEGV: segmentation violation PC=0x1b2a6 runtime.oldstack() /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76 runtime.lessstack() /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22 goroutine 1 [stack unsplit]: fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8 fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0 fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40 flag.(*stringValue).String(0x2102c9210, 0x1, 0x0) /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0 flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0 flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8 flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38 flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...) 
/Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80 testing.init() /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0 strings_test.init() /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70 main.init() strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8 goroutine 2 [runnable]: runtime.MHeap_Scavenger() /Users/rsc/g/go/src/pkg/runtime/mheap.c:438 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 created by runtime.main /Users/rsc/g/go/src/pkg/runtime/proc.c:166 rax 0x23ccc0 rbx 0x23ccc0 rcx 0x0 rdx 0x38 rdi 0x2102c0170 rsi 0x221032cfe0 rbp 0x221032cfa0 rsp 0x7fff5fbff5b0 r8 0x2102c0120 r9 0x221032cfa0 r10 0x221032c000 r11 0x104ce8 r12 0xe5c80 r13 0x1be82baac718 r14 0x13091135f7d69200 r15 0x0 rip 0x1b2a6 rflags 0x10246 cs 0x2b fs 0x0 gs 0x0 Fixes issue 5723. R=r, dvyukov, go.peter.90, dave, iant CC=golang-dev https://codereview.appspot.com/10360048
Diffstat (limited to 'src/cmd/5l')
-rw-r--r--src/cmd/5l/noop.c176
1 files changed, 70 insertions, 106 deletions
diff --git a/src/cmd/5l/noop.c b/src/cmd/5l/noop.c
index 63a0d9b83..ace03ffd6 100644
--- a/src/cmd/5l/noop.c
+++ b/src/cmd/5l/noop.c
@@ -32,13 +32,7 @@
#include "l.h"
#include "../ld/lib.h"
-
-// see ../../runtime/proc.c:/StackGuard
-enum
-{
- StackBig = 4096,
- StackSmall = 128,
-};
+#include "../../pkg/runtime/stack.h"
static Sym* sym_div;
static Sym* sym_divu;
@@ -180,33 +174,7 @@ noops(void)
break;
}
- if(p->reg & NOSPLIT) {
- q1 = prg();
- q1->as = AMOVW;
- q1->scond |= C_WBIT;
- q1->line = p->line;
- q1->from.type = D_REG;
- q1->from.reg = REGLINK;
- q1->to.type = D_OREG;
- q1->to.offset = -autosize;
- q1->to.reg = REGSP;
- q1->spadj = autosize;
- q1->link = p->link;
- p->link = q1;
- } else if (autosize < StackBig) {
- // split stack check for small functions
- // MOVW g_stackguard(g), R1
- // CMP R1, $-autosize(SP)
- // MOVW.LO $autosize, R1
- // MOVW.LO $args, R2
- // MOVW.LO R14, R3
- // BL.LO runtime.morestack(SB) // modifies LR
- // MOVW.W R14,$-autosize(SP)
-
- // TODO(kaib): add more trampolines
- // TODO(kaib): put stackguard in register
- // TODO(kaib): add support for -K and underflow detection
-
+ if(!(p->reg & NOSPLIT)) {
// MOVW g_stackguard(g), R1
p = appendp(p);
p->as = AMOVW;
@@ -215,16 +183,18 @@ noops(void)
p->to.type = D_REG;
p->to.reg = 1;
- if(autosize < StackSmall) {
- // CMP R1, SP
+ if(autosize <= StackSmall) {
+ // small stack: SP < stackguard
+ // CMP stackguard, SP
p = appendp(p);
p->as = ACMP;
p->from.type = D_REG;
p->from.reg = 1;
p->reg = REGSP;
- } else {
- // MOVW $-autosize(SP), R2
- // CMP R1, R2
+ } else if(autosize <= StackBig) {
+ // large stack: SP-framesize < stackguard-StackSmall
+ // MOVW $-autosize(SP), R2
+ // CMP stackguard, R2
p = appendp(p);
p->as = AMOVW;
p->from.type = D_CONST;
@@ -238,103 +208,97 @@ noops(void)
p->from.type = D_REG;
p->from.reg = 1;
p->reg = 2;
+ } else {
+ // such a large stack we need to protect against wraparound
+ // if SP is close to zero.
+ // SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
+ // The +StackGuard on both sides is required to keep the left side positive:
+ // SP is allowed to be slightly below stackguard. See stack.h.
+ // MOVW $StackGuard(SP), R2
+ // SUB R1, R2
+ // MOVW $(autosize+(StackGuard-StackSmall)), R3
+ // CMP R3, R2
+ p = appendp(p);
+ p->as = AMOVW;
+ p->from.type = D_CONST;
+ p->from.reg = REGSP;
+ p->from.offset = StackGuard;
+ p->to.type = D_REG;
+ p->to.reg = 2;
+
+ p = appendp(p);
+ p->as = ASUB;
+ p->from.type = D_REG;
+ p->from.reg = 1;
+ p->to.type = D_REG;
+ p->to.reg = 2;
+
+ p = appendp(p);
+ p->as = AMOVW;
+ p->from.type = D_CONST;
+ p->from.offset = autosize + (StackGuard - StackSmall);
+ p->to.type = D_REG;
+ p->to.reg = 3;
+
+ p = appendp(p);
+ p->as = ACMP;
+ p->from.type = D_REG;
+ p->from.reg = 3;
+ p->reg = 2;
}
-
- // MOVW.LO $autosize, R1
+
+ // MOVW.LS $autosize, R1
p = appendp(p);
p->as = AMOVW;
- p->scond = C_SCOND_LO;
+ p->scond = C_SCOND_LS;
p->from.type = D_CONST;
p->from.offset = autosize;
p->to.type = D_REG;
p->to.reg = 1;
- // MOVW.LO $args, R2
+ // MOVW.LS $args, R2
p = appendp(p);
p->as = AMOVW;
- p->scond = C_SCOND_LO;
+ p->scond = C_SCOND_LS;
p->from.type = D_CONST;
p->from.offset = (cursym->text->to.offset2 + 3) & ~3;
p->to.type = D_REG;
p->to.reg = 2;
- // MOVW.LO R14, R3
+ // MOVW.LS R14, R3
p = appendp(p);
p->as = AMOVW;
- p->scond = C_SCOND_LO;
+ p->scond = C_SCOND_LS;
p->from.type = D_REG;
p->from.reg = REGLINK;
p->to.type = D_REG;
p->to.reg = 3;
- // BL.LO runtime.morestack(SB) // modifies LR
+ // BL.LS runtime.morestack(SB) // modifies LR, returns with LO still asserted
p = appendp(p);
p->as = ABL;
- p->scond = C_SCOND_LO;
+ p->scond = C_SCOND_LS;
p->to.type = D_BRANCH;
p->to.sym = symmorestack;
p->cond = pmorestack;
-
- // MOVW.W R14,$-autosize(SP)
- p = appendp(p);
- p->as = AMOVW;
- p->scond |= C_WBIT;
- p->from.type = D_REG;
- p->from.reg = REGLINK;
- p->to.type = D_OREG;
- p->to.offset = -autosize;
- p->to.reg = REGSP;
- p->spadj = autosize;
- } else { // > StackBig
- // MOVW $autosize, R1
- // MOVW $args, R2
- // MOVW R14, R3
- // BL runtime.morestack(SB) // modifies LR
- // MOVW.W R14,$-autosize(SP)
-
- // MOVW $autosize, R1
- p = appendp(p);
- p->as = AMOVW;
- p->from.type = D_CONST;
- p->from.offset = autosize;
- p->to.type = D_REG;
- p->to.reg = 1;
-
- // MOVW $args, R2
- // also need to store the extra 4 bytes.
- p = appendp(p);
- p->as = AMOVW;
- p->from.type = D_CONST;
- p->from.offset = (cursym->text->to.offset2 + 3) & ~3;
- p->to.type = D_REG;
- p->to.reg = 2;
-
- // MOVW R14, R3
- p = appendp(p);
- p->as = AMOVW;
- p->from.type = D_REG;
- p->from.reg = REGLINK;
- p->to.type = D_REG;
- p->to.reg = 3;
-
- // BL runtime.morestack(SB) // modifies LR
+
+ // BLS start
p = appendp(p);
- p->as = ABL;
+ p->as = ABLS;
p->to.type = D_BRANCH;
- p->to.sym = symmorestack;
- p->cond = pmorestack;
-
- // MOVW.W R14,$-autosize(SP)
- p = appendp(p);
- p->as = AMOVW;
- p->scond |= C_WBIT;
- p->from.type = D_REG;
- p->from.reg = REGLINK;
- p->to.type = D_OREG;
- p->to.offset = -autosize;
- p->to.reg = REGSP;
- p->spadj = autosize;
+ p->cond = cursym->text->link;
}
+
+ // MOVW.W R14,$-autosize(SP)
+ p = appendp(p);
+ p->as = AMOVW;
+ p->scond |= C_WBIT;
+ p->from.type = D_REG;
+ p->from.reg = REGLINK;
+ p->to.type = D_OREG;
+ p->to.offset = -autosize;
+ p->to.reg = REGSP;
+ p->spadj = autosize;
break;
case ARET: