summaryrefslogtreecommitdiff
path: root/src/cmd/8g
diff options
context:
space:
mode:
authorRuss Cox <rsc@golang.org>2014-03-27 14:05:57 -0400
committerRuss Cox <rsc@golang.org>2014-03-27 14:05:57 -0400
commit0a20bd6a589a127b185b76450265fa090bcd129e (patch)
tree5f06a0b69a9d21623a3aca109877919fed292e81 /src/cmd/8g
parent267ad0b11b2a75b67816c335f7e68f97d893f674 (diff)
downloadgo-0a20bd6a589a127b185b76450265fa090bcd129e.tar.gz
cmd/gc: liveness-related bug fixes
1. On entry to a function, only zero the ambiguously live stack variables. Before, we were zeroing all stack variables containing pointers. The zeroing is pretty inefficient right now (issue 7624), but there are also too many stack variables detected as ambiguously live (issue 7345), and that must be addressed before deciding how to improve the zeroing code. (Changes in 5g/ggen.c, 6g/ggen.c, 8g/ggen.c, gc/pgen.c) Fixes issue 7647. 2. Make the regopt word-based liveness analysis preserve the whole-variable liveness property expected by the garbage collection bitmap liveness analysis. That is, if the regopt liveness decides that one word in a struct needs to be preserved, make sure it preserves the entire struct. This is particularly important for multiword values such as strings, slices, and interfaces, in which all the words need to be present in order to understand the meaning. (Changes in 5g/reg.c, 6g/reg.c, 8g/reg.c.) Fixes issue 7591. 3. Make the regopt word-based liveness analysis treat a variable as having its address taken - which makes it preserved across all future calls - whenever n->addrtaken is set, for consistency with the gc bitmap liveness analysis, even if there is no machine instruction actually taking the address. In this case n->addrtaken is incorrect (a nicer way to put it is overconservative), and ideally there would be no such cases, but they can happen and the two analyses need to agree. (Changes in 5g/reg.c, 6g/reg.c, 8g/reg.c; test in bug484.go.) Fixes crashes found by turning off "zero everything" in step 1. 4. Remove spurious VARDEF annotations. As the comment in gc/pgen.c explains, the VARDEF must immediately precede the initialization. It cannot be too early, and it cannot be too late. In particular, if a function call sits between the VARDEF and the actual machine instructions doing the initialization, the variable will be treated as live during that function call even though it is uninitialized, leading to problems. (Changes in gc/gen.c; test in live.go.) Fixes crashes found by turning off "zero everything" in step 1. 5. Do not treat loading the address of a wide value as a signal that the value must be initialized. Instead depend on the existence of a VARDEF or the first actual read/write of a word in the value. If the load is in order to pass the address to a function that does the actual initialization, treating the load as an implicit VARDEF causes the same problems as described in step 4. The alternative is to arrange to zero every such value before passing it to the real initialization function, but this is a much easier and more efficient change. (Changes in gc/plive.c.) Fixes crashes found by turning off "zero everything" in step 1. 6. Treat wide input parameters with their address taken as initialized on entry to the function. Otherwise they look "ambiguously live" and we will try to emit code to zero them. (Changes in gc/plive.c.) Fixes crashes found by turning off "zero everything" in step 1. 7. An array of length 0 has no pointers, even if the element type does. Without this change, the zeroing code complains when asked to clear a 0-length array. (Changes in gc/reflect.c.) LGTM=khr R=khr CC=golang-codereviews https://codereview.appspot.com/80160044
Diffstat (limited to 'src/cmd/8g')
-rw-r--r--src/cmd/8g/ggen.c45
-rw-r--r--src/cmd/8g/reg.c78
2 files changed, 103 insertions, 20 deletions
diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c
index 741564ad5..eddba9bac 100644
--- a/src/cmd/8g/ggen.c
+++ b/src/cmd/8g/ggen.c
@@ -17,6 +17,8 @@ defframe(Prog *ptxt)
uint32 frame;
Prog *p;
vlong i;
+ NodeList *l;
+ Node *n;
// fill in argument size
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
@@ -28,26 +30,33 @@ defframe(Prog *ptxt)
// insert code to contain ambiguously live variables
// so that garbage collector only sees initialized values
// when it looks for pointers.
+ //
+ // TODO: determine best way to zero the given values.
+ // among other problems, AX is initialized to 0 multiple times,
+ // but that's really the tip of the iceberg.
p = ptxt;
- if(stkzerosize % widthptr != 0)
- fatal("zero size not a multiple of ptr size");
- if(stkzerosize == 0) {
- // nothing
- } else if(stkzerosize <= 2*widthptr) {
- for(i = 0; i < stkzerosize; i += widthptr) {
- p = appendpp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
- }
- } else if(stkzerosize <= 16*widthptr) {
- p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
- for(i = 0; i < stkzerosize; i += widthptr) {
- p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame-stkzerosize+i);
+ for(l=curfn->dcl; l != nil; l = l->next) {
+ n = l->n;
+ if(!n->needzero)
+ continue;
+ if(n->class != PAUTO)
+ fatal("needzero class %d", n->class);
+ if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
+ fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
+ if(n->type->width <= 2*widthptr) {
+ for(i = 0; i < n->type->width; i += widthptr)
+ p = appendpp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame+n->xoffset+i);
+ } else if(n->type->width <= 16*widthptr) {
+ p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
+ for(i = 0; i < n->type->width; i += widthptr)
+ p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+n->xoffset+i);
+ } else {
+ p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
+ p = appendpp(p, AMOVL, D_CONST, n->type->width/widthptr, D_CX, 0);
+ p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+n->xoffset, D_DI, 0);
+ p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
+ p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
}
- } else {
- p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
- p = appendpp(p, AMOVL, D_CONST, stkzerosize/widthptr, D_CX, 0);
- p = appendpp(p, ALEAL, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
- p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
- appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
}
}
diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c
index 98edfd9a9..af3e834c9 100644
--- a/src/cmd/8g/reg.c
+++ b/src/cmd/8g/reg.c
@@ -159,8 +159,12 @@ regopt(Prog *firstp)
* find use and set of variables
*/
g = flowstart(firstp, sizeof(Reg));
- if(g == nil)
+ if(g == nil) {
+ for(i=0; i<nvar; i++)
+ var[i].node->opt = nil;
return;
+ }
+
firstr = (Reg*)g->start;
for(r = firstr; r != R; r = (Reg*)r->f.link) {
@@ -368,6 +372,8 @@ brk:
/*
* free aux structures. peep allocates new ones.
*/
+ for(i=0; i<nvar; i++)
+ var[i].node->opt = nil;
flowend(g);
firstr = R;
@@ -631,6 +637,13 @@ mkvar(Reg *r, Adr *a)
v->width = w;
v->addr = flag; // funny punning
v->node = node;
+
+ // node->opt is the head of a linked list
+ // of Vars within the given Node, so that
+ // we can start at a Var and find all the other
+ // Vars in the same Go variable.
+ v->nextinnode = node->opt;
+ node->opt = v;
if(debug['R'])
print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
@@ -644,6 +657,24 @@ mkvar(Reg *r, Adr *a)
for(z=0; z<BITS; z++)
params.b[z] |= bit.b[z];
+ // Treat values with their address taken as live at calls,
+ // because the garbage collector's liveness analysis in ../gc/plive.c does.
+ // These must be consistent or else we will elide stores and the garbage
+ // collector will see uninitialized data.
+ // The typical case where our own analysis is out of sync is when the
+ // node appears to have its address taken but that code doesn't actually
+ // get generated and therefore doesn't show up as an address being
+ // taken when we analyze the instruction stream.
+ // One instance of this case is when a closure uses the same name as
+ // an outer variable for one of its own variables declared with :=.
+ // The parser flags the outer variable as possibly shared, and therefore
+ // sets addrtaken, even though it ends up not being actually shared.
+ // If we were better about _ elision, _ = &x would suffice too.
+ // The broader := in a closure problem is mentioned in a comment in
+ // closure.c:/^typecheckclosure and dcl.c:/^oldname.
+ if(node->addrtaken)
+ setaddrs(bit);
+
return bit;
none:
@@ -654,7 +685,8 @@ void
prop(Reg *r, Bits ref, Bits cal)
{
Reg *r1, *r2;
- int z;
+ int z, i, j;
+ Var *v, *v1;
for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
for(z=0; z<BITS; z++) {
@@ -677,6 +709,48 @@ prop(Reg *r, Bits ref, Bits cal)
cal.b[z] |= ref.b[z] | externs.b[z];
ref.b[z] = 0;
}
+
+ // cal.b is the current approximation of what's live across the call.
+ // Every bit in cal.b is a single stack word. For each such word,
+ // find all the other tracked stack words in the same Go variable
+ // (struct/slice/string/interface) and mark them live too.
+ // This is necessary because the liveness analysis for the garbage
+ // collector works at variable granularity, not at word granularity.
+ // It is fundamental for slice/string/interface: the garbage collector
+ // needs the whole value, not just some of the words, in order to
+ // interpret the other bits correctly. Specifically, slice needs a consistent
+ // ptr and cap, string needs a consistent ptr and len, and interface
+ // needs a consistent type word and data word.
+ for(z=0; z<BITS; z++) {
+ if(cal.b[z] == 0)
+ continue;
+ for(i=0; i<32; i++) {
+ if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+ continue;
+ v = var+z*32+i;
+ if(v->node->opt == nil) // v represents fixed register, not Go variable
+ continue;
+
+ // v->node->opt is the head of a linked list of Vars
+ // corresponding to tracked words from the Go variable v->node.
+ // Walk the list and set all the bits.
+ // For a large struct this could end up being quadratic:
+ // after the first setting, the outer loop (for z, i) would see a 1 bit
+ // for all of the remaining words in the struct, and for each such
+ // word would go through and turn on all the bits again.
+ // To avoid the quadratic behavior, we only turn on the bits if
+ // v is the head of the list or if the head's bit is not yet turned on.
+ // This will set the bits at most twice, keeping the overall loop linear.
+ v1 = v->node->opt;
+ j = v1 - var;
+ if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
+ for(; v1 != nil; v1 = v1->nextinnode) {
+ j = v1 - var;
+ cal.b[j/32] |= 1<<(j&31);
+ }
+ }
+ }
+ }
break;
case ATEXT: