diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/cmd/5g/gobj.c | 2 | ||||
-rw-r--r-- | src/cmd/5g/gsubr.c | 3 | ||||
-rw-r--r-- | src/cmd/5g/opt.h | 14 | ||||
-rw-r--r-- | src/cmd/5g/reg.c | 50 | ||||
-rw-r--r-- | src/cmd/6g/gobj.c | 2 | ||||
-rw-r--r-- | src/cmd/6g/opt.h | 14 | ||||
-rw-r--r-- | src/cmd/6g/reg.c | 97 | ||||
-rw-r--r-- | src/cmd/8g/gobj.c | 2 | ||||
-rw-r--r-- | src/cmd/8g/opt.h | 14 | ||||
-rw-r--r-- | src/cmd/8g/reg.c | 92 | ||||
-rw-r--r-- | src/cmd/9g/ggen.c | 20 | ||||
-rw-r--r-- | src/cmd/9g/gobj.c | 2 | ||||
-rw-r--r-- | src/cmd/9g/gsubr.c | 26 | ||||
-rw-r--r-- | src/cmd/9g/opt.h | 49 | ||||
-rw-r--r-- | src/cmd/9g/peep.c | 4 | ||||
-rw-r--r-- | src/cmd/9g/prog.c | 22 | ||||
-rw-r--r-- | src/cmd/9g/reg.c | 1212 | ||||
-rw-r--r-- | src/cmd/9l/9.out.h | 4 | ||||
-rw-r--r-- | src/cmd/gc/bits.c | 34 | ||||
-rw-r--r-- | src/cmd/gc/go.h | 12 | ||||
-rw-r--r-- | src/cmd/objdump/objdump_test.go | 8 | ||||
-rw-r--r-- | src/liblink/list9.c | 11 | ||||
-rw-r--r-- | src/reflect/asm_power64x.s | 7 | ||||
-rw-r--r-- | src/runtime/asm_power64x.s | 55 | ||||
-rw-r--r-- | src/runtime/gcinfo_test.go | 8 | ||||
-rw-r--r-- | src/runtime/mgc0.c | 1 | ||||
-rw-r--r-- | src/runtime/panic.c | 2 | ||||
-rw-r--r-- | src/runtime/runtime.c | 6 | ||||
-rw-r--r-- | src/runtime/signal_power64x.c | 2 |
29 files changed, 1467 insertions, 308 deletions
diff --git a/src/cmd/5g/gobj.c b/src/cmd/5g/gobj.c index 5e988878f..65f731685 100644 --- a/src/cmd/5g/gobj.c +++ b/src/cmd/5g/gobj.c @@ -86,7 +86,7 @@ datagostring(Strlit *sval, Addr *a) sym = stringsym(sval->s, sval->len); a->type = D_OREG; a->name = D_EXTERN; - a->etype = TINT32; + a->etype = TSTRING; a->offset = 0; // header a->reg = NREG; a->sym = linksym(sym); diff --git a/src/cmd/5g/gsubr.c b/src/cmd/5g/gsubr.c index 06e274e14..f09197963 100644 --- a/src/cmd/5g/gsubr.c +++ b/src/cmd/5g/gsubr.c @@ -1353,9 +1353,10 @@ naddr(Node *n, Addr *a, int canemitcode) case OITAB: // itable of interface value naddr(n->left, a, canemitcode); - a->etype = TINT32; + a->etype = simtype[tptr]; if(a->type == D_CONST && a->offset == 0) break; // len(nil) + a->width = widthptr; break; case OSPTR: diff --git a/src/cmd/5g/opt.h b/src/cmd/5g/opt.h index 1946c1d33..5016d1cc8 100644 --- a/src/cmd/5g/opt.h +++ b/src/cmd/5g/opt.h @@ -63,8 +63,8 @@ enum uint32 BLOAD(Reg*); uint32 BSTORE(Reg*); -uint32 LOAD(Reg*); -uint32 STORE(Reg*); +uint64 LOAD(Reg*); +uint64 STORE(Reg*); */ // A Reg is a wrapper around a single Prog (one instruction) that holds @@ -145,7 +145,7 @@ void synch(Reg*, Bits); uint32 allreg(uint32, Rgn*); void paint1(Reg*, int); uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +void paint3(Reg*, int, uint32, int); void addreg(Adr*, int); void dumpit(char *str, Flow *r0, int); @@ -156,10 +156,10 @@ void peep(Prog*); void excise(Flow*); int copyu(Prog*, Adr*, Adr*); -int32 RtoB(int); -int32 FtoB(int); -int BtoR(int32); -int BtoF(int32); +uint32 RtoB(int); +uint32 FtoB(int); +int BtoR(uint32); +int BtoF(uint32); /* * prog.c diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index 27d9d3e8b..441792873 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -35,7 +35,7 @@ #include "opt.h" #define NREGVAR 32 -#define REGBITS ((uint32)0xffffffff) +#define REGBITS ((uint64)0xffffffffull) /*c2go enum { NREGVAR = 32, REGBITS = 0xffffffff, @@ -86,7 +86,7 @@ setaddrs(Bits bit) i = bnum(bit); node = var[i].node; n = var[i].name; - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); // disable all pieces of that variable for(i=0; i<nvar; i++) { @@ -393,7 +393,7 @@ loop2: for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); - if(bany(&bit) & !r->f.refset) { + if(bany(&bit) && !r->f.refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); @@ -425,7 +425,7 @@ loop2: if(debug['R'] > 1) print("\n"); paint1(r, i); - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); if(change <= 0) { if(debug['R']) print("%L $%d: %Q\n", @@ -570,7 +570,7 @@ walkvardef(Node *n, Reg *r, int active) break; for(v=n->opt; v!=nil; v=v->nextinnode) { bn = v - var; - r1->act.b[bn/32] |= 1L << (bn%32); + biset(&r1->act, bn); } if(r1->f.prog->as == ABL) break; @@ -606,7 +606,7 @@ addsplits(void) ~(r->calahead.b[z] & addrs.b[z]); while(bany(&bit)) { i = bnum(bit); - bit.b[i/32] &= ~(1L << (i%32)); + biclr(&bit, i); } } } @@ -972,10 +972,10 @@ prop(Reg *r, Bits ref, Bits cal) for(z=0; z<BITS; z++) { if(cal.b[z] == 0) continue; - for(i=0; i<32; i++) { - if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) + for(i=0; i<64; i++) { + if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) continue; - v = var+z*32+i; + v = var+z*64+i; if(v->node->opt == nil) // v represents fixed register, not Go variable continue; @@ -991,10 +991,10 @@ prop(Reg *r, Bits ref, Bits cal) // This will set the bits at most twice, keeping the overall loop linear. v1 = v->node->opt; j = v1 - var; - if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { + if(v == v1 || !btest(&cal, j)) { for(; v1 != nil; v1 = v1->nextinnode) { j = v1 - var; - cal.b[j/32] |= 1<<(j&31); + biset(&cal, j); } } } @@ -1115,10 +1115,10 @@ paint1(Reg *r, int bn) Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L<<(bn%32); + z = bn/64; + bb = 1LL<<(bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1193,10 +1193,10 @@ paint2(Reg *r, int bn) { Reg *r1; int z; - uint32 bb, vreg; + uint64 bb, vreg; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); vreg = regbits; if(!(r->act.b[z] & bb)) return vreg; @@ -1240,15 +1240,15 @@ paint2(Reg *r, int bn) } void -paint3(Reg *r, int bn, int32 rb, int rn) +paint3(Reg *r, int bn, uint32 rb, int rn) { Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1333,7 +1333,7 @@ addreg(Adr *a, int rn) * 10 R10 * 12 R12 */ -int32 +uint32 RtoB(int r) { if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 @@ -1342,7 +1342,7 @@ RtoB(int r) } int -BtoR(int32 b) +BtoR(uint32 b) { b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 if(b == 0) @@ -1357,7 +1357,7 @@ BtoR(int32 b) * ... ... * 31 F15 */ -int32 +uint32 FtoB(int f) { @@ -1367,7 +1367,7 @@ FtoB(int f) } int -BtoF(int32 b) +BtoF(uint32 b) { b &= 0xfffc0000L; diff --git a/src/cmd/6g/gobj.c b/src/cmd/6g/gobj.c index 04e837b13..dbb4ff62c 100644 --- a/src/cmd/6g/gobj.c +++ b/src/cmd/6g/gobj.c @@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a) a->sym = linksym(sym); a->node = sym->def; a->offset = 0; // header - a->etype = TINT32; + a->etype = TSTRING; } void diff --git a/src/cmd/6g/opt.h b/src/cmd/6g/opt.h index dbd039d89..4c9bb89fc 100644 --- a/src/cmd/6g/opt.h +++ b/src/cmd/6g/opt.h @@ -63,8 +63,8 @@ enum uint32 BLOAD(Reg*); uint32 BSTORE(Reg*); -uint32 LOAD(Reg*); -uint32 STORE(Reg*); +uint64 LOAD(Reg*); +uint64 STORE(Reg*); */ // A Reg is a wrapper around a single Prog (one instruction) that holds @@ -141,7 +141,7 @@ void synch(Reg*, Bits); uint32 allreg(uint32, Rgn*); void paint1(Reg*, int); uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +void paint3(Reg*, int, uint32, int); void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); @@ -153,10 +153,10 @@ void peep(Prog*); void excise(Flow*); int copyu(Prog*, Adr*, Adr*); -int32 RtoB(int); -int32 FtoB(int); -int BtoR(int32); -int BtoF(int32); +uint32 RtoB(int); +uint32 FtoB(int); +int BtoR(uint32); +int BtoF(uint32); /* * prog.c diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index 1f757e197..4ce2f4db0 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -34,7 +34,7 @@ #include "opt.h" #define NREGVAR 32 /* 16 general + 16 floating */ -#define REGBITS ((uint32)0xffffffff) +#define REGBITS ((uint64)0xffffffffull) /*c2go enum { NREGVAR = 32, REGBITS = 0xffffffff, @@ -71,7 +71,7 @@ setaddrs(Bits bit) i = bnum(bit); node = var[i].node; n = var[i].name; - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); // disable all pieces of that variable for(i=0; i<nvar; i++) { @@ -364,7 +364,7 @@ loop2: rgp->varno = i; change = 0; paint1(r, i); - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); if(change <= 0) continue; rgp->cost = change; @@ -477,7 +477,7 @@ walkvardef(Node *n, Reg *r, int active) break; for(v=n->opt; v!=nil; v=v->nextinnode) { bn = v - var; - r1->act.b[bn/32] |= 1L << (bn%32); + biset(&r1->act, bn); } if(r1->f.prog->as == ACALL) break; @@ -621,6 +621,9 @@ mkvar(Reg *r, Adr *a) if(r != R) r->use1.b[0] |= doregbits(a->index); + if(t >= D_INDIR && t < 2*D_INDIR) + goto none; + switch(t) { default: regu = doregbits(t); @@ -822,10 +825,10 @@ prop(Reg *r, Bits ref, Bits cal) for(z=0; z<BITS; z++) { if(cal.b[z] == 0) continue; - for(i=0; i<32; i++) { - if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) + for(i=0; i<64; i++) { + if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) continue; - v = var+z*32+i; + v = var+z*64+i; if(v->node->opt == nil) // v represents fixed register, not Go variable continue; @@ -841,10 +844,10 @@ prop(Reg *r, Bits ref, Bits cal) // This will set the bits at most twice, keeping the overall loop linear. v1 = v->node->opt; j = v1 - var; - if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { + if(v == v1 || !btest(&cal, j)) { for(; v1 != nil; v1 = v1->nextinnode) { j = v1 - var; - cal.b[j/32] |= 1UL<<(j&31); + biset(&cal, j); } } } @@ -959,10 +962,10 @@ paint1(Reg *r, int bn) { Reg *r1; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L<<(bn%32); + z = bn/64; + bb = 1LL<<(bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1017,54 +1020,14 @@ paint1(Reg *r, int bn) } uint32 -regset(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFFFF? BtoR(b): BtoF(b); - if(v.type == 0) - fatal("zero v.type for %#ux", b); - c = copyu(r->f.prog, &v, nil); - if(c == 3) - set |= b; - bb &= ~b; - } - return set; -} - -uint32 -reguse(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFFFF? BtoR(b): BtoF(b); - c = copyu(r->f.prog, &v, nil); - if(c == 1 || c == 2 || c == 4) - set |= b; - bb &= ~b; - } - return set; -} - -uint32 paint2(Reg *r, int bn) { Reg *r1; int z; - uint32 bb, vreg, x; + uint64 bb, vreg; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); vreg = regbits; if(!(r->act.b[z] & bb)) return vreg; @@ -1105,27 +1068,19 @@ paint2(Reg *r, int bn) break; } - bb = vreg; - for(; r; r=(Reg*)r->f.s1) { - x = r->regu & ~bb; - if(x) { - vreg |= reguse(r, x); - bb |= regset(r, x); - } - } return vreg; } void -paint3(Reg *r, int bn, int32 rb, int rn) +paint3(Reg *r, int bn, uint32 rb, int rn) { Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1198,7 +1153,7 @@ addreg(Adr *a, int rn) ostats.ncvtreg++; } -int32 +uint32 RtoB(int r) { @@ -1208,7 +1163,7 @@ RtoB(int r) } int -BtoR(int32 b) +BtoR(uint32 b) { b &= 0xffffL; if(nacl) @@ -1224,7 +1179,7 @@ BtoR(int32 b) * ... * 31 X15 */ -int32 +uint32 FtoB(int f) { if(f < D_X0 || f > D_X15) @@ -1233,7 +1188,7 @@ FtoB(int f) } int -BtoF(int32 b) +BtoF(uint32 b) { b &= 0xFFFF0000L; diff --git a/src/cmd/8g/gobj.c b/src/cmd/8g/gobj.c index fa0605e6c..af287f702 100644 --- a/src/cmd/8g/gobj.c +++ b/src/cmd/8g/gobj.c @@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a) a->sym = linksym(sym); a->node = sym->def; a->offset = 0; // header - a->etype = TINT32; + a->etype = TSTRING; } void diff --git a/src/cmd/8g/opt.h b/src/cmd/8g/opt.h index 09f58c40a..0e2d165b1 100644 --- a/src/cmd/8g/opt.h +++ b/src/cmd/8g/opt.h @@ -63,8 +63,8 @@ enum uint32 BLOAD(Reg*); uint32 BSTORE(Reg*); -uint32 LOAD(Reg*); -uint32 STORE(Reg*); +uint64 LOAD(Reg*); +uint64 STORE(Reg*); */ // A Reg is a wrapper around a single Prog (one instruction) that holds @@ -159,7 +159,7 @@ void synch(Reg*, Bits); uint32 allreg(uint32, Rgn*); void paint1(Reg*, int); uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +void paint3(Reg*, int, uint32, int); void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); @@ -171,10 +171,10 @@ void peep(Prog*); void excise(Flow*); int copyu(Prog*, Adr*, Adr*); -int32 RtoB(int); -int32 FtoB(int); -int BtoR(int32); -int BtoF(int32); +uint32 RtoB(int); +uint32 FtoB(int); +int BtoR(uint32); +int BtoF(uint32); /* * prog.c diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c index 302b273a1..79d60bed5 100644 --- a/src/cmd/8g/reg.c +++ b/src/cmd/8g/reg.c @@ -34,7 +34,7 @@ #include "opt.h" #define NREGVAR 16 /* 8 integer + 8 floating */ -#define REGBITS ((uint32)0xffff) +#define REGBITS ((uint64)0xffffull) /*c2go enum { NREGVAR = 16, REGBITS = (1<<NREGVAR) - 1, @@ -71,7 +71,7 @@ setaddrs(Bits bit) i = bnum(bit); node = var[i].node; n = var[i].name; - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); // disable all pieces of that variable for(i=0; i<nvar; i++) { @@ -336,7 +336,7 @@ loop2: rgp->varno = i; change = 0; paint1(r, i); - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); if(change <= 0) continue; rgp->cost = change; @@ -446,7 +446,7 @@ walkvardef(Node *n, Reg *r, int active) break; for(v=n->opt; v!=nil; v=v->nextinnode) { bn = v - var; - r1->act.b[bn/32] |= 1L << (bn%32); + biset(&r1->act, bn); } if(r1->f.prog->as == ACALL) break; @@ -788,10 +788,10 @@ prop(Reg *r, Bits ref, Bits cal) for(z=0; z<BITS; z++) { if(cal.b[z] == 0) continue; - for(i=0; i<32; i++) { - if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) + for(i=0; i<64; i++) { + if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) continue; - v = var+z*32+i; + v = var+z*64+i; if(v->node->opt == nil) // v represents fixed register, not Go variable continue; @@ -807,10 +807,10 @@ prop(Reg *r, Bits ref, Bits cal) // This will set the bits at most twice, keeping the overall loop linear. v1 = v->node->opt; j = v1 - var; - if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { + if(v == v1 || !btest(&cal, j)) { for(; v1 != nil; v1 = v1->nextinnode) { j = v1 - var; - cal.b[j/32] |= 1<<(j&31); + biset(&cal, j); } } } @@ -926,10 +926,10 @@ paint1(Reg *r, int bn) Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L<<(bn%32); + z = bn/64; + bb = 1LL<<(bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -996,52 +996,14 @@ paint1(Reg *r, int bn) } uint32 -regset(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFF ? BtoR(b): BtoF(b); - c = copyu(r->f.prog, &v, nil); - if(c == 3) - set |= b; - bb &= ~b; - } - return set; -} - -uint32 -reguse(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFF ? BtoR(b): BtoF(b); - c = copyu(r->f.prog, &v, nil); - if(c == 1 || c == 2 || c == 4) - set |= b; - bb &= ~b; - } - return set; -} - -uint32 paint2(Reg *r, int bn) { Reg *r1; int z; - uint32 bb, vreg, x; + uint64 bb, vreg; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); vreg = regbits; if(!(r->act.b[z] & bb)) return vreg; @@ -1082,27 +1044,19 @@ paint2(Reg *r, int bn) break; } - bb = vreg; - for(; r; r=(Reg*)r->f.s1) { - x = r->regu & ~bb; - if(x) { - vreg |= reguse(r, x); - bb |= regset(r, x); - } - } return vreg; } void -paint3(Reg *r, int bn, int32 rb, int rn) +paint3(Reg *r, int bn, uint32 rb, int rn) { Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1175,7 +1129,7 @@ addreg(Adr *a, int rn) ostats.ncvtreg++; } -int32 +uint32 RtoB(int r) { @@ -1185,7 +1139,7 @@ RtoB(int r) } int -BtoR(int32 b) +BtoR(uint32 b) { b &= 0xffL; @@ -1194,7 +1148,7 @@ BtoR(int32 b) return bitno(b) + D_AX; } -int32 +uint32 FtoB(int f) { if(f < D_X0 || f > D_X7) @@ -1203,7 +1157,7 @@ FtoB(int f) } int -BtoF(int32 b) +BtoF(uint32 b) { b &= 0xFF00L; if(b == 0) diff --git a/src/cmd/9g/ggen.c b/src/cmd/9g/ggen.c index c41d8eb41..7d9cf5050 100644 --- a/src/cmd/9g/ggen.c +++ b/src/cmd/9g/ggen.c @@ -900,7 +900,7 @@ ret: void clearfat(Node *nl) { - uint64 w, c, q, t; + uint64 w, c, q, t, boff; Node dst, end, r0, *f; Prog *p, *pl; @@ -944,6 +944,8 @@ clearfat(Node *nl) patch(gbranch(ABNE, T, 0), pl); regfree(&end); + // The loop leaves R3 on the last zeroed dword + boff = 8; } else if(q >= 4) { p = gins(ASUB, N, &dst); p->from.type = D_CONST; @@ -953,17 +955,21 @@ clearfat(Node *nl) afunclit(&p->to, f); // 4 and 128 = magic constants: see ../../runtime/asm_power64x.s p->to.offset = 4*(128-q); - } else - for(t = 0; t < q; t++) { - p = gins(AMOVD, &r0, &dst); - p->to.type = D_OREG; - p->to.offset = 8*t; + // duffzero leaves R3 on the last zeroed dword + boff = 8; + } else { + for(t = 0; t < q; t++) { + p = gins(AMOVD, &r0, &dst); + p->to.type = D_OREG; + p->to.offset = 8*t; + } + boff = 8*q; } for(t = 0; t < c; t++) { p = gins(AMOVB, &r0, &dst); p->to.type = D_OREG; - p->to.offset = t; + p->to.offset = t+boff; } reg[REGRT1]--; } diff --git a/src/cmd/9g/gobj.c b/src/cmd/9g/gobj.c index fdd7606bc..3da55878a 100644 --- a/src/cmd/9g/gobj.c +++ b/src/cmd/9g/gobj.c @@ -89,7 +89,7 @@ datagostring(Strlit *sval, Addr *a) a->reg = NREG; a->node = sym->def; a->offset = 0; // header - a->etype = TINT32; + a->etype = TSTRING; } void diff --git a/src/cmd/9g/gsubr.c b/src/cmd/9g/gsubr.c index d8b62b1da..e5cd5ed4b 100644 --- a/src/cmd/9g/gsubr.c +++ b/src/cmd/9g/gsubr.c @@ -1001,10 +1001,13 @@ hard: Prog* gins(int as, Node *f, Node *t) { - //int32 w; + int32 w; Prog *p; Addr af, at; + // TODO(austin): Add self-move test like in 6g (but be careful + // of truncation moves) + memset(&af, 0, sizeof af); memset(&at, 0, sizeof at); if(f != N) @@ -1021,9 +1024,6 @@ gins(int as, Node *f, Node *t) if(debug['g']) print("%P\n", p); - // TODO(minux): enable these. - // right now it fails on MOVD $type."".TypeAssertionError(SB) [width=1], R7 [width=8] - /* w = 0; switch(as) { case AMOVB: @@ -1046,15 +1046,16 @@ gins(int as, Node *f, Node *t) break; case AMOVD: case AMOVDU: + if(af.type == D_CONST) + break; w = 8; break; } - if(w != 0 && ((f != N && af.width < w) || (t != N && at.width > w))) { + if(w != 0 && ((f != N && af.width < w) || (t != N && at.type != D_REG && at.width > w))) { dump("f", f); dump("t", t); fatal("bad width: %P (%d, %d)\n", p, af.width, at.width); } - */ return p; } @@ -1116,12 +1117,9 @@ naddr(Node *n, Addr *a, int canemitcode) case ONAME: a->etype = 0; - a->width = 0; a->reg = NREG; - if(n->type != T) { + if(n->type != T) a->etype = simtype[n->type->etype]; - a->width = n->type->width; - } a->offset = n->xoffset; s = n->sym; a->node = n->orig; @@ -1242,15 +1240,16 @@ naddr(Node *n, Addr *a, int canemitcode) naddr(n->left, a, canemitcode); a->etype = simtype[tptr]; if(a->type == D_CONST && a->offset == 0) - break; // len(nil) + break; // itab(nil) + a->width = widthptr; break; case OSPTR: // pointer in a string or slice naddr(n->left, a, canemitcode); + a->etype = simtype[tptr]; if(a->type == D_CONST && a->offset == 0) break; // ptr(nil) - a->etype = simtype[tptr]; a->offset += Array_array; a->width = widthptr; break; @@ -1262,6 +1261,7 @@ naddr(Node *n, Addr *a, int canemitcode) if(a->type == D_CONST && a->offset == 0) break; // len(nil) a->offset += Array_nel; + a->width = widthint; break; case OCAP: @@ -1271,6 +1271,7 @@ naddr(Node *n, Addr *a, int canemitcode) if(a->type == D_CONST && a->offset == 0) break; // cap(nil) a->offset += Array_cap; + a->width = widthint; break; case OADDR: @@ -1288,6 +1289,7 @@ naddr(Node *n, Addr *a, int canemitcode) default: fatal("naddr: OADDR %d\n", a->type); } + break; } } diff --git a/src/cmd/9g/opt.h b/src/cmd/9g/opt.h index d3cbcb957..7f15b5a69 100644 --- a/src/cmd/9g/opt.h +++ b/src/cmd/9g/opt.h @@ -70,24 +70,40 @@ struct Reg { Flow f; - Bits set; // variables written by this instruction. - Bits use1; // variables read by prog->from. - Bits use2; // variables read by prog->to. + Bits set; // regopt variables written by this instruction. + Bits use1; // regopt variables read by prog->from. + Bits use2; // regopt variables read by prog->to. + // refahead/refbehind are the regopt variables whose current + // value may be used in the following/preceding instructions + // up to a CALL (or the value is clobbered). Bits refbehind; Bits refahead; + // calahead/calbehind are similar, but for variables in + // instructions that are reachable after hitting at least one + // CALL. Bits calbehind; Bits calahead; Bits regdiff; Bits act; - int32 regu; // register used bitmap + uint64 regu; // register used bitmap }; #define R ((Reg*)0) /*c2go extern Reg *R; */ #define NRGN 600 /*c2go enum { NRGN = 600 }; */ + +// A Rgn represents a single regopt variable over a region of code +// where a register could potentially be dedicated to that variable. +// The code encompassed by a Rgn is defined by the flow graph, +// starting at enter, flood-filling forward while varno is refahead +// and backward while varno is refbehind, and following branches. A +// single variable may be represented by multiple disjoint Rgns and +// each Rgn may choose a different register for that variable. +// Registers are allocated to regions greedily in order of descending +// cost. struct Rgn { Reg* enter; @@ -104,7 +120,7 @@ EXTERN Rgn* rgp; EXTERN int nregion; EXTERN int nvar; EXTERN int32 regbits; -EXTERN int32 exregbits; +EXTERN int32 exregbits; // TODO(austin) not used; remove EXTERN Bits externs; EXTERN Bits params; EXTERN Bits consts; @@ -118,10 +134,8 @@ EXTERN struct { int32 ncvtreg; int32 nspill; - int32 nreload; int32 ndelmov; int32 nvar; - int32 naddr; } ostats; /* @@ -133,10 +147,10 @@ void addmove(Reg*, int, int, int); Bits mkvar(Reg*, Adr*); void prop(Reg*, Bits, Bits); void synch(Reg*, Bits); -uint32 allreg(uint32, Rgn*); +uint64 allreg(uint64, Rgn*); void paint1(Reg*, int); -uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +uint64 paint2(Reg*, int, int); +void paint3(Reg*, int, uint64, int); void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); @@ -160,8 +174,8 @@ typedef struct ProgInfo ProgInfo; struct ProgInfo { uint32 flags; // the bits below - uint64 reguse; // required registers used by this instruction - uint64 regset; // required registers set by this instruction + uint64 reguse; // registers implicitly used by this instruction + uint64 regset; // registers implicitly set by this instruction uint64 regindex; // registers used by addressing mode }; @@ -182,20 +196,21 @@ enum SizeF = 1<<7, // float aka float32 SizeD = 1<<8, // double aka float64 - // Left side: address taken, read, write. + // Left side (Prog.from): address taken, read, write. LeftAddr = 1<<9, LeftRead = 1<<10, LeftWrite = 1<<11, - - // Register in middle; never written. + + // Register in middle (Prog.reg); only ever read. RegRead = 1<<12, CanRegRead = 1<<13, - - // Right side: address taken, read, write. + + // Right side (Prog.to): address taken, read, write. RightAddr = 1<<14, RightRead = 1<<15, RightWrite = 1<<16, + // Instruction updates whichever of from/to is type D_OREG PostInc = 1<<17, // Instruction kinds diff --git a/src/cmd/9g/peep.c b/src/cmd/9g/peep.c index 5721d7b04..ec314d633 100644 --- a/src/cmd/9g/peep.c +++ b/src/cmd/9g/peep.c @@ -44,13 +44,15 @@ peep(Prog *p) void excise(Flow *r) { - Prog *p; + Prog *p, *l; p = r->prog; if(debug['P'] && debug['v']) print("%P ===delete===\n", p); + l = p->link; *p = zprog; p->as = ANOP; + p->link = l; ostats.ndelmov++; } diff --git a/src/cmd/9g/prog.c b/src/cmd/9g/prog.c index 0a51a533a..e3e50f28a 100644 --- a/src/cmd/9g/prog.c +++ b/src/cmd/9g/prog.c @@ -96,11 +96,8 @@ static ProgInfo progtable[ALAST] = { [ABGT]= {Cjmp}, [ABLE]= {Cjmp}, [ARETURN]= {Break}, - // In addtion, duffzero reads R0,R2 and writes R2. This fact must be - // encoded in peep.c (TODO) + [ADUFFZERO]= {Call}, - // In addtion, duffcopy reads R0,R2,R3 and writes R2,R3. This fact must be - // encoded in peep.c (TODO) [ADUFFCOPY]= {Call}, }; @@ -118,14 +115,14 @@ proginfo(ProgInfo *info, Prog *p) info->flags |= /*CanRegRead |*/ RightRead; } - if(p->from.type == D_OREG && p->from.reg != NREG) { - info->reguse |= RtoB(p->from.reg); + if((p->from.type == D_OREG || p->from.type == D_CONST) && p->from.reg != NREG) { + info->regindex |= RtoB(p->from.reg); if(info->flags & PostInc) { info->regset |= RtoB(p->from.reg); } } - if(p->to.type == D_OREG && p->to.reg != NREG) { - info->reguse |= RtoB(p->to.reg); + if((p->to.type == D_OREG || p->to.type == D_CONST) && p->to.reg != NREG) { + info->regindex |= RtoB(p->to.reg); if(info->flags & PostInc) { info->regset |= RtoB(p->to.reg); } @@ -135,4 +132,13 @@ proginfo(ProgInfo *info, Prog *p) info->flags &= ~LeftRead; info->flags |= LeftAddr; } + + if(p->as == ADUFFZERO) { + info->reguse |= RtoB(0) | RtoB(2); + info->regset |= RtoB(2); + } + if(p->as == ADUFFCOPY) { + info->reguse |= RtoB(0) | RtoB(2) | RtoB(3); + info->regset |= RtoB(2) | RtoB(3); + } } diff --git a/src/cmd/9g/reg.c b/src/cmd/9g/reg.c index bbebf3fe0..b911a2399 100644 --- a/src/cmd/9g/reg.c +++ b/src/cmd/9g/reg.c @@ -33,14 +33,1197 @@ #include "gg.h" #include "opt.h" +#define NREGVAR 64 /* 32 general + 32 floating */ +#define REGBITS ((uint64)0xffffffffffffffffull) +/*c2go enum { + NREGVAR = 64, + REGBITS = 0xffffffffffffffff, +}; +*/ + +static Reg* firstr; +static int first = 1; + +int +rcmp(const void *a1, const void *a2) +{ + Rgn *p1, *p2; + int c1, c2; + + p1 = (Rgn*)a1; + p2 = (Rgn*)a2; + c1 = p2->cost; + c2 = p1->cost; + if(c1 -= c2) + return c1; + return p2->varno - p1->varno; +} + +static void +setaddrs(Bits bit) +{ + int i, n; + Var *v; + Node *node; + + while(bany(&bit)) { + // convert each bit to a variable + i = bnum(bit); + node = var[i].node; + n = var[i].name; + biclr(&bit, i); + + // disable all pieces of that variable + for(i=0; i<nvar; i++) { + v = var+i; + if(v->node == node && v->name == n) + v->addr = 2; + } + } +} + +static char* regname[] = { + ".R0", + ".R1", + ".R2", + ".R3", + ".R4", + ".R5", + ".R6", + ".R7", + ".R8", + ".R9", + ".R10", + ".R11", + ".R12", + ".R13", + ".R14", + ".R15", + ".R16", + ".R17", + ".R18", + ".R19", + ".R20", + ".R21", + ".R22", + ".R23", + ".R24", + ".R25", + ".R26", + ".R27", + ".R28", + ".R29", + ".R30", + ".R31", + ".F0", + ".F1", + ".F2", + ".F3", + ".F4", + ".F5", + ".F6", + ".F7", + ".F8", + ".F9", + ".F10", + ".F11", + ".F12", + ".F13", + ".F14", + ".F15", + ".F16", + ".F17", + ".F18", + ".F19", + ".F20", + ".F21", + ".F22", + ".F23", + ".F24", + ".F25", + ".F26", + ".F27", + ".F28", + ".F29", + ".F30", + ".F31", +}; + +static Node* regnodes[NREGVAR]; + +static void walkvardef(Node *n, Reg *r, int active); + void -regopt(Prog *p) +regopt(Prog *firstp) { - USED(p); - // TODO(minux) + Reg *r, *r1; + Prog *p; + Graph *g; + ProgInfo info; + int i, z, active; + uint64 vreg, usedreg; + Bits bit; + + if(first) { + fmtinstall('Q', Qconv); + first = 0; + } + + mergetemp(firstp); + + /* + * control flow is more complicated in generated go code + * than in generated c code. define pseudo-variables for + * registers, so we have complete register usage information. + */ + nvar = NREGVAR; + memset(var, 0, NREGVAR*sizeof var[0]); + for(i=0; i<NREGVAR; i++) { + if(regnodes[i] == N) + regnodes[i] = newname(lookup(regname[i])); + var[i].node = regnodes[i]; + } + + // Exclude registers with fixed functions + regbits = (1<<D_R0)|RtoB(REGSP)|RtoB(REGG); + // Also exclude floating point registers with fixed constants + regbits |= FtoB(D_F0+27)|FtoB(D_F0+28)|FtoB(D_F0+29)|FtoB(D_F0+30)|FtoB(D_F0+31); + externs = zbits; + params = zbits; + consts = zbits; + addrs = zbits; + ivar = zbits; + ovar = zbits; + + /* + * pass 1 + * build aux data structure + * allocate pcs + * find use and set of variables + */ + g = flowstart(firstp, sizeof(Reg)); + if(g == nil) { + for(i=0; i<nvar; i++) + var[i].node->opt = nil; + return; + } + + firstr = (Reg*)g->start; + + for(r = firstr; r != R; r = (Reg*)r->f.link) { + p = r->f.prog; + if(p->as == AVARDEF || p->as == AVARKILL) + continue; + proginfo(&info, p); + + // Avoid making variables for direct-called functions. + if(p->as == ABL && p->to.name == D_EXTERN) + continue; + + // from vs to doesn't matter for registers + r->use1.b[0] |= info.reguse | info.regindex; + r->set.b[0] |= info.regset; + + // Compute used register for from + bit = mkvar(r, &p->from); + if(info.flags & LeftAddr) + setaddrs(bit); + if(info.flags & LeftRead) + for(z=0; z<BITS; z++) + r->use1.b[z] |= bit.b[z]; + + // Compute used register for reg + if(info.flags & RegRead) { + if(p->from.type != D_FREG) + r->use1.b[0] |= RtoB(p->reg); + else + r->use1.b[0] |= FtoB(D_F0+p->reg); + } + + // Currently we never generate three register forms. + // If we do, this will need to change. + if(p->from3.type != D_NONE) + fatal("regopt not implemented for from3"); + + // Compute used register for to + bit = mkvar(r, &p->to); + if(info.flags & RightAddr) + setaddrs(bit); + if(info.flags & RightRead) + for(z=0; z<BITS; z++) + r->use2.b[z] |= bit.b[z]; + if(info.flags & RightWrite) + for(z=0; z<BITS; z++) + r->set.b[z] |= bit.b[z]; + } + + for(i=0; i<nvar; i++) { + Var *v = var+i; + if(v->addr) { + bit = blsh(i); + for(z=0; z<BITS; z++) + addrs.b[z] |= bit.b[z]; + } + + if(debug['R'] && debug['v']) + print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", + i, v->addr, v->etype, v->width, v->node, v->offset); + } + + if(debug['R'] && debug['v']) + dumpit("pass1", &firstr->f, 1); + + /* + * pass 2 + * find looping structure + */ + flowrpo(g); + + if(debug['R'] && debug['v']) + dumpit("pass2", &firstr->f, 1); + + /* + * pass 2.5 + * iterate propagating fat vardef covering forward + * r->act records vars with a VARDEF since the last CALL. + * (r->act will be reused in pass 5 for something else, + * but we'll be done with it by then.) + */ + active = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) { + r->f.active = 0; + r->act = zbits; + } + for(r = firstr; r != R; r = (Reg*)r->f.link) { + p = r->f.prog; + if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { + active++; + walkvardef(p->to.node, r, active); + } + } + + /* + * pass 3 + * iterate propagating usage + * back until flow graph is complete + */ +loop1: + change = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->f.active = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + if(r->f.prog->as == ARET) + prop(r, zbits, zbits); +loop11: + /* pick up unreachable code */ + i = 0; + for(r = firstr; r != R; r = r1) { + r1 = (Reg*)r->f.link; + if(r1 && r1->f.active && !r->f.active) { + prop(r, zbits, zbits); + i = 1; + } + } + if(i) + goto loop11; + if(change) + goto loop1; + + if(debug['R'] && debug['v']) + dumpit("pass3", &firstr->f, 1); + + /* + * pass 4 + * iterate propagating register/variable synchrony + * forward until graph is complete + */ +loop2: + change = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->f.active = 0; + synch(firstr, zbits); + if(change) + goto loop2; + + if(debug['R'] && debug['v']) + dumpit("pass4", &firstr->f, 1); + + /* + * pass 4.5 + * move register pseudo-variables into regu. + */ + for(r = firstr; r != R; r = (Reg*)r->f.link) { + r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; + + r->set.b[0] &= ~REGBITS; + r->use1.b[0] &= ~REGBITS; + r->use2.b[0] &= ~REGBITS; + r->refbehind.b[0] &= ~REGBITS; + r->refahead.b[0] &= ~REGBITS; + r->calbehind.b[0] &= ~REGBITS; + r->calahead.b[0] &= ~REGBITS; + r->regdiff.b[0] &= ~REGBITS; + r->act.b[0] &= ~REGBITS; + } + + if(debug['R'] && debug['v']) + dumpit("pass4.5", &firstr->f, 1); + + /* + * pass 5 + * isolate regions + * calculate costs (paint1) + */ + r = firstr; + if(r) { + for(z=0; z<BITS; z++) + bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & + ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); + if(bany(&bit) && !r->f.refset) { + // should never happen - all variables are preset + if(debug['w']) + print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); + r->f.refset = 1; + } + } + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->act = zbits; + rgp = region; + nregion = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) { + for(z=0; z<BITS; z++) + bit.b[z] = r->set.b[z] & + ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); + if(bany(&bit) && !r->f.refset) { + if(debug['w']) + print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); + r->f.refset = 1; + excise(&r->f); + } + for(z=0; z<BITS; z++) + bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); + while(bany(&bit)) { + i = bnum(bit); + rgp->enter = r; + rgp->varno = i; + change = 0; + paint1(r, i); + biclr(&bit, i); + if(change <= 0) + continue; + rgp->cost = change; + nregion++; + if(nregion >= NRGN) { + if(debug['R'] && debug['v']) + print("too many regions\n"); + goto brk; + } + rgp++; + } + } +brk: + qsort(region, nregion, sizeof(region[0]), rcmp); + + if(debug['R'] && debug['v']) + dumpit("pass5", &firstr->f, 1); + + /* + * pass 6 + * determine used registers (paint2) + * replace code (paint3) + */ + rgp = region; + if(debug['R'] && debug['v']) + print("\nregisterizing\n"); + for(i=0; i<nregion; i++) { + if(debug['R'] && debug['v']) + print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc); + bit = blsh(rgp->varno); + usedreg = paint2(rgp->enter, rgp->varno, 0); + vreg = allreg(usedreg, rgp); + if(rgp->regno != 0) { + if(debug['R'] && debug['v']) { + Var *v; + + v = var + rgp->varno; + print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n", + v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg); + } + paint3(rgp->enter, rgp->varno, vreg, rgp->regno); + } + rgp++; + } + + /* + * free aux structures. peep allocates new ones. + */ + for(i=0; i<nvar; i++) + var[i].node->opt = nil; + flowend(g); + firstr = R; + + if(debug['R'] && debug['v']) { + // Rebuild flow graph, since we inserted instructions + g = flowstart(firstp, sizeof(Reg)); + firstr = (Reg*)g->start; + dumpit("pass6", &firstr->f, 1); + flowend(g); + firstr = R; + } + + /* + * pass 7 + * peep-hole on basic block + */ + if(!debug['R'] || debug['P']) + peep(firstp); + + /* + * eliminate nops + */ + for(p=firstp; p!=P; p=p->link) { + while(p->link != P && p->link->as == ANOP) + p->link = p->link->link; + if(p->to.type == D_BRANCH) + while(p->to.u.branch != P && p->to.u.branch->as == ANOP) + p->to.u.branch = p->to.u.branch->link; + } + + if(debug['R']) { + if(ostats.ncvtreg || + ostats.nspill || + ostats.ndelmov || + ostats.nvar || + 0) + print("\nstats\n"); + + if(ostats.ncvtreg) + print(" %4d cvtreg\n", ostats.ncvtreg); + if(ostats.nspill) + print(" %4d spill\n", ostats.nspill); + if(ostats.ndelmov) + print(" %4d delmov\n", ostats.ndelmov); + if(ostats.nvar) + print(" %4d var\n", ostats.nvar); + + memset(&ostats, 0, sizeof(ostats)); + } + return; } +static void +walkvardef(Node *n, Reg *r, int active) +{ + Reg *r1, *r2; + int bn; + Var *v; + + for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { + if(r1->f.active == active) + break; + r1->f.active = active; + if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) + break; + for(v=n->opt; v!=nil; v=v->nextinnode) { + bn = v - var; + biset(&r1->act, bn); + } + if(r1->f.prog->as == ABL) + break; + } + + for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) + if(r2->f.s2 != nil) + walkvardef(n, (Reg*)r2->f.s2, active); +} + +/* + * add mov b,rn + * just after r + */ +void +addmove(Reg *r, int bn, int rn, int f) +{ + Prog *p, *p1, *p2; + Adr *a; + Var *v; + + p1 = mal(sizeof(*p1)); + *p1 = zprog; + p = r->f.prog; + + // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc), + // delay the load until after the fixup. + p2 = p->link; + if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == D_REG) + p = p2; + + p1->link = p->link; + p->link = p1; + p1->lineno = p->lineno; + + v = var + bn; + + a = &p1->to; + a->name = v->name; + a->node = v->node; + a->sym = linksym(v->node->sym); + a->offset = v->offset; + a->etype = v->etype; + a->type = D_OREG; + if(a->etype == TARRAY || a->sym == nil) + a->type = D_CONST; + + if(v->addr) + fatal("addmove: shouldn't be doing this %A\n", a); + + switch(v->etype) { + default: + print("What is this %E\n", v->etype); + + case TINT8: + p1->as = AMOVB; + break; + case TBOOL: + case TUINT8: +//print("movbu %E %d %S\n", v->etype, bn, v->sym); + p1->as = AMOVBZ; + break; + case TINT16: + p1->as = AMOVH; + break; + case TUINT16: + p1->as = AMOVHZ; + break; + case TINT32: + p1->as = AMOVW; + break; + case TUINT32: + case TPTR32: + p1->as = AMOVWZ; + break; + case TINT64: + case TUINT64: + case TPTR64: + p1->as = AMOVD; + break; + case TFLOAT32: + p1->as = AFMOVS; + break; + case TFLOAT64: + p1->as = AFMOVD; + break; + } + + p1->from.type = D_REG; + p1->from.reg = rn; + if(rn >= NREG) { + p1->from.type = D_FREG; + p1->from.reg = rn-NREG; + } + if(!f) { + p1->from = *a; + *a = zprog.from; + a->type = D_REG; + a->reg = rn; + if(rn >= NREG) { + a->type = D_FREG; + a->reg = rn-NREG; + } + if(v->etype == TUINT8 || v->etype == TBOOL) + p1->as = AMOVBZ; + if(v->etype == TUINT16) + p1->as = AMOVHZ; + } + if(debug['R']) + print("%P\t.a%P\n", p, p1); + ostats.nspill++; +} + +static int +overlap(int64 o1, int w1, int64 o2, int w2) +{ + int64 t1, t2; + + t1 = o1+w1; + t2 = o2+w2; + + if(!(t1 > o2 && t2 > o1)) + return 0; + + return 1; +} + +Bits +mkvar(Reg *r, Adr *a) +{ + USED(r); + Var *v; + int i, t, n, et, z, flag; + int64 w; + int64 o; + Bits bit; + Node *node; + + // mark registers used + t = a->type; + switch(t) { + default: + print("type %d %d %D\n", t, a->name, a); + goto none; + + case D_NONE: + goto none; + + case D_BRANCH: + case D_CONST: + case D_FCONST: + case D_SCONST: + case D_SPR: + case D_OREG: + break; + + case D_REG: + if(a->reg != NREG) { + bit = zbits; + bit.b[0] = RtoB(a->reg); + return bit; + } + break; + + case D_FREG: + if(a->reg != NREG) { + bit = zbits; + bit.b[0] = FtoB(D_F0+a->reg); + return bit; + } + break; + } + + switch(a->name) { + default: + goto none; + + case D_EXTERN: + case D_STATIC: + case D_AUTO: + case D_PARAM: + n = a->name; + break; + } + + node = a->node; + if(node == N || node->op != ONAME || node->orig == N) + goto none; + node = node->orig; + if(node->orig != node) + fatal("%D: bad node", a); + if(node->sym == S || node->sym->name[0] == '.') + goto none; + et = a->etype; + o = a->offset; + w = a->width; + if(w < 0) + fatal("bad width %lld for %D", w, a); + + flag = 0; + for(i=0; i<nvar; i++) { + v = var+i; + if(v->node == node && v->name == n) { + if(v->offset == o) + if(v->etype == et) + if(v->width == w) + return blsh(i); + + // if they overlap, disable both + if(overlap(v->offset, v->width, o, w)) { + v->addr = 1; + flag = 1; + } + } + } + + switch(et) { + case 0: + case TFUNC: + goto none; + } + + if(nvar >= NVAR) { + if(debug['w'] > 1 && node != N) + fatal("variable not optimized: %#N", node); + + // If we're not tracking a word in a variable, mark the rest as + // having its address taken, so that we keep the whole thing + // live at all calls. otherwise we might optimize away part of + // a variable but not all of it. + for(i=0; i<nvar; i++) { + v = var+i; + if(v->node == node) + v->addr = 1; + } + goto none; + } + + i = nvar; + nvar++; + v = var+i; + v->offset = o; + v->name = n; + v->etype = et; + v->width = w; + v->addr = flag; // funny punning + v->node = node; + + // node->opt is the head of a linked list + // of Vars within the given Node, so that + // we can start at a Var and find all the other + // Vars in the same Go variable. + v->nextinnode = node->opt; + node->opt = v; + + bit = blsh(i); + if(n == D_EXTERN || n == D_STATIC) + for(z=0; z<BITS; z++) + externs.b[z] |= bit.b[z]; + if(n == D_PARAM) + for(z=0; z<BITS; z++) + params.b[z] |= bit.b[z]; + + if(node->class == PPARAM) + for(z=0; z<BITS; z++) + ivar.b[z] |= bit.b[z]; + if(node->class == PPARAMOUT) + for(z=0; z<BITS; z++) + ovar.b[z] |= bit.b[z]; + + // Treat values with their address taken as live at calls, + // because the garbage collector's liveness analysis in ../gc/plive.c does. + // These must be consistent or else we will elide stores and the garbage + // collector will see uninitialized data. + // The typical case where our own analysis is out of sync is when the + // node appears to have its address taken but that code doesn't actually + // get generated and therefore doesn't show up as an address being + // taken when we analyze the instruction stream. + // One instance of this case is when a closure uses the same name as + // an outer variable for one of its own variables declared with :=. + // The parser flags the outer variable as possibly shared, and therefore + // sets addrtaken, even though it ends up not being actually shared. + // If we were better about _ elision, _ = &x would suffice too. + // The broader := in a closure problem is mentioned in a comment in + // closure.c:/^typecheckclosure and dcl.c:/^oldname. + if(node->addrtaken) + v->addr = 1; + + // Disable registerization for globals, because: + // (1) we might panic at any time and we want the recovery code + // to see the latest values (issue 1304). + // (2) we don't know what pointers might point at them and we want + // loads via those pointers to see updated values and vice versa (issue 7995). + // + // Disable registerization for results if using defer, because the deferred func + // might recover and return, causing the current values to be used. + if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) + v->addr = 1; + + if(debug['R']) + print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); + ostats.nvar++; + + return bit; + +none: + return zbits; +} + +void +prop(Reg *r, Bits ref, Bits cal) +{ + Reg *r1, *r2; + int z, i, j; + Var *v, *v1; + + for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { + for(z=0; z<BITS; z++) { + ref.b[z] |= r1->refahead.b[z]; + if(ref.b[z] != r1->refahead.b[z]) { + r1->refahead.b[z] = ref.b[z]; + change++; + } + cal.b[z] |= r1->calahead.b[z]; + if(cal.b[z] != r1->calahead.b[z]) { + r1->calahead.b[z] = cal.b[z]; + change++; + } + } + switch(r1->f.prog->as) { + case ABL: + if(noreturn(r1->f.prog)) + break; + + // Mark all input variables (ivar) as used, because that's what the + // liveness bitmaps say. The liveness bitmaps say that so that a + // panic will not show stale values in the parameter dump. + // Mark variables with a recent VARDEF (r1->act) as used, + // so that the optimizer flushes initializations to memory, + // so that if a garbage collection happens during this CALL, + // the collector will see initialized memory. Again this is to + // match what the liveness bitmaps say. + for(z=0; z<BITS; z++) { + cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; + ref.b[z] = 0; + } + + // cal.b is the current approximation of what's live across the call. + // Every bit in cal.b is a single stack word. For each such word, + // find all the other tracked stack words in the same Go variable + // (struct/slice/string/interface) and mark them live too. + // This is necessary because the liveness analysis for the garbage + // collector works at variable granularity, not at word granularity. + // It is fundamental for slice/string/interface: the garbage collector + // needs the whole value, not just some of the words, in order to + // interpret the other bits correctly. Specifically, slice needs a consistent + // ptr and cap, string needs a consistent ptr and len, and interface + // needs a consistent type word and data word. + for(z=0; z<BITS; z++) { + if(cal.b[z] == 0) + continue; + for(i=0; i<64; i++) { + if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) + continue; + v = var+z*64+i; + if(v->node->opt == nil) // v represents fixed register, not Go variable + continue; + + // v->node->opt is the head of a linked list of Vars + // corresponding to tracked words from the Go variable v->node. + // Walk the list and set all the bits. + // For a large struct this could end up being quadratic: + // after the first setting, the outer loop (for z, i) would see a 1 bit + // for all of the remaining words in the struct, and for each such + // word would go through and turn on all the bits again. + // To avoid the quadratic behavior, we only turn on the bits if + // v is the head of the list or if the head's bit is not yet turned on. + // This will set the bits at most twice, keeping the overall loop linear. + v1 = v->node->opt; + j = v1 - var; + if(v == v1 || !btest(&cal, j)) { + for(; v1 != nil; v1 = v1->nextinnode) { + j = v1 - var; + biset(&cal, j); + } + } + } + } + break; + + case ATEXT: + for(z=0; z<BITS; z++) { + cal.b[z] = 0; + ref.b[z] = 0; + } + break; + + case ARET: + for(z=0; z<BITS; z++) { + cal.b[z] = externs.b[z] | ovar.b[z]; + ref.b[z] = 0; + } + break; + } + for(z=0; z<BITS; z++) { + ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | + r1->use1.b[z] | r1->use2.b[z]; + cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); + r1->refbehind.b[z] = ref.b[z]; + r1->calbehind.b[z] = cal.b[z]; + } + if(r1->f.active) + break; + r1->f.active = 1; + } + for(; r != r1; r = (Reg*)r->f.p1) + for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) + prop(r2, r->refbehind, r->calbehind); +} + +void +synch(Reg *r, Bits dif) +{ + Reg *r1; + int z; + + for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { + for(z=0; z<BITS; z++) { + dif.b[z] = (dif.b[z] & + ~(~r1->refbehind.b[z] & r1->refahead.b[z])) | + r1->set.b[z] | r1->regdiff.b[z]; + if(dif.b[z] != r1->regdiff.b[z]) { + r1->regdiff.b[z] = dif.b[z]; + change++; + } + } + if(r1->f.active) + break; + r1->f.active = 1; + for(z=0; z<BITS; z++) + dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]); + if(r1->f.s2 != nil) + synch((Reg*)r1->f.s2, dif); + } +} + +uint64 +allreg(uint64 b, Rgn *r) +{ + Var *v; + int i; + + v = var + r->varno; + r->regno = 0; + switch(v->etype) { + + default: + fatal("unknown etype %d/%E", bitno(b), v->etype); + break; + + case TINT8: + case TUINT8: + case TINT16: + case TUINT16: + case TINT32: + case TUINT32: + case TINT64: + case TUINT64: + case TINT: + case TUINT: + case TUINTPTR: + case TBOOL: + case TPTR32: + case TPTR64: + i = BtoR(~b); + if(i && r->cost > 0) { + r->regno = i; + return RtoB(i); + } + break; + + case TFLOAT32: + case TFLOAT64: + i = BtoF(~b); + if(i && r->cost > 0) { + r->regno = i; + return FtoB(i); + } + break; + } + return 0; +} + +void +paint1(Reg *r, int bn) +{ + Reg *r1; + int z; + uint64 bb; + + z = bn/64; + bb = 1LL<<(bn%64); + if(r->act.b[z] & bb) + return; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(r1->act.b[z] & bb) + break; + r = r1; + } + + if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { + change -= CLOAD * r->f.loop; + } + for(;;) { + r->act.b[z] |= bb; + + if(r->f.prog->as != ANOP) { // don't give credit for NOPs + if(r->use1.b[z] & bb) + change += CREF * r->f.loop; + if((r->use2.b[z]|r->set.b[z]) & bb) + change += CREF * r->f.loop; + } + + if(STORE(r) & r->regdiff.b[z] & bb) { + change -= CLOAD * r->f.loop; + } + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + paint1(r1, bn); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + paint1(r1, bn); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(r->act.b[z] & bb) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } +} + +uint64 +paint2(Reg *r, int bn, int depth) +{ + Reg *r1; + int z; + uint64 bb, vreg; + + z = bn/64; + bb = 1LL << (bn%64); + vreg = regbits; + if(!(r->act.b[z] & bb)) + return vreg; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(!(r1->act.b[z] & bb)) + break; + r = r1; + } + for(;;) { + if(debug['R'] && debug['v']) + print(" paint2 %d %P\n", depth, r->f.prog); + + r->act.b[z] &= ~bb; + + vreg |= r->regu; + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + vreg |= paint2(r1, bn, depth+1); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + vreg |= paint2(r1, bn, depth+1); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(!(r->act.b[z] & bb)) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } + return vreg; +} + +void +paint3(Reg *r, int bn, uint64 rb, int rn) +{ + Reg *r1; + Prog *p; + int z; + uint64 bb; + + z = bn/64; + bb = 1LL << (bn%64); + if(r->act.b[z] & bb) + return; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(r1->act.b[z] & bb) + break; + r = r1; + } + + if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) + addmove(r, bn, rn, 0); + for(;;) { + r->act.b[z] |= bb; + p = r->f.prog; + + if(r->use1.b[z] & bb) { + if(debug['R'] && debug['v']) + print("%P", p); + addreg(&p->from, rn); + if(debug['R'] && debug['v']) + print(" ===change== %P\n", p); + } + if((r->use2.b[z]|r->set.b[z]) & bb) { + if(debug['R'] && debug['v']) + print("%P", p); + addreg(&p->to, rn); + if(debug['R'] && debug['v']) + print(" ===change== %P\n", p); + } + + if(STORE(r) & r->regdiff.b[z] & bb) + addmove(r, bn, rn, 1); + r->regu |= rb; + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + paint3(r1, bn, rb, rn); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + paint3(r1, bn, rb, rn); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(r->act.b[z] & bb) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } +} + +void +addreg(Adr *a, int rn) +{ + a->sym = nil; + a->node = nil; + a->name = D_NONE; + a->type = D_REG; + a->reg = rn; + if(rn >= NREG) { + a->type = D_FREG; + a->reg = rn-NREG; + } + + ostats.ncvtreg++; +} + /* * track register variables including external registers: * bit reg @@ -56,7 +1239,7 @@ regopt(Prog *p) uint64 RtoB(int r) { - if(r >= D_R0 && r <= D_R0+31) + if(r > D_R0 && r <= D_R0+31) return 1ULL << (r - D_R0); return 0; } @@ -64,7 +1247,7 @@ RtoB(int r) int BtoR(uint64 b) { - b &= 0xffffffff; + b &= 0xffffffffull; if(b == 0) return 0; return bitno(b) + D_R0; @@ -139,6 +1322,7 @@ void dumpit(char *str, Flow *r0, int isreg) { Flow *r, *r1; + int s1v, s2v; print("\n%s\n", str); for(r = r0; r != nil; r = r->link) { @@ -150,12 +1334,16 @@ dumpit(char *str, Flow *r0, int isreg) print(" %.4ud", (int)r1->prog->pc); print("\n"); } -// r1 = r->s1; -// if(r1 != R) { -// print(" succ:"); -// for(; r1 != R; r1 = r1->s1) -// print(" %.4ud", (int)r1->prog->pc); -// print("\n"); -// } + // If at least one successor is "interesting", print both + s1v = (r->s1 != nil) && (r->s1->prog != r->prog->link); + s2v = (r->s2 != nil) && (r->s2->prog != r->prog->link); + if(s1v || s2v) { + print(" succ:"); + if(r->s1 != nil) + print(" %.4ud", (int)r->s1->prog->pc); + if(r->s2 != nil) + print(" %.4ud", (int)r->s2->prog->pc); + print("\n"); + } } } diff --git a/src/cmd/9l/9.out.h b/src/cmd/9l/9.out.h index e494e90ca..08a339318 100644 --- a/src/cmd/9l/9.out.h +++ b/src/cmd/9l/9.out.h @@ -131,7 +131,7 @@ enum C_NCLASS, /* must be the last */ }; -enum as +enum { AXXX, AADD, @@ -501,7 +501,7 @@ enum D_R0 = 0, // type is D_REG D_F0 = D_R0+NREG, // type is D_FREG -/* reg names iff type is D_SPR */ +/* reg names in offset field iff type is D_SPR */ D_XER = 1, D_LR = 8, D_CTR = 9 diff --git a/src/cmd/gc/bits.c b/src/cmd/gc/bits.c index 2e79f6f1d..fe9a168dc 100644 --- a/src/cmd/gc/bits.c +++ b/src/cmd/gc/bits.c @@ -95,11 +95,11 @@ int bnum(Bits a) { int i; - int32 b; + uint64 b; for(i=0; i<BITS; i++) if(b = a.b[i]) - return 32*i + bitno(b); + return 64*i + bitno(b); fatal("bad in bnum"); return 0; } @@ -110,27 +110,35 @@ blsh(uint n) Bits c; c = zbits; - c.b[n/32] = 1L << (n%32); + c.b[n/64] = 1LL << (n%64); return c; } -/* int -bset(Bits a, uint n) +btest(Bits *a, uint n) { - if(a.b[n/32] & (1L << (n%32))) - return 1; - return 0; + return (a->b[n/64] & (1LL << (n%64))) != 0; +} + +void +biset(Bits *a, uint n) +{ + a->b[n/64] |= 1LL << (n%64); +} + +void +biclr(Bits *a, uint n) +{ + a->b[n/64] &= ~(1LL << (n%64)); } -*/ int -bitno(int32 b) +bitno(uint64 b) { int i; - for(i=0; i<32; i++) - if(b & (1L<<i)) + for(i=0; i<64; i++) + if(b & (1LL<<i)) return i; fatal("bad in bitno"); return 0; @@ -157,7 +165,7 @@ Qconv(Fmt *fp) if(var[i].offset != 0) fmtprint(fp, "%+lld", (vlong)var[i].offset); } - bits.b[i/32] &= ~(1L << (i%32)); + biclr(&bits, i); } return 0; } diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index cc590416b..c695c5bf3 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -704,13 +704,13 @@ enum Ecomplit = 1<<11, // type in composite literal }; -#define BITS 5 -#define NVAR (BITS*sizeof(uint32)*8) +#define BITS 3 +#define NVAR (BITS*sizeof(uint64)*8) typedef struct Bits Bits; struct Bits { - uint32 b[BITS]; + uint64 b[BITS]; }; EXTERN Bits zbits; @@ -1027,12 +1027,14 @@ int Qconv(Fmt *fp); Bits band(Bits a, Bits b); int bany(Bits *a); int beq(Bits a, Bits b); -int bitno(int32 b); +int bitno(uint64 b); Bits blsh(uint n); Bits bnot(Bits a); int bnum(Bits a); Bits bor(Bits a, Bits b); -int bset(Bits a, uint n); +int btest(Bits *a, uint n); +void biset(Bits *a, uint n); +void biclr(Bits *a, uint n); /* * bv.c diff --git a/src/cmd/objdump/objdump_test.go b/src/cmd/objdump/objdump_test.go index 2bb74663c..bd09ae9f9 100644 --- a/src/cmd/objdump/objdump_test.go +++ b/src/cmd/objdump/objdump_test.go @@ -101,6 +101,10 @@ func testDisasm(t *testing.T, flags ...string) { } func TestDisasm(t *testing.T) { + switch runtime.GOARCH { + case "power64", "power64le": + t.Skipf("skipping on %s, issue 9039", runtime.GOARCH) + } testDisasm(t) } @@ -109,5 +113,9 @@ func TestDisasmExtld(t *testing.T) { case "plan9", "windows": t.Skipf("skipping on %s", runtime.GOOS) } + switch runtime.GOARCH { + case "power64", "power64le": + t.Skipf("skipping on %s, no support for external linking, issue 9038", runtime.GOARCH) + } testDisasm(t, "-ldflags=-linkmode=external") } diff --git a/src/liblink/list9.c b/src/liblink/list9.c index 041c6884f..c9190d894 100644 --- a/src/liblink/list9.c +++ b/src/liblink/list9.c @@ -259,11 +259,12 @@ Dconv(Fmt *fp) sprint(str, "%s+%.5lux(BRANCH)", a->sym->name, v); else sprint(str, "%.5lux(BRANCH)", v); - } else - if(a->sym != nil) - sprint(str, "%s+%lld(APC)", a->sym->name, a->offset); - else - sprint(str, "%lld(APC)", a->offset); + } else if(a->u.branch != nil) + sprint(str, "%lld", a->u.branch->pc); + else if(a->sym != nil) + sprint(str, "%s+%lld(APC)", a->sym->name, a->offset); + else + sprint(str, "%lld(APC)", a->offset); break; case D_FCONST: diff --git a/src/reflect/asm_power64x.s b/src/reflect/asm_power64x.s index e430cdf04..472063824 100644 --- a/src/reflect/asm_power64x.s +++ b/src/reflect/asm_power64x.s @@ -5,12 +5,14 @@ // +build power64 power64le #include "textflag.h" +#include "funcdata.h" // makeFuncStub is the code half of the function returned by MakeFunc. // See the comment on the declaration of makeFuncStub in makefunc.go // for more details. -// No argsize here, gc generates argsize info at call site. +// No arg size here, runtime pulls arg map out of the func value. TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16 + NO_LOCAL_POINTERS MOVD R11, 8(R1) MOVD $argframe+0(FP), R3 MOVD R3, 16(R1) @@ -20,8 +22,9 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16 // methodValueCall is the code half of the function returned by makeMethodValue. // See the comment on the declaration of methodValueCall in makefunc.go // for more details. -// No argsize here, gc generates argsize info at call site. +// No arg size here; runtime pulls arg map out of the func value. TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$16 + NO_LOCAL_POINTERS MOVD R11, 8(R1) MOVD $argframe+0(FP), R3 MOVD R3, 16(R1) diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index fd0c6be16..ba900c2b3 100644 --- a/src/runtime/asm_power64x.s +++ b/src/runtime/asm_power64x.s @@ -86,7 +86,7 @@ TEXT runtime·reginit(SB),NOSPLIT,$-8-0 // void gosave(Gobuf*) // save state in Gobuf; setjmp TEXT runtime·gosave(SB), NOSPLIT, $-8-8 - MOVD gobuf+0(FP), R3 + MOVD buf+0(FP), R3 MOVD R1, gobuf_sp(R3) MOVD LR, R31 MOVD R31, gobuf_pc(R3) @@ -99,7 +99,7 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8 // void gogo(Gobuf*) // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $-8-8 - MOVD gobuf+0(FP), R5 + MOVD buf+0(FP), R5 MOVD gobuf_g(R5), g // make sure g is not nil MOVD 0(g), R4 MOVD gobuf_sp(R5), R1 @@ -299,7 +299,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0 // Note: can't just "BR NAME(SB)" - bad inlining results. TEXT ·reflectcall(SB), NOSPLIT, $-8-24 - MOVW argsize+16(FP), R3 + MOVWZ n+16(FP), R3 DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) @@ -335,8 +335,8 @@ TEXT ·reflectcall(SB), NOSPLIT, $-8-24 TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ NO_LOCAL_POINTERS; \ /* copy arguments to stack */ \ - MOVD argptr+8(FP), R3; \ - MOVW argsize+16(FP), R4; \ + MOVD arg+8(FP), R3; \ + MOVWZ n+16(FP), R4; \ MOVD R1, R5; \ ADD $(8-1), R5; \ SUB $1, R3; \ @@ -353,9 +353,9 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ PCDATA $PCDATA_StackMapIndex, $0; \ BL (CTR); \ /* copy return values back */ \ - MOVD argptr+8(FP), R3; \ - MOVW argsize+16(FP), R4; \ - MOVW retoffset+20(FP), R6; \ + MOVD arg+8(FP), R3; \ + MOVWZ n+16(FP), R4; \ + MOVWZ retoffset+20(FP), R6; \ MOVD R1, R5; \ ADD R6, R5; \ ADD R6, R3; \ @@ -398,7 +398,7 @@ CALLFN(·call268435456, 268435456) CALLFN(·call536870912, 536870912) CALLFN(·call1073741824, 1073741824) -// bool cas(int32 *val, int32 old, int32 new) +// bool cas(uint32 *ptr, uint32 old, uint32 new) // Atomically: // if(*val == old){ // *val = new; @@ -406,9 +406,9 @@ CALLFN(·call1073741824, 1073741824) // } else // return 0; TEXT runtime·cas(SB), NOSPLIT, $0-17 - MOVD p+0(FP), R3 - MOVW old+8(FP), R4 - MOVW new+12(FP), R5 + MOVD ptr+0(FP), R3 + MOVWZ old+8(FP), R4 + MOVWZ new+12(FP), R5 cas_again: SYNC LWAR (R3), R6 @@ -425,7 +425,7 @@ cas_fail: MOVD $0, R3 BR -5(PC) -// bool runtime·cas64(uint64 *val, uint64 old, uint64 new) +// bool runtime·cas64(uint64 *ptr, uint64 old, uint64 new) // Atomically: // if(*val == *old){ // *val = new; @@ -434,7 +434,7 @@ cas_fail: // return 0; // } TEXT runtime·cas64(SB), NOSPLIT, $0-25 - MOVD p+0(FP), R3 + MOVD ptr+0(FP), R3 MOVD old+8(FP), R4 MOVD new+16(FP), R5 cas64_again: @@ -475,12 +475,12 @@ TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16 TEXT runtime·casp1(SB), NOSPLIT, $0-25 BR runtime·cas64(SB) -// uint32 xadd(uint32 volatile *val, int32 delta) +// uint32 xadd(uint32 volatile *ptr, int32 delta) // Atomically: // *val += delta; // return *val; TEXT runtime·xadd(SB), NOSPLIT, $0-20 - MOVD p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVW delta+8(FP), R5 SYNC LWAR (R4), R3 @@ -493,7 +493,7 @@ TEXT runtime·xadd(SB), NOSPLIT, $0-20 RETURN TEXT runtime·xadd64(SB), NOSPLIT, $0-24 - MOVD p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVD delta+8(FP), R5 SYNC LDAR (R4), R3 @@ -506,7 +506,7 @@ TEXT runtime·xadd64(SB), NOSPLIT, $0-24 RETURN TEXT runtime·xchg(SB), NOSPLIT, $0-20 - MOVD p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVW new+8(FP), R5 SYNC LWAR (R4), R3 @@ -518,7 +518,7 @@ TEXT runtime·xchg(SB), NOSPLIT, $0-20 RETURN TEXT runtime·xchg64(SB), NOSPLIT, $0-24 - MOVD p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVD new+8(FP), R5 SYNC LDAR (R4), R3 @@ -651,7 +651,7 @@ TEXT runtime·setcallerpc(SB),NOSPLIT,$-8-16 RETURN TEXT runtime·getcallersp(SB),NOSPLIT,$0-16 - MOVD sp+0(FP), R3 + MOVD argp+0(FP), R3 SUB $8, R3 MOVD R3, ret+8(FP) RETURN @@ -695,16 +695,17 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0 TEXT runtime·memeq(SB),NOSPLIT,$-8-25 MOVD a+0(FP), R3 MOVD b+8(FP), R4 - MOVD count+16(FP), R5 + MOVD size+16(FP), R5 SUB $1, R3 SUB $1, R4 ADD R3, R5, R8 loop: CMP R3, R8 - BNE 4(PC) + BNE test MOVD $1, R3 MOVB R3, ret+24(FP) RETURN +test: MOVBZU 1(R3), R6 MOVBZU 1(R4), R7 CMP R6, R7 @@ -828,7 +829,7 @@ notfound: // in ../../cmd/9g/ggen.c:/^clearfat. // R0: always zero // R3 (aka REGRT1): ptr to memory to be zeroed - 8 -// R3 is updated as a side effect. +// On return, R3 points to the last zeroed dword. TEXT runtime·duffzero(SB), NOSPLIT, $-8-0 MOVDU R0, 8(R3) MOVDU R0, 8(R3) @@ -964,7 +965,7 @@ TEXT runtime·fastrand1(SB), NOSPLIT, $0-4 MOVD g_m(g), R4 MOVWZ m_fastrand(R4), R3 ADD R3, R3 - CMP R3, $0 + CMPW R3, $0 BGE 2(PC) XOR $0x88888eef, R3 MOVW R3, m_fastrand(R4) @@ -979,3 +980,9 @@ TEXT runtime·return0(SB), NOSPLIT, $0 // Must obey the gcc calling convention. TEXT _cgo_topofstack(SB),NOSPLIT,$0 MOVD R0, 26(R0) + +// The top-most function running on a goroutine +// returns to goexit+PCQuantum. +TEXT runtime·goexit(SB),NOSPLIT,$-8-0 + MOVD R0, R0 // NOP + BL runtime·goexit1(SB) // does not return diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index 1a33f3b3b..662b7546d 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -137,7 +137,7 @@ func infoBigStruct() []byte { BitsScalar, BitsScalar, BitsScalar, BitsScalar, // t int; y uint16; u uint64 BitsPointer, BitsDead, // i string } - case "amd64": + case "amd64", "power64", "power64le": return []byte{ BitsPointer, // q *int BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte @@ -153,12 +153,6 @@ func infoBigStruct() []byte { BitsScalar, BitsScalar, BitsDead, BitsScalar, BitsScalar, // t int; y uint16; u uint64 BitsPointer, BitsDead, // i string } - case "power64", "power64le": - return []byte{ - BitsPointer, BitsScalar, BitsScalar, BitsScalar, - BitsMultiWord, BitsSlice, BitsScalar, BitsScalar, - BitsScalar, BitsScalar, BitsMultiWord, BitsString, - } default: panic("unknown arch") } diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c index 3248b0f49..f37c01af0 100644 --- a/src/runtime/mgc0.c +++ b/src/runtime/mgc0.c @@ -122,6 +122,7 @@ enum { Debug = 0, + DebugPtrs = 0, // if 1, print trace of every pointer load during GC ConcurrentSweep = 1, FinBlockSize = 4*1024, diff --git a/src/runtime/panic.c b/src/runtime/panic.c index 46683b2b0..b19fdd0e1 100644 --- a/src/runtime/panic.c +++ b/src/runtime/panic.c @@ -69,7 +69,7 @@ runtime·recovery_m(G *gp) // each call to deferproc. // (The pc we're returning to does pop pop // before it tests the return value.) - // On the arm there are 2 saved LRs mixed in too. + // On the arm and power there are 2 saved LRs mixed in too. if(thechar == '5' || thechar == '9') gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr); else diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c index f19f8e4be..a68414284 100644 --- a/src/runtime/runtime.c +++ b/src/runtime/runtime.c @@ -226,6 +226,12 @@ runtime·check(void) if(z != 4) runtime·throw("cas4"); + z = 0xffffffff; + if(!runtime·cas(&z, 0xffffffff, 0xfffffffe)) + runtime·throw("cas5"); + if(z != 0xfffffffe) + runtime·throw("cas6"); + k = (byte*)0xfedcb123; if(sizeof(void*) == 8) k = (byte*)((uintptr)k<<10); diff --git a/src/runtime/signal_power64x.c b/src/runtime/signal_power64x.c index 89c5c7848..c0bf1c4a5 100644 --- a/src/runtime/signal_power64x.c +++ b/src/runtime/signal_power64x.c @@ -124,7 +124,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp) if(runtime·gotraceback(&crash)){ runtime·goroutineheader(gp); - runtime·traceback(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LINK(info, ctxt), gp); + runtime·tracebacktrap(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LINK(info, ctxt), gp); runtime·tracebackothers(gp); runtime·printf("\n"); runtime·dumpregs(info, ctxt); |