summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/cmd/5g/gobj.c2
-rw-r--r--src/cmd/5g/gsubr.c3
-rw-r--r--src/cmd/5g/opt.h14
-rw-r--r--src/cmd/5g/reg.c50
-rw-r--r--src/cmd/6g/gobj.c2
-rw-r--r--src/cmd/6g/opt.h14
-rw-r--r--src/cmd/6g/reg.c97
-rw-r--r--src/cmd/8g/gobj.c2
-rw-r--r--src/cmd/8g/opt.h14
-rw-r--r--src/cmd/8g/reg.c92
-rw-r--r--src/cmd/9g/ggen.c20
-rw-r--r--src/cmd/9g/gobj.c2
-rw-r--r--src/cmd/9g/gsubr.c26
-rw-r--r--src/cmd/9g/opt.h49
-rw-r--r--src/cmd/9g/peep.c4
-rw-r--r--src/cmd/9g/prog.c22
-rw-r--r--src/cmd/9g/reg.c1212
-rw-r--r--src/cmd/9l/9.out.h4
-rw-r--r--src/cmd/gc/bits.c34
-rw-r--r--src/cmd/gc/go.h12
-rw-r--r--src/cmd/objdump/objdump_test.go8
-rw-r--r--src/liblink/list9.c11
-rw-r--r--src/reflect/asm_power64x.s7
-rw-r--r--src/runtime/asm_power64x.s55
-rw-r--r--src/runtime/gcinfo_test.go8
-rw-r--r--src/runtime/mgc0.c1
-rw-r--r--src/runtime/panic.c2
-rw-r--r--src/runtime/runtime.c6
-rw-r--r--src/runtime/signal_power64x.c2
29 files changed, 1467 insertions, 308 deletions
diff --git a/src/cmd/5g/gobj.c b/src/cmd/5g/gobj.c
index 5e988878f..65f731685 100644
--- a/src/cmd/5g/gobj.c
+++ b/src/cmd/5g/gobj.c
@@ -86,7 +86,7 @@ datagostring(Strlit *sval, Addr *a)
sym = stringsym(sval->s, sval->len);
a->type = D_OREG;
a->name = D_EXTERN;
- a->etype = TINT32;
+ a->etype = TSTRING;
a->offset = 0; // header
a->reg = NREG;
a->sym = linksym(sym);
diff --git a/src/cmd/5g/gsubr.c b/src/cmd/5g/gsubr.c
index 06e274e14..f09197963 100644
--- a/src/cmd/5g/gsubr.c
+++ b/src/cmd/5g/gsubr.c
@@ -1353,9 +1353,10 @@ naddr(Node *n, Addr *a, int canemitcode)
case OITAB:
// itable of interface value
naddr(n->left, a, canemitcode);
- a->etype = TINT32;
+ a->etype = simtype[tptr];
if(a->type == D_CONST && a->offset == 0)
break; // len(nil)
+ a->width = widthptr;
break;
case OSPTR:
diff --git a/src/cmd/5g/opt.h b/src/cmd/5g/opt.h
index 1946c1d33..5016d1cc8 100644
--- a/src/cmd/5g/opt.h
+++ b/src/cmd/5g/opt.h
@@ -63,8 +63,8 @@ enum
uint32 BLOAD(Reg*);
uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
+uint64 LOAD(Reg*);
+uint64 STORE(Reg*);
*/
// A Reg is a wrapper around a single Prog (one instruction) that holds
@@ -145,7 +145,7 @@ void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int);
uint32 paint2(Reg*, int);
-void paint3(Reg*, int, int32, int);
+void paint3(Reg*, int, uint32, int);
void addreg(Adr*, int);
void dumpit(char *str, Flow *r0, int);
@@ -156,10 +156,10 @@ void peep(Prog*);
void excise(Flow*);
int copyu(Prog*, Adr*, Adr*);
-int32 RtoB(int);
-int32 FtoB(int);
-int BtoR(int32);
-int BtoF(int32);
+uint32 RtoB(int);
+uint32 FtoB(int);
+int BtoR(uint32);
+int BtoF(uint32);
/*
* prog.c
diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c
index 27d9d3e8b..441792873 100644
--- a/src/cmd/5g/reg.c
+++ b/src/cmd/5g/reg.c
@@ -35,7 +35,7 @@
#include "opt.h"
#define NREGVAR 32
-#define REGBITS ((uint32)0xffffffff)
+#define REGBITS ((uint64)0xffffffffull)
/*c2go enum {
NREGVAR = 32,
REGBITS = 0xffffffff,
@@ -86,7 +86,7 @@ setaddrs(Bits bit)
i = bnum(bit);
node = var[i].node;
n = var[i].name;
- bit.b[i/32] &= ~(1L<<(i%32));
+ biclr(&bit, i);
// disable all pieces of that variable
for(i=0; i<nvar; i++) {
@@ -393,7 +393,7 @@ loop2:
for(z=0; z<BITS; z++)
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
- if(bany(&bit) & !r->f.refset) {
+ if(bany(&bit) && !r->f.refset) {
// should never happen - all variables are preset
if(debug['w'])
print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
@@ -425,7 +425,7 @@ loop2:
if(debug['R'] > 1)
print("\n");
paint1(r, i);
- bit.b[i/32] &= ~(1L<<(i%32));
+ biclr(&bit, i);
if(change <= 0) {
if(debug['R'])
print("%L $%d: %Q\n",
@@ -570,7 +570,7 @@ walkvardef(Node *n, Reg *r, int active)
break;
for(v=n->opt; v!=nil; v=v->nextinnode) {
bn = v - var;
- r1->act.b[bn/32] |= 1L << (bn%32);
+ biset(&r1->act, bn);
}
if(r1->f.prog->as == ABL)
break;
@@ -606,7 +606,7 @@ addsplits(void)
~(r->calahead.b[z] & addrs.b[z]);
while(bany(&bit)) {
i = bnum(bit);
- bit.b[i/32] &= ~(1L << (i%32));
+ biclr(&bit, i);
}
}
}
@@ -972,10 +972,10 @@ prop(Reg *r, Bits ref, Bits cal)
for(z=0; z<BITS; z++) {
if(cal.b[z] == 0)
continue;
- for(i=0; i<32; i++) {
- if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+ for(i=0; i<64; i++) {
+ if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
continue;
- v = var+z*32+i;
+ v = var+z*64+i;
if(v->node->opt == nil) // v represents fixed register, not Go variable
continue;
@@ -991,10 +991,10 @@ prop(Reg *r, Bits ref, Bits cal)
// This will set the bits at most twice, keeping the overall loop linear.
v1 = v->node->opt;
j = v1 - var;
- if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
+ if(v == v1 || !btest(&cal, j)) {
for(; v1 != nil; v1 = v1->nextinnode) {
j = v1 - var;
- cal.b[j/32] |= 1<<(j&31);
+ biset(&cal, j);
}
}
}
@@ -1115,10 +1115,10 @@ paint1(Reg *r, int bn)
Reg *r1;
Prog *p;
int z;
- uint32 bb;
+ uint64 bb;
- z = bn/32;
- bb = 1L<<(bn%32);
+ z = bn/64;
+ bb = 1LL<<(bn%64);
if(r->act.b[z] & bb)
return;
for(;;) {
@@ -1193,10 +1193,10 @@ paint2(Reg *r, int bn)
{
Reg *r1;
int z;
- uint32 bb, vreg;
+ uint64 bb, vreg;
- z = bn/32;
- bb = 1L << (bn%32);
+ z = bn/64;
+ bb = 1LL << (bn%64);
vreg = regbits;
if(!(r->act.b[z] & bb))
return vreg;
@@ -1240,15 +1240,15 @@ paint2(Reg *r, int bn)
}
void
-paint3(Reg *r, int bn, int32 rb, int rn)
+paint3(Reg *r, int bn, uint32 rb, int rn)
{
Reg *r1;
Prog *p;
int z;
- uint32 bb;
+ uint64 bb;
- z = bn/32;
- bb = 1L << (bn%32);
+ z = bn/64;
+ bb = 1LL << (bn%64);
if(r->act.b[z] & bb)
return;
for(;;) {
@@ -1333,7 +1333,7 @@ addreg(Adr *a, int rn)
* 10 R10
* 12 R12
*/
-int32
+uint32
RtoB(int r)
{
if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12
@@ -1342,7 +1342,7 @@ RtoB(int r)
}
int
-BtoR(int32 b)
+BtoR(uint32 b)
{
b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12
if(b == 0)
@@ -1357,7 +1357,7 @@ BtoR(int32 b)
* ... ...
* 31 F15
*/
-int32
+uint32
FtoB(int f)
{
@@ -1367,7 +1367,7 @@ FtoB(int f)
}
int
-BtoF(int32 b)
+BtoF(uint32 b)
{
b &= 0xfffc0000L;
diff --git a/src/cmd/6g/gobj.c b/src/cmd/6g/gobj.c
index 04e837b13..dbb4ff62c 100644
--- a/src/cmd/6g/gobj.c
+++ b/src/cmd/6g/gobj.c
@@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a)
a->sym = linksym(sym);
a->node = sym->def;
a->offset = 0; // header
- a->etype = TINT32;
+ a->etype = TSTRING;
}
void
diff --git a/src/cmd/6g/opt.h b/src/cmd/6g/opt.h
index dbd039d89..4c9bb89fc 100644
--- a/src/cmd/6g/opt.h
+++ b/src/cmd/6g/opt.h
@@ -63,8 +63,8 @@ enum
uint32 BLOAD(Reg*);
uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
+uint64 LOAD(Reg*);
+uint64 STORE(Reg*);
*/
// A Reg is a wrapper around a single Prog (one instruction) that holds
@@ -141,7 +141,7 @@ void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int);
uint32 paint2(Reg*, int);
-void paint3(Reg*, int, int32, int);
+void paint3(Reg*, int, uint32, int);
void addreg(Adr*, int);
void dumpone(Flow*, int);
void dumpit(char*, Flow*, int);
@@ -153,10 +153,10 @@ void peep(Prog*);
void excise(Flow*);
int copyu(Prog*, Adr*, Adr*);
-int32 RtoB(int);
-int32 FtoB(int);
-int BtoR(int32);
-int BtoF(int32);
+uint32 RtoB(int);
+uint32 FtoB(int);
+int BtoR(uint32);
+int BtoF(uint32);
/*
* prog.c
diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c
index 1f757e197..4ce2f4db0 100644
--- a/src/cmd/6g/reg.c
+++ b/src/cmd/6g/reg.c
@@ -34,7 +34,7 @@
#include "opt.h"
#define NREGVAR 32 /* 16 general + 16 floating */
-#define REGBITS ((uint32)0xffffffff)
+#define REGBITS ((uint64)0xffffffffull)
/*c2go enum {
NREGVAR = 32,
REGBITS = 0xffffffff,
@@ -71,7 +71,7 @@ setaddrs(Bits bit)
i = bnum(bit);
node = var[i].node;
n = var[i].name;
- bit.b[i/32] &= ~(1L<<(i%32));
+ biclr(&bit, i);
// disable all pieces of that variable
for(i=0; i<nvar; i++) {
@@ -364,7 +364,7 @@ loop2:
rgp->varno = i;
change = 0;
paint1(r, i);
- bit.b[i/32] &= ~(1L<<(i%32));
+ biclr(&bit, i);
if(change <= 0)
continue;
rgp->cost = change;
@@ -477,7 +477,7 @@ walkvardef(Node *n, Reg *r, int active)
break;
for(v=n->opt; v!=nil; v=v->nextinnode) {
bn = v - var;
- r1->act.b[bn/32] |= 1L << (bn%32);
+ biset(&r1->act, bn);
}
if(r1->f.prog->as == ACALL)
break;
@@ -621,6 +621,9 @@ mkvar(Reg *r, Adr *a)
if(r != R)
r->use1.b[0] |= doregbits(a->index);
+ if(t >= D_INDIR && t < 2*D_INDIR)
+ goto none;
+
switch(t) {
default:
regu = doregbits(t);
@@ -822,10 +825,10 @@ prop(Reg *r, Bits ref, Bits cal)
for(z=0; z<BITS; z++) {
if(cal.b[z] == 0)
continue;
- for(i=0; i<32; i++) {
- if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+ for(i=0; i<64; i++) {
+ if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
continue;
- v = var+z*32+i;
+ v = var+z*64+i;
if(v->node->opt == nil) // v represents fixed register, not Go variable
continue;
@@ -841,10 +844,10 @@ prop(Reg *r, Bits ref, Bits cal)
// This will set the bits at most twice, keeping the overall loop linear.
v1 = v->node->opt;
j = v1 - var;
- if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
+ if(v == v1 || !btest(&cal, j)) {
for(; v1 != nil; v1 = v1->nextinnode) {
j = v1 - var;
- cal.b[j/32] |= 1UL<<(j&31);
+ biset(&cal, j);
}
}
}
@@ -959,10 +962,10 @@ paint1(Reg *r, int bn)
{
Reg *r1;
int z;
- uint32 bb;
+ uint64 bb;
- z = bn/32;
- bb = 1L<<(bn%32);
+ z = bn/64;
+ bb = 1LL<<(bn%64);
if(r->act.b[z] & bb)
return;
for(;;) {
@@ -1017,54 +1020,14 @@ paint1(Reg *r, int bn)
}
uint32
-regset(Reg *r, uint32 bb)
-{
- uint32 b, set;
- Adr v;
- int c;
-
- set = 0;
- v = zprog.from;
- while(b = bb & ~(bb-1)) {
- v.type = b & 0xFFFF? BtoR(b): BtoF(b);
- if(v.type == 0)
- fatal("zero v.type for %#ux", b);
- c = copyu(r->f.prog, &v, nil);
- if(c == 3)
- set |= b;
- bb &= ~b;
- }
- return set;
-}
-
-uint32
-reguse(Reg *r, uint32 bb)
-{
- uint32 b, set;
- Adr v;
- int c;
-
- set = 0;
- v = zprog.from;
- while(b = bb & ~(bb-1)) {
- v.type = b & 0xFFFF? BtoR(b): BtoF(b);
- c = copyu(r->f.prog, &v, nil);
- if(c == 1 || c == 2 || c == 4)
- set |= b;
- bb &= ~b;
- }
- return set;
-}
-
-uint32
paint2(Reg *r, int bn)
{
Reg *r1;
int z;
- uint32 bb, vreg, x;
+ uint64 bb, vreg;
- z = bn/32;
- bb = 1L << (bn%32);
+ z = bn/64;
+ bb = 1LL << (bn%64);
vreg = regbits;
if(!(r->act.b[z] & bb))
return vreg;
@@ -1105,27 +1068,19 @@ paint2(Reg *r, int bn)
break;
}
- bb = vreg;
- for(; r; r=(Reg*)r->f.s1) {
- x = r->regu & ~bb;
- if(x) {
- vreg |= reguse(r, x);
- bb |= regset(r, x);
- }
- }
return vreg;
}
void
-paint3(Reg *r, int bn, int32 rb, int rn)
+paint3(Reg *r, int bn, uint32 rb, int rn)
{
Reg *r1;
Prog *p;
int z;
- uint32 bb;
+ uint64 bb;
- z = bn/32;
- bb = 1L << (bn%32);
+ z = bn/64;
+ bb = 1LL << (bn%64);
if(r->act.b[z] & bb)
return;
for(;;) {
@@ -1198,7 +1153,7 @@ addreg(Adr *a, int rn)
ostats.ncvtreg++;
}
-int32
+uint32
RtoB(int r)
{
@@ -1208,7 +1163,7 @@ RtoB(int r)
}
int
-BtoR(int32 b)
+BtoR(uint32 b)
{
b &= 0xffffL;
if(nacl)
@@ -1224,7 +1179,7 @@ BtoR(int32 b)
* ...
* 31 X15
*/
-int32
+uint32
FtoB(int f)
{
if(f < D_X0 || f > D_X15)
@@ -1233,7 +1188,7 @@ FtoB(int f)
}
int
-BtoF(int32 b)
+BtoF(uint32 b)
{
b &= 0xFFFF0000L;
diff --git a/src/cmd/8g/gobj.c b/src/cmd/8g/gobj.c
index fa0605e6c..af287f702 100644
--- a/src/cmd/8g/gobj.c
+++ b/src/cmd/8g/gobj.c
@@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a)
a->sym = linksym(sym);
a->node = sym->def;
a->offset = 0; // header
- a->etype = TINT32;
+ a->etype = TSTRING;
}
void
diff --git a/src/cmd/8g/opt.h b/src/cmd/8g/opt.h
index 09f58c40a..0e2d165b1 100644
--- a/src/cmd/8g/opt.h
+++ b/src/cmd/8g/opt.h
@@ -63,8 +63,8 @@ enum
uint32 BLOAD(Reg*);
uint32 BSTORE(Reg*);
-uint32 LOAD(Reg*);
-uint32 STORE(Reg*);
+uint64 LOAD(Reg*);
+uint64 STORE(Reg*);
*/
// A Reg is a wrapper around a single Prog (one instruction) that holds
@@ -159,7 +159,7 @@ void synch(Reg*, Bits);
uint32 allreg(uint32, Rgn*);
void paint1(Reg*, int);
uint32 paint2(Reg*, int);
-void paint3(Reg*, int, int32, int);
+void paint3(Reg*, int, uint32, int);
void addreg(Adr*, int);
void dumpone(Flow*, int);
void dumpit(char*, Flow*, int);
@@ -171,10 +171,10 @@ void peep(Prog*);
void excise(Flow*);
int copyu(Prog*, Adr*, Adr*);
-int32 RtoB(int);
-int32 FtoB(int);
-int BtoR(int32);
-int BtoF(int32);
+uint32 RtoB(int);
+uint32 FtoB(int);
+int BtoR(uint32);
+int BtoF(uint32);
/*
* prog.c
diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c
index 302b273a1..79d60bed5 100644
--- a/src/cmd/8g/reg.c
+++ b/src/cmd/8g/reg.c
@@ -34,7 +34,7 @@
#include "opt.h"
#define NREGVAR 16 /* 8 integer + 8 floating */
-#define REGBITS ((uint32)0xffff)
+#define REGBITS ((uint64)0xffffull)
/*c2go enum {
NREGVAR = 16,
REGBITS = (1<<NREGVAR) - 1,
@@ -71,7 +71,7 @@ setaddrs(Bits bit)
i = bnum(bit);
node = var[i].node;
n = var[i].name;
- bit.b[i/32] &= ~(1L<<(i%32));
+ biclr(&bit, i);
// disable all pieces of that variable
for(i=0; i<nvar; i++) {
@@ -336,7 +336,7 @@ loop2:
rgp->varno = i;
change = 0;
paint1(r, i);
- bit.b[i/32] &= ~(1L<<(i%32));
+ biclr(&bit, i);
if(change <= 0)
continue;
rgp->cost = change;
@@ -446,7 +446,7 @@ walkvardef(Node *n, Reg *r, int active)
break;
for(v=n->opt; v!=nil; v=v->nextinnode) {
bn = v - var;
- r1->act.b[bn/32] |= 1L << (bn%32);
+ biset(&r1->act, bn);
}
if(r1->f.prog->as == ACALL)
break;
@@ -788,10 +788,10 @@ prop(Reg *r, Bits ref, Bits cal)
for(z=0; z<BITS; z++) {
if(cal.b[z] == 0)
continue;
- for(i=0; i<32; i++) {
- if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+ for(i=0; i<64; i++) {
+ if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
continue;
- v = var+z*32+i;
+ v = var+z*64+i;
if(v->node->opt == nil) // v represents fixed register, not Go variable
continue;
@@ -807,10 +807,10 @@ prop(Reg *r, Bits ref, Bits cal)
// This will set the bits at most twice, keeping the overall loop linear.
v1 = v->node->opt;
j = v1 - var;
- if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) {
+ if(v == v1 || !btest(&cal, j)) {
for(; v1 != nil; v1 = v1->nextinnode) {
j = v1 - var;
- cal.b[j/32] |= 1<<(j&31);
+ biset(&cal, j);
}
}
}
@@ -926,10 +926,10 @@ paint1(Reg *r, int bn)
Reg *r1;
Prog *p;
int z;
- uint32 bb;
+ uint64 bb;
- z = bn/32;
- bb = 1L<<(bn%32);
+ z = bn/64;
+ bb = 1LL<<(bn%64);
if(r->act.b[z] & bb)
return;
for(;;) {
@@ -996,52 +996,14 @@ paint1(Reg *r, int bn)
}
uint32
-regset(Reg *r, uint32 bb)
-{
- uint32 b, set;
- Adr v;
- int c;
-
- set = 0;
- v = zprog.from;
- while(b = bb & ~(bb-1)) {
- v.type = b & 0xFF ? BtoR(b): BtoF(b);
- c = copyu(r->f.prog, &v, nil);
- if(c == 3)
- set |= b;
- bb &= ~b;
- }
- return set;
-}
-
-uint32
-reguse(Reg *r, uint32 bb)
-{
- uint32 b, set;
- Adr v;
- int c;
-
- set = 0;
- v = zprog.from;
- while(b = bb & ~(bb-1)) {
- v.type = b & 0xFF ? BtoR(b): BtoF(b);
- c = copyu(r->f.prog, &v, nil);
- if(c == 1 || c == 2 || c == 4)
- set |= b;
- bb &= ~b;
- }
- return set;
-}
-
-uint32
paint2(Reg *r, int bn)
{
Reg *r1;
int z;
- uint32 bb, vreg, x;
+ uint64 bb, vreg;
- z = bn/32;
- bb = 1L << (bn%32);
+ z = bn/64;
+ bb = 1LL << (bn%64);
vreg = regbits;
if(!(r->act.b[z] & bb))
return vreg;
@@ -1082,27 +1044,19 @@ paint2(Reg *r, int bn)
break;
}
- bb = vreg;
- for(; r; r=(Reg*)r->f.s1) {
- x = r->regu & ~bb;
- if(x) {
- vreg |= reguse(r, x);
- bb |= regset(r, x);
- }
- }
return vreg;
}
void
-paint3(Reg *r, int bn, int32 rb, int rn)
+paint3(Reg *r, int bn, uint32 rb, int rn)
{
Reg *r1;
Prog *p;
int z;
- uint32 bb;
+ uint64 bb;
- z = bn/32;
- bb = 1L << (bn%32);
+ z = bn/64;
+ bb = 1LL << (bn%64);
if(r->act.b[z] & bb)
return;
for(;;) {
@@ -1175,7 +1129,7 @@ addreg(Adr *a, int rn)
ostats.ncvtreg++;
}
-int32
+uint32
RtoB(int r)
{
@@ -1185,7 +1139,7 @@ RtoB(int r)
}
int
-BtoR(int32 b)
+BtoR(uint32 b)
{
b &= 0xffL;
@@ -1194,7 +1148,7 @@ BtoR(int32 b)
return bitno(b) + D_AX;
}
-int32
+uint32
FtoB(int f)
{
if(f < D_X0 || f > D_X7)
@@ -1203,7 +1157,7 @@ FtoB(int f)
}
int
-BtoF(int32 b)
+BtoF(uint32 b)
{
b &= 0xFF00L;
if(b == 0)
diff --git a/src/cmd/9g/ggen.c b/src/cmd/9g/ggen.c
index c41d8eb41..7d9cf5050 100644
--- a/src/cmd/9g/ggen.c
+++ b/src/cmd/9g/ggen.c
@@ -900,7 +900,7 @@ ret:
void
clearfat(Node *nl)
{
- uint64 w, c, q, t;
+ uint64 w, c, q, t, boff;
Node dst, end, r0, *f;
Prog *p, *pl;
@@ -944,6 +944,8 @@ clearfat(Node *nl)
patch(gbranch(ABNE, T, 0), pl);
regfree(&end);
+ // The loop leaves R3 on the last zeroed dword
+ boff = 8;
} else if(q >= 4) {
p = gins(ASUB, N, &dst);
p->from.type = D_CONST;
@@ -953,17 +955,21 @@ clearfat(Node *nl)
afunclit(&p->to, f);
// 4 and 128 = magic constants: see ../../runtime/asm_power64x.s
p->to.offset = 4*(128-q);
- } else
- for(t = 0; t < q; t++) {
- p = gins(AMOVD, &r0, &dst);
- p->to.type = D_OREG;
- p->to.offset = 8*t;
+ // duffzero leaves R3 on the last zeroed dword
+ boff = 8;
+ } else {
+ for(t = 0; t < q; t++) {
+ p = gins(AMOVD, &r0, &dst);
+ p->to.type = D_OREG;
+ p->to.offset = 8*t;
+ }
+ boff = 8*q;
}
for(t = 0; t < c; t++) {
p = gins(AMOVB, &r0, &dst);
p->to.type = D_OREG;
- p->to.offset = t;
+ p->to.offset = t+boff;
}
reg[REGRT1]--;
}
diff --git a/src/cmd/9g/gobj.c b/src/cmd/9g/gobj.c
index fdd7606bc..3da55878a 100644
--- a/src/cmd/9g/gobj.c
+++ b/src/cmd/9g/gobj.c
@@ -89,7 +89,7 @@ datagostring(Strlit *sval, Addr *a)
a->reg = NREG;
a->node = sym->def;
a->offset = 0; // header
- a->etype = TINT32;
+ a->etype = TSTRING;
}
void
diff --git a/src/cmd/9g/gsubr.c b/src/cmd/9g/gsubr.c
index d8b62b1da..e5cd5ed4b 100644
--- a/src/cmd/9g/gsubr.c
+++ b/src/cmd/9g/gsubr.c
@@ -1001,10 +1001,13 @@ hard:
Prog*
gins(int as, Node *f, Node *t)
{
- //int32 w;
+ int32 w;
Prog *p;
Addr af, at;
+ // TODO(austin): Add self-move test like in 6g (but be careful
+ // of truncation moves)
+
memset(&af, 0, sizeof af);
memset(&at, 0, sizeof at);
if(f != N)
@@ -1021,9 +1024,6 @@ gins(int as, Node *f, Node *t)
if(debug['g'])
print("%P\n", p);
- // TODO(minux): enable these.
- // right now it fails on MOVD $type."".TypeAssertionError(SB) [width=1], R7 [width=8]
- /*
w = 0;
switch(as) {
case AMOVB:
@@ -1046,15 +1046,16 @@ gins(int as, Node *f, Node *t)
break;
case AMOVD:
case AMOVDU:
+ if(af.type == D_CONST)
+ break;
w = 8;
break;
}
- if(w != 0 && ((f != N && af.width < w) || (t != N && at.width > w))) {
+ if(w != 0 && ((f != N && af.width < w) || (t != N && at.type != D_REG && at.width > w))) {
dump("f", f);
dump("t", t);
fatal("bad width: %P (%d, %d)\n", p, af.width, at.width);
}
- */
return p;
}
@@ -1116,12 +1117,9 @@ naddr(Node *n, Addr *a, int canemitcode)
case ONAME:
a->etype = 0;
- a->width = 0;
a->reg = NREG;
- if(n->type != T) {
+ if(n->type != T)
a->etype = simtype[n->type->etype];
- a->width = n->type->width;
- }
a->offset = n->xoffset;
s = n->sym;
a->node = n->orig;
@@ -1242,15 +1240,16 @@ naddr(Node *n, Addr *a, int canemitcode)
naddr(n->left, a, canemitcode);
a->etype = simtype[tptr];
if(a->type == D_CONST && a->offset == 0)
- break; // len(nil)
+ break; // itab(nil)
+ a->width = widthptr;
break;
case OSPTR:
// pointer in a string or slice
naddr(n->left, a, canemitcode);
+ a->etype = simtype[tptr];
if(a->type == D_CONST && a->offset == 0)
break; // ptr(nil)
- a->etype = simtype[tptr];
a->offset += Array_array;
a->width = widthptr;
break;
@@ -1262,6 +1261,7 @@ naddr(Node *n, Addr *a, int canemitcode)
if(a->type == D_CONST && a->offset == 0)
break; // len(nil)
a->offset += Array_nel;
+ a->width = widthint;
break;
case OCAP:
@@ -1271,6 +1271,7 @@ naddr(Node *n, Addr *a, int canemitcode)
if(a->type == D_CONST && a->offset == 0)
break; // cap(nil)
a->offset += Array_cap;
+ a->width = widthint;
break;
case OADDR:
@@ -1288,6 +1289,7 @@ naddr(Node *n, Addr *a, int canemitcode)
default:
fatal("naddr: OADDR %d\n", a->type);
}
+ break;
}
}
diff --git a/src/cmd/9g/opt.h b/src/cmd/9g/opt.h
index d3cbcb957..7f15b5a69 100644
--- a/src/cmd/9g/opt.h
+++ b/src/cmd/9g/opt.h
@@ -70,24 +70,40 @@ struct Reg
{
Flow f;
- Bits set; // variables written by this instruction.
- Bits use1; // variables read by prog->from.
- Bits use2; // variables read by prog->to.
+ Bits set; // regopt variables written by this instruction.
+ Bits use1; // regopt variables read by prog->from.
+ Bits use2; // regopt variables read by prog->to.
+ // refahead/refbehind are the regopt variables whose current
+ // value may be used in the following/preceding instructions
+ // up to a CALL (or the value is clobbered).
Bits refbehind;
Bits refahead;
+ // calahead/calbehind are similar, but for variables in
+ // instructions that are reachable after hitting at least one
+ // CALL.
Bits calbehind;
Bits calahead;
Bits regdiff;
Bits act;
- int32 regu; // register used bitmap
+ uint64 regu; // register used bitmap
};
#define R ((Reg*)0)
/*c2go extern Reg *R; */
#define NRGN 600
/*c2go enum { NRGN = 600 }; */
+
+// A Rgn represents a single regopt variable over a region of code
+// where a register could potentially be dedicated to that variable.
+// The code encompassed by a Rgn is defined by the flow graph,
+// starting at enter, flood-filling forward while varno is refahead
+// and backward while varno is refbehind, and following branches. A
+// single variable may be represented by multiple disjoint Rgns and
+// each Rgn may choose a different register for that variable.
+// Registers are allocated to regions greedily in order of descending
+// cost.
struct Rgn
{
Reg* enter;
@@ -104,7 +120,7 @@ EXTERN Rgn* rgp;
EXTERN int nregion;
EXTERN int nvar;
EXTERN int32 regbits;
-EXTERN int32 exregbits;
+EXTERN int32 exregbits; // TODO(austin) not used; remove
EXTERN Bits externs;
EXTERN Bits params;
EXTERN Bits consts;
@@ -118,10 +134,8 @@ EXTERN struct
{
int32 ncvtreg;
int32 nspill;
- int32 nreload;
int32 ndelmov;
int32 nvar;
- int32 naddr;
} ostats;
/*
@@ -133,10 +147,10 @@ void addmove(Reg*, int, int, int);
Bits mkvar(Reg*, Adr*);
void prop(Reg*, Bits, Bits);
void synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
+uint64 allreg(uint64, Rgn*);
void paint1(Reg*, int);
-uint32 paint2(Reg*, int);
-void paint3(Reg*, int, int32, int);
+uint64 paint2(Reg*, int, int);
+void paint3(Reg*, int, uint64, int);
void addreg(Adr*, int);
void dumpone(Flow*, int);
void dumpit(char*, Flow*, int);
@@ -160,8 +174,8 @@ typedef struct ProgInfo ProgInfo;
struct ProgInfo
{
uint32 flags; // the bits below
- uint64 reguse; // required registers used by this instruction
- uint64 regset; // required registers set by this instruction
+ uint64 reguse; // registers implicitly used by this instruction
+ uint64 regset; // registers implicitly set by this instruction
uint64 regindex; // registers used by addressing mode
};
@@ -182,20 +196,21 @@ enum
SizeF = 1<<7, // float aka float32
SizeD = 1<<8, // double aka float64
- // Left side: address taken, read, write.
+ // Left side (Prog.from): address taken, read, write.
LeftAddr = 1<<9,
LeftRead = 1<<10,
LeftWrite = 1<<11,
-
- // Register in middle; never written.
+
+ // Register in middle (Prog.reg); only ever read.
RegRead = 1<<12,
CanRegRead = 1<<13,
-
- // Right side: address taken, read, write.
+
+ // Right side (Prog.to): address taken, read, write.
RightAddr = 1<<14,
RightRead = 1<<15,
RightWrite = 1<<16,
+ // Instruction updates whichever of from/to is type D_OREG
PostInc = 1<<17,
// Instruction kinds
diff --git a/src/cmd/9g/peep.c b/src/cmd/9g/peep.c
index 5721d7b04..ec314d633 100644
--- a/src/cmd/9g/peep.c
+++ b/src/cmd/9g/peep.c
@@ -44,13 +44,15 @@ peep(Prog *p)
void
excise(Flow *r)
{
- Prog *p;
+ Prog *p, *l;
p = r->prog;
if(debug['P'] && debug['v'])
print("%P ===delete===\n", p);
+ l = p->link;
*p = zprog;
p->as = ANOP;
+ p->link = l;
ostats.ndelmov++;
}
diff --git a/src/cmd/9g/prog.c b/src/cmd/9g/prog.c
index 0a51a533a..e3e50f28a 100644
--- a/src/cmd/9g/prog.c
+++ b/src/cmd/9g/prog.c
@@ -96,11 +96,8 @@ static ProgInfo progtable[ALAST] = {
[ABGT]= {Cjmp},
[ABLE]= {Cjmp},
[ARETURN]= {Break},
- // In addtion, duffzero reads R0,R2 and writes R2. This fact must be
- // encoded in peep.c (TODO)
+
[ADUFFZERO]= {Call},
- // In addtion, duffcopy reads R0,R2,R3 and writes R2,R3. This fact must be
- // encoded in peep.c (TODO)
[ADUFFCOPY]= {Call},
};
@@ -118,14 +115,14 @@ proginfo(ProgInfo *info, Prog *p)
info->flags |= /*CanRegRead |*/ RightRead;
}
- if(p->from.type == D_OREG && p->from.reg != NREG) {
- info->reguse |= RtoB(p->from.reg);
+ if((p->from.type == D_OREG || p->from.type == D_CONST) && p->from.reg != NREG) {
+ info->regindex |= RtoB(p->from.reg);
if(info->flags & PostInc) {
info->regset |= RtoB(p->from.reg);
}
}
- if(p->to.type == D_OREG && p->to.reg != NREG) {
- info->reguse |= RtoB(p->to.reg);
+ if((p->to.type == D_OREG || p->to.type == D_CONST) && p->to.reg != NREG) {
+ info->regindex |= RtoB(p->to.reg);
if(info->flags & PostInc) {
info->regset |= RtoB(p->to.reg);
}
@@ -135,4 +132,13 @@ proginfo(ProgInfo *info, Prog *p)
info->flags &= ~LeftRead;
info->flags |= LeftAddr;
}
+
+ if(p->as == ADUFFZERO) {
+ info->reguse |= RtoB(0) | RtoB(2);
+ info->regset |= RtoB(2);
+ }
+ if(p->as == ADUFFCOPY) {
+ info->reguse |= RtoB(0) | RtoB(2) | RtoB(3);
+ info->regset |= RtoB(2) | RtoB(3);
+ }
}
diff --git a/src/cmd/9g/reg.c b/src/cmd/9g/reg.c
index bbebf3fe0..b911a2399 100644
--- a/src/cmd/9g/reg.c
+++ b/src/cmd/9g/reg.c
@@ -33,14 +33,1197 @@
#include "gg.h"
#include "opt.h"
+#define NREGVAR 64 /* 32 general + 32 floating */
+#define REGBITS ((uint64)0xffffffffffffffffull)
+/*c2go enum {
+ NREGVAR = 64,
+ REGBITS = 0xffffffffffffffff,
+};
+*/
+
+static Reg* firstr;
+static int first = 1;
+
+int
+rcmp(const void *a1, const void *a2)
+{
+ Rgn *p1, *p2;
+ int c1, c2;
+
+ p1 = (Rgn*)a1;
+ p2 = (Rgn*)a2;
+ c1 = p2->cost;
+ c2 = p1->cost;
+ if(c1 -= c2)
+ return c1;
+ return p2->varno - p1->varno;
+}
+
+static void
+setaddrs(Bits bit)
+{
+ int i, n;
+ Var *v;
+ Node *node;
+
+ while(bany(&bit)) {
+ // convert each bit to a variable
+ i = bnum(bit);
+ node = var[i].node;
+ n = var[i].name;
+ biclr(&bit, i);
+
+ // disable all pieces of that variable
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node && v->name == n)
+ v->addr = 2;
+ }
+ }
+}
+
+static char* regname[] = {
+ ".R0",
+ ".R1",
+ ".R2",
+ ".R3",
+ ".R4",
+ ".R5",
+ ".R6",
+ ".R7",
+ ".R8",
+ ".R9",
+ ".R10",
+ ".R11",
+ ".R12",
+ ".R13",
+ ".R14",
+ ".R15",
+ ".R16",
+ ".R17",
+ ".R18",
+ ".R19",
+ ".R20",
+ ".R21",
+ ".R22",
+ ".R23",
+ ".R24",
+ ".R25",
+ ".R26",
+ ".R27",
+ ".R28",
+ ".R29",
+ ".R30",
+ ".R31",
+ ".F0",
+ ".F1",
+ ".F2",
+ ".F3",
+ ".F4",
+ ".F5",
+ ".F6",
+ ".F7",
+ ".F8",
+ ".F9",
+ ".F10",
+ ".F11",
+ ".F12",
+ ".F13",
+ ".F14",
+ ".F15",
+ ".F16",
+ ".F17",
+ ".F18",
+ ".F19",
+ ".F20",
+ ".F21",
+ ".F22",
+ ".F23",
+ ".F24",
+ ".F25",
+ ".F26",
+ ".F27",
+ ".F28",
+ ".F29",
+ ".F30",
+ ".F31",
+};
+
+static Node* regnodes[NREGVAR];
+
+static void walkvardef(Node *n, Reg *r, int active);
+
void
-regopt(Prog *p)
+regopt(Prog *firstp)
{
- USED(p);
- // TODO(minux)
+ Reg *r, *r1;
+ Prog *p;
+ Graph *g;
+ ProgInfo info;
+ int i, z, active;
+ uint64 vreg, usedreg;
+ Bits bit;
+
+ if(first) {
+ fmtinstall('Q', Qconv);
+ first = 0;
+ }
+
+ mergetemp(firstp);
+
+ /*
+ * control flow is more complicated in generated go code
+ * than in generated c code. define pseudo-variables for
+ * registers, so we have complete register usage information.
+ */
+ nvar = NREGVAR;
+ memset(var, 0, NREGVAR*sizeof var[0]);
+ for(i=0; i<NREGVAR; i++) {
+ if(regnodes[i] == N)
+ regnodes[i] = newname(lookup(regname[i]));
+ var[i].node = regnodes[i];
+ }
+
+ // Exclude registers with fixed functions
+ regbits = (1<<D_R0)|RtoB(REGSP)|RtoB(REGG);
+ // Also exclude floating point registers with fixed constants
+ regbits |= FtoB(D_F0+27)|FtoB(D_F0+28)|FtoB(D_F0+29)|FtoB(D_F0+30)|FtoB(D_F0+31);
+ externs = zbits;
+ params = zbits;
+ consts = zbits;
+ addrs = zbits;
+ ivar = zbits;
+ ovar = zbits;
+
+ /*
+ * pass 1
+ * build aux data structure
+ * allocate pcs
+ * find use and set of variables
+ */
+ g = flowstart(firstp, sizeof(Reg));
+ if(g == nil) {
+ for(i=0; i<nvar; i++)
+ var[i].node->opt = nil;
+ return;
+ }
+
+ firstr = (Reg*)g->start;
+
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ p = r->f.prog;
+ if(p->as == AVARDEF || p->as == AVARKILL)
+ continue;
+ proginfo(&info, p);
+
+ // Avoid making variables for direct-called functions.
+ if(p->as == ABL && p->to.name == D_EXTERN)
+ continue;
+
+ // from vs to doesn't matter for registers
+ r->use1.b[0] |= info.reguse | info.regindex;
+ r->set.b[0] |= info.regset;
+
+ // Compute used register for from
+ bit = mkvar(r, &p->from);
+ if(info.flags & LeftAddr)
+ setaddrs(bit);
+ if(info.flags & LeftRead)
+ for(z=0; z<BITS; z++)
+ r->use1.b[z] |= bit.b[z];
+
+ // Compute used register for reg
+ if(info.flags & RegRead) {
+ if(p->from.type != D_FREG)
+ r->use1.b[0] |= RtoB(p->reg);
+ else
+ r->use1.b[0] |= FtoB(D_F0+p->reg);
+ }
+
+ // Currently we never generate three register forms.
+ // If we do, this will need to change.
+ if(p->from3.type != D_NONE)
+ fatal("regopt not implemented for from3");
+
+ // Compute used register for to
+ bit = mkvar(r, &p->to);
+ if(info.flags & RightAddr)
+ setaddrs(bit);
+ if(info.flags & RightRead)
+ for(z=0; z<BITS; z++)
+ r->use2.b[z] |= bit.b[z];
+ if(info.flags & RightWrite)
+ for(z=0; z<BITS; z++)
+ r->set.b[z] |= bit.b[z];
+ }
+
+ for(i=0; i<nvar; i++) {
+ Var *v = var+i;
+ if(v->addr) {
+ bit = blsh(i);
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ }
+
+ if(debug['R'] && debug['v'])
+ print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
+ i, v->addr, v->etype, v->width, v->node, v->offset);
+ }
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass1", &firstr->f, 1);
+
+ /*
+ * pass 2
+ * find looping structure
+ */
+ flowrpo(g);
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass2", &firstr->f, 1);
+
+ /*
+ * pass 2.5
+ * iterate propagating fat vardef covering forward
+ * r->act records vars with a VARDEF since the last CALL.
+ * (r->act will be reused in pass 5 for something else,
+ * but we'll be done with it by then.)
+ */
+ active = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ r->f.active = 0;
+ r->act = zbits;
+ }
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ p = r->f.prog;
+ if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
+ active++;
+ walkvardef(p->to.node, r, active);
+ }
+ }
+
+ /*
+ * pass 3
+ * iterate propagating usage
+ * back until flow graph is complete
+ */
+loop1:
+ change = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ r->f.active = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ if(r->f.prog->as == ARET)
+ prop(r, zbits, zbits);
+loop11:
+ /* pick up unreachable code */
+ i = 0;
+ for(r = firstr; r != R; r = r1) {
+ r1 = (Reg*)r->f.link;
+ if(r1 && r1->f.active && !r->f.active) {
+ prop(r, zbits, zbits);
+ i = 1;
+ }
+ }
+ if(i)
+ goto loop11;
+ if(change)
+ goto loop1;
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass3", &firstr->f, 1);
+
+ /*
+ * pass 4
+ * iterate propagating register/variable synchrony
+ * forward until graph is complete
+ */
+loop2:
+ change = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ r->f.active = 0;
+ synch(firstr, zbits);
+ if(change)
+ goto loop2;
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass4", &firstr->f, 1);
+
+ /*
+ * pass 4.5
+ * move register pseudo-variables into regu.
+ */
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
+
+ r->set.b[0] &= ~REGBITS;
+ r->use1.b[0] &= ~REGBITS;
+ r->use2.b[0] &= ~REGBITS;
+ r->refbehind.b[0] &= ~REGBITS;
+ r->refahead.b[0] &= ~REGBITS;
+ r->calbehind.b[0] &= ~REGBITS;
+ r->calahead.b[0] &= ~REGBITS;
+ r->regdiff.b[0] &= ~REGBITS;
+ r->act.b[0] &= ~REGBITS;
+ }
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass4.5", &firstr->f, 1);
+
+ /*
+ * pass 5
+ * isolate regions
+ * calculate costs (paint1)
+ */
+ r = firstr;
+ if(r) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+ ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+ if(bany(&bit) && !r->f.refset) {
+ // should never happen - all variables are preset
+ if(debug['w'])
+ print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
+ r->f.refset = 1;
+ }
+ }
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ r->act = zbits;
+ rgp = region;
+ nregion = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = r->set.b[z] &
+ ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+ if(bany(&bit) && !r->f.refset) {
+ if(debug['w'])
+ print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
+ r->f.refset = 1;
+ excise(&r->f);
+ }
+ for(z=0; z<BITS; z++)
+ bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+ while(bany(&bit)) {
+ i = bnum(bit);
+ rgp->enter = r;
+ rgp->varno = i;
+ change = 0;
+ paint1(r, i);
+ biclr(&bit, i);
+ if(change <= 0)
+ continue;
+ rgp->cost = change;
+ nregion++;
+ if(nregion >= NRGN) {
+ if(debug['R'] && debug['v'])
+ print("too many regions\n");
+ goto brk;
+ }
+ rgp++;
+ }
+ }
+brk:
+ qsort(region, nregion, sizeof(region[0]), rcmp);
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass5", &firstr->f, 1);
+
+ /*
+ * pass 6
+ * determine used registers (paint2)
+ * replace code (paint3)
+ */
+ rgp = region;
+ if(debug['R'] && debug['v'])
+ print("\nregisterizing\n");
+ for(i=0; i<nregion; i++) {
+ if(debug['R'] && debug['v'])
+ print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
+ bit = blsh(rgp->varno);
+ usedreg = paint2(rgp->enter, rgp->varno, 0);
+ vreg = allreg(usedreg, rgp);
+ if(rgp->regno != 0) {
+ if(debug['R'] && debug['v']) {
+ Var *v;
+
+ v = var + rgp->varno;
+ print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n",
+ v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
+ }
+ paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+ }
+ rgp++;
+ }
+
+ /*
+ * free aux structures. peep allocates new ones.
+ */
+ for(i=0; i<nvar; i++)
+ var[i].node->opt = nil;
+ flowend(g);
+ firstr = R;
+
+ if(debug['R'] && debug['v']) {
+ // Rebuild flow graph, since we inserted instructions
+ g = flowstart(firstp, sizeof(Reg));
+ firstr = (Reg*)g->start;
+ dumpit("pass6", &firstr->f, 1);
+ flowend(g);
+ firstr = R;
+ }
+
+ /*
+ * pass 7
+ * peep-hole on basic block
+ */
+ if(!debug['R'] || debug['P'])
+ peep(firstp);
+
+ /*
+ * eliminate nops
+ */
+ for(p=firstp; p!=P; p=p->link) {
+ while(p->link != P && p->link->as == ANOP)
+ p->link = p->link->link;
+ if(p->to.type == D_BRANCH)
+ while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
+ p->to.u.branch = p->to.u.branch->link;
+ }
+
+ if(debug['R']) {
+ if(ostats.ncvtreg ||
+ ostats.nspill ||
+ ostats.ndelmov ||
+ ostats.nvar ||
+ 0)
+ print("\nstats\n");
+
+ if(ostats.ncvtreg)
+ print(" %4d cvtreg\n", ostats.ncvtreg);
+ if(ostats.nspill)
+ print(" %4d spill\n", ostats.nspill);
+ if(ostats.ndelmov)
+ print(" %4d delmov\n", ostats.ndelmov);
+ if(ostats.nvar)
+ print(" %4d var\n", ostats.nvar);
+
+ memset(&ostats, 0, sizeof(ostats));
+ }
+
return;
}
+static void
+walkvardef(Node *n, Reg *r, int active)
+{
+ Reg *r1, *r2;
+ int bn;
+ Var *v;
+
+ for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
+ if(r1->f.active == active)
+ break;
+ r1->f.active = active;
+ if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
+ break;
+ for(v=n->opt; v!=nil; v=v->nextinnode) {
+ bn = v - var;
+ biset(&r1->act, bn);
+ }
+ if(r1->f.prog->as == ABL)
+ break;
+ }
+
+ for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
+ if(r2->f.s2 != nil)
+ walkvardef(n, (Reg*)r2->f.s2, active);
+}
+
+/*
+ * add mov b,rn
+ * just after r
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+ Prog *p, *p1, *p2;
+ Adr *a;
+ Var *v;
+
+ p1 = mal(sizeof(*p1));
+ *p1 = zprog;
+ p = r->f.prog;
+
+ // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc),
+ // delay the load until after the fixup.
+ p2 = p->link;
+ if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == D_REG)
+ p = p2;
+
+ p1->link = p->link;
+ p->link = p1;
+ p1->lineno = p->lineno;
+
+ v = var + bn;
+
+ a = &p1->to;
+ a->name = v->name;
+ a->node = v->node;
+ a->sym = linksym(v->node->sym);
+ a->offset = v->offset;
+ a->etype = v->etype;
+ a->type = D_OREG;
+ if(a->etype == TARRAY || a->sym == nil)
+ a->type = D_CONST;
+
+ if(v->addr)
+ fatal("addmove: shouldn't be doing this %A\n", a);
+
+ switch(v->etype) {
+ default:
+ print("What is this %E\n", v->etype);
+
+ case TINT8:
+ p1->as = AMOVB;
+ break;
+ case TBOOL:
+ case TUINT8:
+//print("movbu %E %d %S\n", v->etype, bn, v->sym);
+ p1->as = AMOVBZ;
+ break;
+ case TINT16:
+ p1->as = AMOVH;
+ break;
+ case TUINT16:
+ p1->as = AMOVHZ;
+ break;
+ case TINT32:
+ p1->as = AMOVW;
+ break;
+ case TUINT32:
+ case TPTR32:
+ p1->as = AMOVWZ;
+ break;
+ case TINT64:
+ case TUINT64:
+ case TPTR64:
+ p1->as = AMOVD;
+ break;
+ case TFLOAT32:
+ p1->as = AFMOVS;
+ break;
+ case TFLOAT64:
+ p1->as = AFMOVD;
+ break;
+ }
+
+ p1->from.type = D_REG;
+ p1->from.reg = rn;
+ if(rn >= NREG) {
+ p1->from.type = D_FREG;
+ p1->from.reg = rn-NREG;
+ }
+ if(!f) {
+ p1->from = *a;
+ *a = zprog.from;
+ a->type = D_REG;
+ a->reg = rn;
+ if(rn >= NREG) {
+ a->type = D_FREG;
+ a->reg = rn-NREG;
+ }
+ if(v->etype == TUINT8 || v->etype == TBOOL)
+ p1->as = AMOVBZ;
+ if(v->etype == TUINT16)
+ p1->as = AMOVHZ;
+ }
+ if(debug['R'])
+ print("%P\t.a%P\n", p, p1);
+ ostats.nspill++;
+}
+
+static int
+overlap(int64 o1, int w1, int64 o2, int w2)
+{
+ int64 t1, t2;
+
+ t1 = o1+w1;
+ t2 = o2+w2;
+
+ if(!(t1 > o2 && t2 > o1))
+ return 0;
+
+ return 1;
+}
+
+Bits
+mkvar(Reg *r, Adr *a)
+{
+ USED(r);
+ Var *v;
+ int i, t, n, et, z, flag;
+ int64 w;
+ int64 o;
+ Bits bit;
+ Node *node;
+
+ // mark registers used
+ t = a->type;
+ switch(t) {
+ default:
+ print("type %d %d %D\n", t, a->name, a);
+ goto none;
+
+ case D_NONE:
+ goto none;
+
+ case D_BRANCH:
+ case D_CONST:
+ case D_FCONST:
+ case D_SCONST:
+ case D_SPR:
+ case D_OREG:
+ break;
+
+ case D_REG:
+ if(a->reg != NREG) {
+ bit = zbits;
+ bit.b[0] = RtoB(a->reg);
+ return bit;
+ }
+ break;
+
+ case D_FREG:
+ if(a->reg != NREG) {
+ bit = zbits;
+ bit.b[0] = FtoB(D_F0+a->reg);
+ return bit;
+ }
+ break;
+ }
+
+ switch(a->name) {
+ default:
+ goto none;
+
+ case D_EXTERN:
+ case D_STATIC:
+ case D_AUTO:
+ case D_PARAM:
+ n = a->name;
+ break;
+ }
+
+ node = a->node;
+ if(node == N || node->op != ONAME || node->orig == N)
+ goto none;
+ node = node->orig;
+ if(node->orig != node)
+ fatal("%D: bad node", a);
+ if(node->sym == S || node->sym->name[0] == '.')
+ goto none;
+ et = a->etype;
+ o = a->offset;
+ w = a->width;
+ if(w < 0)
+ fatal("bad width %lld for %D", w, a);
+
+ flag = 0;
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node && v->name == n) {
+ if(v->offset == o)
+ if(v->etype == et)
+ if(v->width == w)
+ return blsh(i);
+
+ // if they overlap, disable both
+ if(overlap(v->offset, v->width, o, w)) {
+ v->addr = 1;
+ flag = 1;
+ }
+ }
+ }
+
+ switch(et) {
+ case 0:
+ case TFUNC:
+ goto none;
+ }
+
+ if(nvar >= NVAR) {
+ if(debug['w'] > 1 && node != N)
+ fatal("variable not optimized: %#N", node);
+
+ // If we're not tracking a word in a variable, mark the rest as
+ // having its address taken, so that we keep the whole thing
+ // live at all calls. otherwise we might optimize away part of
+ // a variable but not all of it.
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node)
+ v->addr = 1;
+ }
+ goto none;
+ }
+
+ i = nvar;
+ nvar++;
+ v = var+i;
+ v->offset = o;
+ v->name = n;
+ v->etype = et;
+ v->width = w;
+ v->addr = flag; // funny punning
+ v->node = node;
+
+ // node->opt is the head of a linked list
+ // of Vars within the given Node, so that
+ // we can start at a Var and find all the other
+ // Vars in the same Go variable.
+ v->nextinnode = node->opt;
+ node->opt = v;
+
+ bit = blsh(i);
+ if(n == D_EXTERN || n == D_STATIC)
+ for(z=0; z<BITS; z++)
+ externs.b[z] |= bit.b[z];
+ if(n == D_PARAM)
+ for(z=0; z<BITS; z++)
+ params.b[z] |= bit.b[z];
+
+ if(node->class == PPARAM)
+ for(z=0; z<BITS; z++)
+ ivar.b[z] |= bit.b[z];
+ if(node->class == PPARAMOUT)
+ for(z=0; z<BITS; z++)
+ ovar.b[z] |= bit.b[z];
+
+ // Treat values with their address taken as live at calls,
+ // because the garbage collector's liveness analysis in ../gc/plive.c does.
+ // These must be consistent or else we will elide stores and the garbage
+ // collector will see uninitialized data.
+ // The typical case where our own analysis is out of sync is when the
+ // node appears to have its address taken but that code doesn't actually
+ // get generated and therefore doesn't show up as an address being
+ // taken when we analyze the instruction stream.
+ // One instance of this case is when a closure uses the same name as
+ // an outer variable for one of its own variables declared with :=.
+ // The parser flags the outer variable as possibly shared, and therefore
+ // sets addrtaken, even though it ends up not being actually shared.
+ // If we were better about _ elision, _ = &x would suffice too.
+ // The broader := in a closure problem is mentioned in a comment in
+ // closure.c:/^typecheckclosure and dcl.c:/^oldname.
+ if(node->addrtaken)
+ v->addr = 1;
+
+ // Disable registerization for globals, because:
+ // (1) we might panic at any time and we want the recovery code
+ // to see the latest values (issue 1304).
+ // (2) we don't know what pointers might point at them and we want
+ // loads via those pointers to see updated values and vice versa (issue 7995).
+ //
+ // Disable registerization for results if using defer, because the deferred func
+ // might recover and return, causing the current values to be used.
+ if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
+ v->addr = 1;
+
+ if(debug['R'])
+ print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
+ ostats.nvar++;
+
+ return bit;
+
+none:
+ return zbits;
+}
+
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+ Reg *r1, *r2;
+ int z, i, j;
+ Var *v, *v1;
+
+ for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
+ for(z=0; z<BITS; z++) {
+ ref.b[z] |= r1->refahead.b[z];
+ if(ref.b[z] != r1->refahead.b[z]) {
+ r1->refahead.b[z] = ref.b[z];
+ change++;
+ }
+ cal.b[z] |= r1->calahead.b[z];
+ if(cal.b[z] != r1->calahead.b[z]) {
+ r1->calahead.b[z] = cal.b[z];
+ change++;
+ }
+ }
+ switch(r1->f.prog->as) {
+ case ABL:
+ if(noreturn(r1->f.prog))
+ break;
+
+ // Mark all input variables (ivar) as used, because that's what the
+ // liveness bitmaps say. The liveness bitmaps say that so that a
+ // panic will not show stale values in the parameter dump.
+ // Mark variables with a recent VARDEF (r1->act) as used,
+ // so that the optimizer flushes initializations to memory,
+ // so that if a garbage collection happens during this CALL,
+ // the collector will see initialized memory. Again this is to
+ // match what the liveness bitmaps say.
+ for(z=0; z<BITS; z++) {
+ cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
+ ref.b[z] = 0;
+ }
+
+ // cal.b is the current approximation of what's live across the call.
+ // Every bit in cal.b is a single stack word. For each such word,
+ // find all the other tracked stack words in the same Go variable
+ // (struct/slice/string/interface) and mark them live too.
+ // This is necessary because the liveness analysis for the garbage
+ // collector works at variable granularity, not at word granularity.
+ // It is fundamental for slice/string/interface: the garbage collector
+ // needs the whole value, not just some of the words, in order to
+ // interpret the other bits correctly. Specifically, slice needs a consistent
+ // ptr and cap, string needs a consistent ptr and len, and interface
+ // needs a consistent type word and data word.
+ for(z=0; z<BITS; z++) {
+ if(cal.b[z] == 0)
+ continue;
+ for(i=0; i<64; i++) {
+ if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+ continue;
+ v = var+z*64+i;
+ if(v->node->opt == nil) // v represents fixed register, not Go variable
+ continue;
+
+ // v->node->opt is the head of a linked list of Vars
+ // corresponding to tracked words from the Go variable v->node.
+ // Walk the list and set all the bits.
+ // For a large struct this could end up being quadratic:
+ // after the first setting, the outer loop (for z, i) would see a 1 bit
+ // for all of the remaining words in the struct, and for each such
+ // word would go through and turn on all the bits again.
+ // To avoid the quadratic behavior, we only turn on the bits if
+ // v is the head of the list or if the head's bit is not yet turned on.
+ // This will set the bits at most twice, keeping the overall loop linear.
+ v1 = v->node->opt;
+ j = v1 - var;
+ if(v == v1 || !btest(&cal, j)) {
+ for(; v1 != nil; v1 = v1->nextinnode) {
+ j = v1 - var;
+ biset(&cal, j);
+ }
+ }
+ }
+ }
+ break;
+
+ case ATEXT:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = 0;
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ARET:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = externs.b[z] | ovar.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+ }
+ for(z=0; z<BITS; z++) {
+ ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+ r1->use1.b[z] | r1->use2.b[z];
+ cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+ r1->refbehind.b[z] = ref.b[z];
+ r1->calbehind.b[z] = cal.b[z];
+ }
+ if(r1->f.active)
+ break;
+ r1->f.active = 1;
+ }
+ for(; r != r1; r = (Reg*)r->f.p1)
+ for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
+ prop(r2, r->refbehind, r->calbehind);
+}
+
+void
+synch(Reg *r, Bits dif)
+{
+ Reg *r1;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
+ for(z=0; z<BITS; z++) {
+ dif.b[z] = (dif.b[z] &
+ ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
+ r1->set.b[z] | r1->regdiff.b[z];
+ if(dif.b[z] != r1->regdiff.b[z]) {
+ r1->regdiff.b[z] = dif.b[z];
+ change++;
+ }
+ }
+ if(r1->f.active)
+ break;
+ r1->f.active = 1;
+ for(z=0; z<BITS; z++)
+ dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
+ if(r1->f.s2 != nil)
+ synch((Reg*)r1->f.s2, dif);
+ }
+}
+
+uint64
+allreg(uint64 b, Rgn *r)
+{
+ Var *v;
+ int i;
+
+ v = var + r->varno;
+ r->regno = 0;
+ switch(v->etype) {
+
+ default:
+ fatal("unknown etype %d/%E", bitno(b), v->etype);
+ break;
+
+ case TINT8:
+ case TUINT8:
+ case TINT16:
+ case TUINT16:
+ case TINT32:
+ case TUINT32:
+ case TINT64:
+ case TUINT64:
+ case TINT:
+ case TUINT:
+ case TUINTPTR:
+ case TBOOL:
+ case TPTR32:
+ case TPTR64:
+ i = BtoR(~b);
+ if(i && r->cost > 0) {
+ r->regno = i;
+ return RtoB(i);
+ }
+ break;
+
+ case TFLOAT32:
+ case TFLOAT64:
+ i = BtoF(~b);
+ if(i && r->cost > 0) {
+ r->regno = i;
+ return FtoB(i);
+ }
+ break;
+ }
+ return 0;
+}
+
+void
+paint1(Reg *r, int bn)
+{
+ Reg *r1;
+ int z;
+ uint64 bb;
+
+ z = bn/64;
+ bb = 1LL<<(bn%64);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+ change -= CLOAD * r->f.loop;
+ }
+ for(;;) {
+ r->act.b[z] |= bb;
+
+ if(r->f.prog->as != ANOP) { // don't give credit for NOPs
+ if(r->use1.b[z] & bb)
+ change += CREF * r->f.loop;
+ if((r->use2.b[z]|r->set.b[z]) & bb)
+ change += CREF * r->f.loop;
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb) {
+ change -= CLOAD * r->f.loop;
+ }
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+ if(r1->refahead.b[z] & bb)
+ paint1(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint1(r1, bn);
+ r = (Reg*)r->f.s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+uint64
+paint2(Reg *r, int bn, int depth)
+{
+ Reg *r1;
+ int z;
+ uint64 bb, vreg;
+
+ z = bn/64;
+ bb = 1LL << (bn%64);
+ vreg = regbits;
+ if(!(r->act.b[z] & bb))
+ return vreg;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(!(r1->act.b[z] & bb))
+ break;
+ r = r1;
+ }
+ for(;;) {
+ if(debug['R'] && debug['v'])
+ print(" paint2 %d %P\n", depth, r->f.prog);
+
+ r->act.b[z] &= ~bb;
+
+ vreg |= r->regu;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+ if(r1->refahead.b[z] & bb)
+ vreg |= paint2(r1, bn, depth+1);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ vreg |= paint2(r1, bn, depth+1);
+ r = (Reg*)r->f.s1;
+ if(r == R)
+ break;
+ if(!(r->act.b[z] & bb))
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+ return vreg;
+}
+
+void
+paint3(Reg *r, int bn, uint64 rb, int rn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint64 bb;
+
+ z = bn/64;
+ bb = 1LL << (bn%64);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+ addmove(r, bn, rn, 0);
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->f.prog;
+
+ if(r->use1.b[z] & bb) {
+ if(debug['R'] && debug['v'])
+ print("%P", p);
+ addreg(&p->from, rn);
+ if(debug['R'] && debug['v'])
+ print(" ===change== %P\n", p);
+ }
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ if(debug['R'] && debug['v'])
+ print("%P", p);
+ addreg(&p->to, rn);
+ if(debug['R'] && debug['v'])
+ print(" ===change== %P\n", p);
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb)
+ addmove(r, bn, rn, 1);
+ r->regu |= rb;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+ if(r1->refahead.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+ r = (Reg*)r->f.s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+void
+addreg(Adr *a, int rn)
+{
+ a->sym = nil;
+ a->node = nil;
+ a->name = D_NONE;
+ a->type = D_REG;
+ a->reg = rn;
+ if(rn >= NREG) {
+ a->type = D_FREG;
+ a->reg = rn-NREG;
+ }
+
+ ostats.ncvtreg++;
+}
+
/*
* track register variables including external registers:
* bit reg
@@ -56,7 +1239,7 @@ regopt(Prog *p)
uint64
RtoB(int r)
{
- if(r >= D_R0 && r <= D_R0+31)
+ if(r > D_R0 && r <= D_R0+31)
return 1ULL << (r - D_R0);
return 0;
}
@@ -64,7 +1247,7 @@ RtoB(int r)
int
BtoR(uint64 b)
{
- b &= 0xffffffff;
+ b &= 0xffffffffull;
if(b == 0)
return 0;
return bitno(b) + D_R0;
@@ -139,6 +1322,7 @@ void
dumpit(char *str, Flow *r0, int isreg)
{
Flow *r, *r1;
+ int s1v, s2v;
print("\n%s\n", str);
for(r = r0; r != nil; r = r->link) {
@@ -150,12 +1334,16 @@ dumpit(char *str, Flow *r0, int isreg)
print(" %.4ud", (int)r1->prog->pc);
print("\n");
}
-// r1 = r->s1;
-// if(r1 != R) {
-// print(" succ:");
-// for(; r1 != R; r1 = r1->s1)
-// print(" %.4ud", (int)r1->prog->pc);
-// print("\n");
-// }
+ // If at least one successor is "interesting", print both
+ s1v = (r->s1 != nil) && (r->s1->prog != r->prog->link);
+ s2v = (r->s2 != nil) && (r->s2->prog != r->prog->link);
+ if(s1v || s2v) {
+ print(" succ:");
+ if(r->s1 != nil)
+ print(" %.4ud", (int)r->s1->prog->pc);
+ if(r->s2 != nil)
+ print(" %.4ud", (int)r->s2->prog->pc);
+ print("\n");
+ }
}
}
diff --git a/src/cmd/9l/9.out.h b/src/cmd/9l/9.out.h
index e494e90ca..08a339318 100644
--- a/src/cmd/9l/9.out.h
+++ b/src/cmd/9l/9.out.h
@@ -131,7 +131,7 @@ enum
C_NCLASS, /* must be the last */
};
-enum as
+enum
{
AXXX,
AADD,
@@ -501,7 +501,7 @@ enum
D_R0 = 0, // type is D_REG
D_F0 = D_R0+NREG, // type is D_FREG
-/* reg names iff type is D_SPR */
+/* reg names in offset field iff type is D_SPR */
D_XER = 1,
D_LR = 8,
D_CTR = 9
diff --git a/src/cmd/gc/bits.c b/src/cmd/gc/bits.c
index 2e79f6f1d..fe9a168dc 100644
--- a/src/cmd/gc/bits.c
+++ b/src/cmd/gc/bits.c
@@ -95,11 +95,11 @@ int
bnum(Bits a)
{
int i;
- int32 b;
+ uint64 b;
for(i=0; i<BITS; i++)
if(b = a.b[i])
- return 32*i + bitno(b);
+ return 64*i + bitno(b);
fatal("bad in bnum");
return 0;
}
@@ -110,27 +110,35 @@ blsh(uint n)
Bits c;
c = zbits;
- c.b[n/32] = 1L << (n%32);
+ c.b[n/64] = 1LL << (n%64);
return c;
}
-/*
int
-bset(Bits a, uint n)
+btest(Bits *a, uint n)
{
- if(a.b[n/32] & (1L << (n%32)))
- return 1;
- return 0;
+ return (a->b[n/64] & (1LL << (n%64))) != 0;
+}
+
+void
+biset(Bits *a, uint n)
+{
+ a->b[n/64] |= 1LL << (n%64);
+}
+
+void
+biclr(Bits *a, uint n)
+{
+ a->b[n/64] &= ~(1LL << (n%64));
}
-*/
int
-bitno(int32 b)
+bitno(uint64 b)
{
int i;
- for(i=0; i<32; i++)
- if(b & (1L<<i))
+ for(i=0; i<64; i++)
+ if(b & (1LL<<i))
return i;
fatal("bad in bitno");
return 0;
@@ -157,7 +165,7 @@ Qconv(Fmt *fp)
if(var[i].offset != 0)
fmtprint(fp, "%+lld", (vlong)var[i].offset);
}
- bits.b[i/32] &= ~(1L << (i%32));
+ biclr(&bits, i);
}
return 0;
}
diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h
index cc590416b..c695c5bf3 100644
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -704,13 +704,13 @@ enum
Ecomplit = 1<<11, // type in composite literal
};
-#define BITS 5
-#define NVAR (BITS*sizeof(uint32)*8)
+#define BITS 3
+#define NVAR (BITS*sizeof(uint64)*8)
typedef struct Bits Bits;
struct Bits
{
- uint32 b[BITS];
+ uint64 b[BITS];
};
EXTERN Bits zbits;
@@ -1027,12 +1027,14 @@ int Qconv(Fmt *fp);
Bits band(Bits a, Bits b);
int bany(Bits *a);
int beq(Bits a, Bits b);
-int bitno(int32 b);
+int bitno(uint64 b);
Bits blsh(uint n);
Bits bnot(Bits a);
int bnum(Bits a);
Bits bor(Bits a, Bits b);
-int bset(Bits a, uint n);
+int btest(Bits *a, uint n);
+void biset(Bits *a, uint n);
+void biclr(Bits *a, uint n);
/*
* bv.c
diff --git a/src/cmd/objdump/objdump_test.go b/src/cmd/objdump/objdump_test.go
index 2bb74663c..bd09ae9f9 100644
--- a/src/cmd/objdump/objdump_test.go
+++ b/src/cmd/objdump/objdump_test.go
@@ -101,6 +101,10 @@ func testDisasm(t *testing.T, flags ...string) {
}
func TestDisasm(t *testing.T) {
+ switch runtime.GOARCH {
+ case "power64", "power64le":
+ t.Skipf("skipping on %s, issue 9039", runtime.GOARCH)
+ }
testDisasm(t)
}
@@ -109,5 +113,9 @@ func TestDisasmExtld(t *testing.T) {
case "plan9", "windows":
t.Skipf("skipping on %s", runtime.GOOS)
}
+ switch runtime.GOARCH {
+ case "power64", "power64le":
+ t.Skipf("skipping on %s, no support for external linking, issue 9038", runtime.GOARCH)
+ }
testDisasm(t, "-ldflags=-linkmode=external")
}
diff --git a/src/liblink/list9.c b/src/liblink/list9.c
index 041c6884f..c9190d894 100644
--- a/src/liblink/list9.c
+++ b/src/liblink/list9.c
@@ -259,11 +259,12 @@ Dconv(Fmt *fp)
sprint(str, "%s+%.5lux(BRANCH)", a->sym->name, v);
else
sprint(str, "%.5lux(BRANCH)", v);
- } else
- if(a->sym != nil)
- sprint(str, "%s+%lld(APC)", a->sym->name, a->offset);
- else
- sprint(str, "%lld(APC)", a->offset);
+ } else if(a->u.branch != nil)
+ sprint(str, "%lld", a->u.branch->pc);
+ else if(a->sym != nil)
+ sprint(str, "%s+%lld(APC)", a->sym->name, a->offset);
+ else
+ sprint(str, "%lld(APC)", a->offset);
break;
case D_FCONST:
diff --git a/src/reflect/asm_power64x.s b/src/reflect/asm_power64x.s
index e430cdf04..472063824 100644
--- a/src/reflect/asm_power64x.s
+++ b/src/reflect/asm_power64x.s
@@ -5,12 +5,14 @@
// +build power64 power64le
#include "textflag.h"
+#include "funcdata.h"
// makeFuncStub is the code half of the function returned by MakeFunc.
// See the comment on the declaration of makeFuncStub in makefunc.go
// for more details.
-// No argsize here, gc generates argsize info at call site.
+// No arg size here, runtime pulls arg map out of the func value.
TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16
+ NO_LOCAL_POINTERS
MOVD R11, 8(R1)
MOVD $argframe+0(FP), R3
MOVD R3, 16(R1)
@@ -20,8 +22,9 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16
// methodValueCall is the code half of the function returned by makeMethodValue.
// See the comment on the declaration of methodValueCall in makefunc.go
// for more details.
-// No argsize here, gc generates argsize info at call site.
+// No arg size here; runtime pulls arg map out of the func value.
TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$16
+ NO_LOCAL_POINTERS
MOVD R11, 8(R1)
MOVD $argframe+0(FP), R3
MOVD R3, 16(R1)
diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s
index fd0c6be16..ba900c2b3 100644
--- a/src/runtime/asm_power64x.s
+++ b/src/runtime/asm_power64x.s
@@ -86,7 +86,7 @@ TEXT runtime·reginit(SB),NOSPLIT,$-8-0
// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $-8-8
- MOVD gobuf+0(FP), R3
+ MOVD buf+0(FP), R3
MOVD R1, gobuf_sp(R3)
MOVD LR, R31
MOVD R31, gobuf_pc(R3)
@@ -99,7 +99,7 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8
// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $-8-8
- MOVD gobuf+0(FP), R5
+ MOVD buf+0(FP), R5
MOVD gobuf_g(R5), g // make sure g is not nil
MOVD 0(g), R4
MOVD gobuf_sp(R5), R1
@@ -299,7 +299,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0
// Note: can't just "BR NAME(SB)" - bad inlining results.
TEXT ·reflectcall(SB), NOSPLIT, $-8-24
- MOVW argsize+16(FP), R3
+ MOVWZ n+16(FP), R3
DISPATCH(runtime·call16, 16)
DISPATCH(runtime·call32, 32)
DISPATCH(runtime·call64, 64)
@@ -335,8 +335,8 @@ TEXT ·reflectcall(SB), NOSPLIT, $-8-24
TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
NO_LOCAL_POINTERS; \
/* copy arguments to stack */ \
- MOVD argptr+8(FP), R3; \
- MOVW argsize+16(FP), R4; \
+ MOVD arg+8(FP), R3; \
+ MOVWZ n+16(FP), R4; \
MOVD R1, R5; \
ADD $(8-1), R5; \
SUB $1, R3; \
@@ -353,9 +353,9 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
PCDATA $PCDATA_StackMapIndex, $0; \
BL (CTR); \
/* copy return values back */ \
- MOVD argptr+8(FP), R3; \
- MOVW argsize+16(FP), R4; \
- MOVW retoffset+20(FP), R6; \
+ MOVD arg+8(FP), R3; \
+ MOVWZ n+16(FP), R4; \
+ MOVWZ retoffset+20(FP), R6; \
MOVD R1, R5; \
ADD R6, R5; \
ADD R6, R3; \
@@ -398,7 +398,7 @@ CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
-// bool cas(int32 *val, int32 old, int32 new)
+// bool cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
// if(*val == old){
// *val = new;
@@ -406,9 +406,9 @@ CALLFN(·call1073741824, 1073741824)
// } else
// return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17
- MOVD p+0(FP), R3
- MOVW old+8(FP), R4
- MOVW new+12(FP), R5
+ MOVD ptr+0(FP), R3
+ MOVWZ old+8(FP), R4
+ MOVWZ new+12(FP), R5
cas_again:
SYNC
LWAR (R3), R6
@@ -425,7 +425,7 @@ cas_fail:
MOVD $0, R3
BR -5(PC)
-// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
+// bool runtime·cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
// if(*val == *old){
// *val = new;
@@ -434,7 +434,7 @@ cas_fail:
// return 0;
// }
TEXT runtime·cas64(SB), NOSPLIT, $0-25
- MOVD p+0(FP), R3
+ MOVD ptr+0(FP), R3
MOVD old+8(FP), R4
MOVD new+16(FP), R5
cas64_again:
@@ -475,12 +475,12 @@ TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
TEXT runtime·casp1(SB), NOSPLIT, $0-25
BR runtime·cas64(SB)
-// uint32 xadd(uint32 volatile *val, int32 delta)
+// uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
// *val += delta;
// return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-20
- MOVD p+0(FP), R4
+ MOVD ptr+0(FP), R4
MOVW delta+8(FP), R5
SYNC
LWAR (R4), R3
@@ -493,7 +493,7 @@ TEXT runtime·xadd(SB), NOSPLIT, $0-20
RETURN
TEXT runtime·xadd64(SB), NOSPLIT, $0-24
- MOVD p+0(FP), R4
+ MOVD ptr+0(FP), R4
MOVD delta+8(FP), R5
SYNC
LDAR (R4), R3
@@ -506,7 +506,7 @@ TEXT runtime·xadd64(SB), NOSPLIT, $0-24
RETURN
TEXT runtime·xchg(SB), NOSPLIT, $0-20
- MOVD p+0(FP), R4
+ MOVD ptr+0(FP), R4
MOVW new+8(FP), R5
SYNC
LWAR (R4), R3
@@ -518,7 +518,7 @@ TEXT runtime·xchg(SB), NOSPLIT, $0-20
RETURN
TEXT runtime·xchg64(SB), NOSPLIT, $0-24
- MOVD p+0(FP), R4
+ MOVD ptr+0(FP), R4
MOVD new+8(FP), R5
SYNC
LDAR (R4), R3
@@ -651,7 +651,7 @@ TEXT runtime·setcallerpc(SB),NOSPLIT,$-8-16
RETURN
TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
- MOVD sp+0(FP), R3
+ MOVD argp+0(FP), R3
SUB $8, R3
MOVD R3, ret+8(FP)
RETURN
@@ -695,16 +695,17 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
TEXT runtime·memeq(SB),NOSPLIT,$-8-25
MOVD a+0(FP), R3
MOVD b+8(FP), R4
- MOVD count+16(FP), R5
+ MOVD size+16(FP), R5
SUB $1, R3
SUB $1, R4
ADD R3, R5, R8
loop:
CMP R3, R8
- BNE 4(PC)
+ BNE test
MOVD $1, R3
MOVB R3, ret+24(FP)
RETURN
+test:
MOVBZU 1(R3), R6
MOVBZU 1(R4), R7
CMP R6, R7
@@ -828,7 +829,7 @@ notfound:
// in ../../cmd/9g/ggen.c:/^clearfat.
// R0: always zero
// R3 (aka REGRT1): ptr to memory to be zeroed - 8
-// R3 is updated as a side effect.
+// On return, R3 points to the last zeroed dword.
TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
MOVDU R0, 8(R3)
MOVDU R0, 8(R3)
@@ -964,7 +965,7 @@ TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
MOVD g_m(g), R4
MOVWZ m_fastrand(R4), R3
ADD R3, R3
- CMP R3, $0
+ CMPW R3, $0
BGE 2(PC)
XOR $0x88888eef, R3
MOVW R3, m_fastrand(R4)
@@ -979,3 +980,9 @@ TEXT runtime·return0(SB), NOSPLIT, $0
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
MOVD R0, 26(R0)
+
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT,$-8-0
+ MOVD R0, R0 // NOP
+ BL runtime·goexit1(SB) // does not return
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index 1a33f3b3b..662b7546d 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -137,7 +137,7 @@ func infoBigStruct() []byte {
BitsScalar, BitsScalar, BitsScalar, BitsScalar, // t int; y uint16; u uint64
BitsPointer, BitsDead, // i string
}
- case "amd64":
+ case "amd64", "power64", "power64le":
return []byte{
BitsPointer, // q *int
BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte
@@ -153,12 +153,6 @@ func infoBigStruct() []byte {
BitsScalar, BitsScalar, BitsDead, BitsScalar, BitsScalar, // t int; y uint16; u uint64
BitsPointer, BitsDead, // i string
}
- case "power64", "power64le":
- return []byte{
- BitsPointer, BitsScalar, BitsScalar, BitsScalar,
- BitsMultiWord, BitsSlice, BitsScalar, BitsScalar,
- BitsScalar, BitsScalar, BitsMultiWord, BitsString,
- }
default:
panic("unknown arch")
}
diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c
index 3248b0f49..f37c01af0 100644
--- a/src/runtime/mgc0.c
+++ b/src/runtime/mgc0.c
@@ -122,6 +122,7 @@
enum {
Debug = 0,
+ DebugPtrs = 0, // if 1, print trace of every pointer load during GC
ConcurrentSweep = 1,
FinBlockSize = 4*1024,
diff --git a/src/runtime/panic.c b/src/runtime/panic.c
index 46683b2b0..b19fdd0e1 100644
--- a/src/runtime/panic.c
+++ b/src/runtime/panic.c
@@ -69,7 +69,7 @@ runtime·recovery_m(G *gp)
// each call to deferproc.
// (The pc we're returning to does pop pop
// before it tests the return value.)
- // On the arm there are 2 saved LRs mixed in too.
+ // On the arm and power there are 2 saved LRs mixed in too.
if(thechar == '5' || thechar == '9')
gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr);
else
diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c
index f19f8e4be..a68414284 100644
--- a/src/runtime/runtime.c
+++ b/src/runtime/runtime.c
@@ -226,6 +226,12 @@ runtime·check(void)
if(z != 4)
runtime·throw("cas4");
+ z = 0xffffffff;
+ if(!runtime·cas(&z, 0xffffffff, 0xfffffffe))
+ runtime·throw("cas5");
+ if(z != 0xfffffffe)
+ runtime·throw("cas6");
+
k = (byte*)0xfedcb123;
if(sizeof(void*) == 8)
k = (byte*)((uintptr)k<<10);
diff --git a/src/runtime/signal_power64x.c b/src/runtime/signal_power64x.c
index 89c5c7848..c0bf1c4a5 100644
--- a/src/runtime/signal_power64x.c
+++ b/src/runtime/signal_power64x.c
@@ -124,7 +124,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
if(runtime·gotraceback(&crash)){
runtime·goroutineheader(gp);
- runtime·traceback(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LINK(info, ctxt), gp);
+ runtime·tracebacktrap(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LINK(info, ctxt), gp);
runtime·tracebackothers(gp);
runtime·printf("\n");
runtime·dumpregs(info, ctxt);