summaryrefslogtreecommitdiff
path: root/src/cmd/gc
diff options
context:
space:
mode:
authorDmitriy Vyukov <dvyukov@google.com>2014-07-29 11:01:02 +0400
committerDmitriy Vyukov <dvyukov@google.com>2014-07-29 11:01:02 +0400
commit426772a95d353d6ef24475e0c7ec2d4591d080ae (patch)
treeaafdd54d31f1df5f311ac476ae77927d6573856d /src/cmd/gc
parent670466819fb227cda4ca8f8c1f4f3ce539ebfefd (diff)
downloadgo-426772a95d353d6ef24475e0c7ec2d4591d080ae.tar.gz
runtime: simpler and faster GC
Implement the design described in: https://docs.google.com/document/d/1v4Oqa0WwHunqlb8C3ObL_uNQw3DfSY-ztoA-4wWbKcg/pub Summary of the changes: GC uses "2-bits per word" pointer type info embed directly into bitmap. Scanning of stacks/data/heap is unified. The old spans types go away. Compiler generates "sparse" 4-bits type info for GC (directly for GC bitmap). Linker generates "dense" 2-bits type info for data/bss (the same as stacks use). Summary of results: -1680 lines of code total (-1000+ in mgc0.c only) -25% memory consumption -3-7% binary size -15% GC pause reduction -7% run time reduction LGTM=khr R=golang-codereviews, rsc, christoph, khr CC=golang-codereviews, rlh https://codereview.appspot.com/106260045
Diffstat (limited to 'src/cmd/gc')
-rw-r--r--src/cmd/gc/go.h2
-rw-r--r--src/cmd/gc/plive.c5
-rw-r--r--src/cmd/gc/reflect.c466
3 files changed, 260 insertions, 213 deletions
diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h
index aaa22d1b1..30b210f92 100644
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -381,7 +381,6 @@ enum
SymExported = 1<<2, // already written out by export
SymUniq = 1<<3,
SymSiggen = 1<<4,
- SymGcgen = 1<<5,
};
struct Sym
@@ -1515,6 +1514,7 @@ void movelarge(NodeList*);
int isfat(Type*);
void linkarchinit(void);
void liveness(Node*, Prog*, Sym*, Sym*);
+void twobitwalktype1(Type*, vlong*, Bvec*);
void markautoused(Prog*);
Plist* newplist(void);
Node* nodarg(Type*, int);
diff --git a/src/cmd/gc/plive.c b/src/cmd/gc/plive.c
index d3f1cfbc6..9026f6003 100644
--- a/src/cmd/gc/plive.c
+++ b/src/cmd/gc/plive.c
@@ -19,8 +19,7 @@
#include "opt.h"
#include "../ld/textflag.h"
#include "../../pkg/runtime/funcdata.h"
-
-enum { BitsPerPointer = 2 };
+#include "../../pkg/runtime/mgc0.h"
enum {
UNVISITED = 0,
@@ -1040,7 +1039,7 @@ checkptxt(Node *fn, Prog *firstp)
// and then simply copied into bv at the correct offset on future calls with
// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, twobitwalktype1
// accounts for 40% of the 6g execution time.
-static void
+void
twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv)
{
vlong fieldoffset;
diff --git a/src/cmd/gc/reflect.c b/src/cmd/gc/reflect.c
index fdcd76be0..934289e08 100644
--- a/src/cmd/gc/reflect.c
+++ b/src/cmd/gc/reflect.c
@@ -7,6 +7,7 @@
#include "go.h"
#include "../ld/textflag.h"
#include "../../pkg/runtime/mgc0.h"
+#include "../../pkg/runtime/typekind.h"
/*
* runtime interface and reflection data structures
@@ -16,7 +17,9 @@ static NodeList* signatlist;
static Sym* dtypesym(Type*);
static Sym* weaktypesym(Type*);
static Sym* dalgsym(Type*);
-static Sym* dgcsym(Type*);
+static int usegcprog(Type*);
+static void gengcprog(Type*, Sym**, Sym**);
+static void gengcmask(Type*, uint8[16]);
static int
sigcmp(Sig *a, Sig *b)
@@ -612,37 +615,6 @@ dextratype(Sym *sym, int off, Type *t, int ptroff)
return ot;
}
-enum {
- KindBool = 1,
- KindInt,
- KindInt8,
- KindInt16,
- KindInt32,
- KindInt64,
- KindUint,
- KindUint8,
- KindUint16,
- KindUint32,
- KindUint64,
- KindUintptr,
- KindFloat32,
- KindFloat64,
- KindComplex64,
- KindComplex128,
- KindArray,
- KindChan,
- KindFunc,
- KindInterface,
- KindMap,
- KindPtr,
- KindSlice,
- KindString,
- KindStruct,
- KindUnsafePointer,
-
- KindNoPointers = 1<<7,
-};
-
static int
kinds[] =
{
@@ -746,8 +718,9 @@ haspointers(Type *t)
static int
dcommontype(Sym *s, int ot, Type *t)
{
- int i, alg, sizeofAlg;
- Sym *sptr, *algsym, *zero;
+ int i, alg, sizeofAlg, gcprog;
+ Sym *sptr, *algsym, *zero, *gcprog0, *gcprog1;
+ uint8 gcmask[16];
static Sym *algarray;
char *p;
@@ -809,17 +782,32 @@ dcommontype(Sym *s, int ot, Type *t)
ot = duint8(s, ot, t->align); // align
ot = duint8(s, ot, t->align); // fieldAlign
+ gcprog = usegcprog(t);
i = kinds[t->etype];
if(t->etype == TARRAY && t->bound < 0)
i = KindSlice;
if(!haspointers(t))
i |= KindNoPointers;
+ if(gcprog)
+ i |= KindGCProg;
ot = duint8(s, ot, i); // kind
if(alg >= 0)
ot = dsymptr(s, ot, algarray, alg*sizeofAlg);
else
ot = dsymptr(s, ot, algsym, 0);
- ot = dsymptr(s, ot, dgcsym(t), 0); // gc
+ // gc
+ if(gcprog) {
+ gengcprog(t, &gcprog0, &gcprog1);
+ if(gcprog0 != S)
+ ot = dsymptr(s, ot, gcprog0, 0);
+ else
+ ot = duintptr(s, ot, 0);
+ ot = dsymptr(s, ot, gcprog1, 0);
+ } else {
+ gengcmask(t, gcmask);
+ for(i = 0; i < 2*widthptr; i++)
+ ot = duint8(s, ot, gcmask[i]);
+ }
p = smprint("%-uT", t);
//print("dcommontype: %s\n", p);
ot = dgostringptr(s, ot, p); // string
@@ -1275,31 +1263,207 @@ dalgsym(Type *t)
}
static int
-gcinline(Type *t)
+usegcprog(Type *t)
{
- switch(t->etype) {
- case TARRAY:
- if(t->bound == 1)
- return 1;
- if(t->width <= 4*widthptr)
- return 1;
- break;
+ vlong size, nptr;
+
+ if(!haspointers(t))
+ return 0;
+ if(t->width == BADWIDTH)
+ dowidth(t);
+ // Calculate size of the unrolled GC mask.
+ nptr = (t->width+widthptr-1)/widthptr;
+ size = nptr;
+ if(size%2)
+ size *= 2; // repeated
+ size = size*gcBits/8; // 4 bits per word
+ // Decide whether to use unrolled GC mask or GC program.
+ // We could use a more elaborate condition, but this seems to work well in practice.
+ // For small objects GC program can't give significant reduction.
+ // While large objects usually contain arrays; and even if it don't
+ // the program uses 2-bits per word while mask uses 4-bits per word,
+ // so the program is still smaller.
+ return size > 2*widthptr;
+}
+
+// Generates sparse GC bitmask (4 bits per word).
+static void
+gengcmask(Type *t, uint8 gcmask[16])
+{
+ Bvec *vec;
+ vlong xoffset, nptr, i, j;
+ int half, mw;
+ uint8 bits, *pos;
+
+ memset(gcmask, 0, 16);
+ if(!haspointers(t))
+ return;
+
+ // Generate compact mask as stacks use.
+ xoffset = 0;
+ vec = bvalloc(2*widthptr*8);
+ twobitwalktype1(t, &xoffset, vec);
+
+ // Unfold the mask for the GC bitmap format:
+ // 4 bits per word, 2 high bits encode pointer info.
+ pos = (uint8*)gcmask;
+ nptr = (t->width+widthptr-1)/widthptr;
+ half = 0;
+ mw = 0;
+ // If number of words is odd, repeat the mask.
+ // This makes simpler handling of arrays in runtime.
+ for(j=0; j<=(nptr%2); j++) {
+ for(i=0; i<nptr; i++) {
+ bits = bvget(vec, i*BitsPerPointer) | bvget(vec, i*BitsPerPointer+1)<<1;
+ // Some fake types (e.g. Hmap) has missing fileds.
+ // twobitwalktype1 generates BitsDead for that holes,
+ // replace BitsDead with BitsScalar.
+ if(!mw && bits == BitsDead)
+ bits = BitsScalar;
+ mw = !mw && bits == BitsMultiWord;
+ bits <<= 2;
+ if(half)
+ bits <<= 4;
+ *pos |= bits;
+ half = !half;
+ if(!half)
+ pos++;
+ }
}
- return 0;
}
-static int
-dgcsym1(Sym *s, int ot, Type *t, vlong *off, int stack_size)
+// Helper object for generation of GC programs.
+typedef struct ProgGen ProgGen;
+struct ProgGen
{
- Type *t1;
- vlong o, off2, fieldoffset, i;
+ Sym* s;
+ int32 datasize;
+ uint8 data[256/PointersPerByte];
+ vlong ot;
+};
- if(t->align > 0 && (*off % t->align) != 0)
- fatal("dgcsym1: invalid initial alignment, %T", t);
+static void
+proggeninit(ProgGen *g, Sym *s)
+{
+ g->s = s;
+ g->datasize = 0;
+ g->ot = 0;
+ memset(g->data, 0, sizeof(g->data));
+}
+
+static void
+proggenemit(ProgGen *g, uint8 v)
+{
+ g->ot = duint8(g->s, g->ot, v);
+}
+
+// Emits insData block from g->data.
+static void
+proggendataflush(ProgGen *g)
+{
+ int32 i, s;
+
+ if(g->datasize == 0)
+ return;
+ proggenemit(g, insData);
+ proggenemit(g, g->datasize);
+ s = (g->datasize + PointersPerByte - 1)/PointersPerByte;
+ for(i = 0; i < s; i++)
+ proggenemit(g, g->data[i]);
+ g->datasize = 0;
+ memset(g->data, 0, sizeof(g->data));
+}
+
+static void
+proggendata(ProgGen *g, uint8 d)
+{
+ g->data[g->datasize/PointersPerByte] |= d << ((g->datasize%PointersPerByte)*BitsPerPointer);
+ g->datasize++;
+ if(g->datasize == 255)
+ proggendataflush(g);
+}
+
+// Skip v bytes due to alignment, etc.
+static void
+proggenskip(ProgGen *g, vlong off, vlong v)
+{
+ vlong i;
+
+ for(i = off; i < off+v; i++) {
+ if((i%widthptr) == 0)
+ proggendata(g, BitsScalar);
+ }
+}
+
+// Emit insArray instruction.
+static void
+proggenarray(ProgGen *g, vlong len)
+{
+ int32 i;
+
+ proggendataflush(g);
+ proggenemit(g, insArray);
+ for(i = 0; i < widthptr; i++, len >>= 8)
+ proggenemit(g, len);
+}
+
+static void
+proggenarrayend(ProgGen *g)
+{
+ proggendataflush(g);
+ proggenemit(g, insArrayEnd);
+}
+
+static vlong
+proggenfini(ProgGen *g)
+{
+ proggendataflush(g);
+ proggenemit(g, insEnd);
+ return g->ot;
+}
+
+static void gengcprog1(ProgGen *g, Type *t, vlong *xoffset);
+
+// Generates GC program for large types.
+static void
+gengcprog(Type *t, Sym **pgc0, Sym **pgc1)
+{
+ Sym *gc0, *gc1;
+ vlong nptr, size, ot, xoffset;
+ ProgGen g;
+
+ nptr = (t->width+widthptr-1)/widthptr;
+ size = nptr;
+ if(size%2)
+ size *= 2; // repeated twice
+ size = size*PointersPerByte/8; // 4 bits per word
+ size++; // unroll flag in the beginning, used by runtime (see runtime.markallocated)
+ // emity space in BSS for unrolled program
+ *pgc0 = S;
+ // Don't generate it if it's too large, runtime will unroll directly into GC bitmap.
+ if(size <= MaxGCMask) {
+ gc0 = typesymprefix(".gc", t);
+ ggloblsym(gc0, size, DUPOK|NOPTR);
+ *pgc0 = gc0;
+ }
+
+ // program in RODATA
+ gc1 = typesymprefix(".gcprog", t);
+ proggeninit(&g, gc1);
+ xoffset = 0;
+ gengcprog1(&g, t, &xoffset);
+ ot = proggenfini(&g);
+ ggloblsym(gc1, ot, DUPOK|RODATA);
+ *pgc1 = gc1;
+}
+
+// Recursively walks type t and writes GC program into g.
+static void
+gengcprog1(ProgGen *g, Type *t, vlong *xoffset)
+{
+ vlong fieldoffset, i, o, n;
+ Type *t1;
- if(t->width == BADWIDTH)
- dowidth(t);
-
switch(t->etype) {
case TINT8:
case TUINT8:
@@ -1317,187 +1481,71 @@ dgcsym1(Sym *s, int ot, Type *t, vlong *off, int stack_size)
case TFLOAT64:
case TCOMPLEX64:
case TCOMPLEX128:
- *off += t->width;
+ proggenskip(g, *xoffset, t->width);
+ *xoffset += t->width;
break;
-
case TPTR32:
case TPTR64:
- // NOTE: Any changes here need to be made to reflect.PtrTo as well.
- if(*off % widthptr != 0)
- fatal("dgcsym1: invalid alignment, %T", t);
-
- // NOTE(rsc): Emitting GC_APTR here for *nonptrtype
- // (pointer to non-pointer-containing type) means that
- // we do not record 'nonptrtype' and instead tell the
- // garbage collector to look up the type of the memory in
- // type information stored in the heap. In effect we are telling
- // the collector "we don't trust our information - use yours".
- // It's not completely clear why we want to do this.
- // It does have the effect that if you have a *SliceHeader and a *[]int
- // pointing at the same actual slice header, *SliceHeader will not be
- // used as an authoritative type for the memory, which is good:
- // if the collector scanned the memory as type *SliceHeader, it would
- // see no pointers inside but mark the block as scanned, preventing
- // the seeing of pointers when we followed the *[]int pointer.
- // Perhaps that kind of situation is the rationale.
- if(!haspointers(t->type)) {
- ot = duintptr(s, ot, GC_APTR);
- ot = duintptr(s, ot, *off);
- } else {
- ot = duintptr(s, ot, GC_PTR);
- ot = duintptr(s, ot, *off);
- ot = dsymptr(s, ot, dgcsym(t->type), 0);
- }
- *off += t->width;
- break;
-
case TUNSAFEPTR:
case TFUNC:
- if(*off % widthptr != 0)
- fatal("dgcsym1: invalid alignment, %T", t);
- ot = duintptr(s, ot, GC_APTR);
- ot = duintptr(s, ot, *off);
- *off += t->width;
- break;
-
- // struct Hchan*
case TCHAN:
- // NOTE: Any changes here need to be made to reflect.ChanOf as well.
- if(*off % widthptr != 0)
- fatal("dgcsym1: invalid alignment, %T", t);
- ot = duintptr(s, ot, GC_CHAN_PTR);
- ot = duintptr(s, ot, *off);
- ot = dsymptr(s, ot, dtypesym(t), 0);
- *off += t->width;
- break;
-
- // struct Hmap*
case TMAP:
- // NOTE: Any changes here need to be made to reflect.MapOf as well.
- if(*off % widthptr != 0)
- fatal("dgcsym1: invalid alignment, %T", t);
- ot = duintptr(s, ot, GC_PTR);
- ot = duintptr(s, ot, *off);
- ot = dsymptr(s, ot, dgcsym(hmap(t)), 0);
- *off += t->width;
+ proggendata(g, BitsPointer);
+ *xoffset += t->width;
break;
-
- // struct { byte *str; int32 len; }
case TSTRING:
- if(*off % widthptr != 0)
- fatal("dgcsym1: invalid alignment, %T", t);
- ot = duintptr(s, ot, GC_STRING);
- ot = duintptr(s, ot, *off);
- *off += t->width;
+ proggendata(g, BitsMultiWord);
+ proggendata(g, BitsString);
+ *xoffset += t->width;
break;
-
- // struct { Itab* tab; void* data; }
- // struct { Type* type; void* data; } // When isnilinter(t)==true
case TINTER:
- if(*off % widthptr != 0)
- fatal("dgcsym1: invalid alignment, %T", t);
- if(isnilinter(t)) {
- ot = duintptr(s, ot, GC_EFACE);
- ot = duintptr(s, ot, *off);
- } else {
- ot = duintptr(s, ot, GC_IFACE);
- ot = duintptr(s, ot, *off);
- }
- *off += t->width;
+ proggendata(g, BitsMultiWord);
+ if(isnilinter(t))
+ proggendata(g, BitsEface);
+ else
+ proggendata(g, BitsIface);
+ *xoffset += t->width;
break;
-
case TARRAY:
- if(t->bound < -1)
- fatal("dgcsym1: invalid bound, %T", t);
- if(t->type->width == BADWIDTH)
- dowidth(t->type);
if(isslice(t)) {
- // NOTE: Any changes here need to be made to reflect.SliceOf as well.
- // struct { byte* array; uint32 len; uint32 cap; }
- if(*off % widthptr != 0)
- fatal("dgcsym1: invalid alignment, %T", t);
- if(t->type->width != 0) {
- ot = duintptr(s, ot, GC_SLICE);
- ot = duintptr(s, ot, *off);
- ot = dsymptr(s, ot, dgcsym(t->type), 0);
- } else {
- ot = duintptr(s, ot, GC_APTR);
- ot = duintptr(s, ot, *off);
- }
- *off += t->width;
+ proggendata(g, BitsMultiWord);
+ proggendata(g, BitsSlice);
+ proggendata(g, BitsScalar);
} else {
- // NOTE: Any changes here need to be made to reflect.ArrayOf as well,
- // at least once ArrayOf's gc info is implemented and ArrayOf is exported.
- // struct { byte* array; uint32 len; uint32 cap; }
- if(t->bound < 1 || !haspointers(t->type)) {
- *off += t->width;
- } else if(gcinline(t)) {
- for(i=0; i<t->bound; i++)
- ot = dgcsym1(s, ot, t->type, off, stack_size); // recursive call of dgcsym1
+ t1 = t->type;
+ if(t1->width == 0) {
+ // ignore
+ } if(t->bound <= 1 || t->bound*t1->width < 32*widthptr) {
+ for(i = 0; i < t->bound; i++)
+ gengcprog1(g, t1, xoffset);
+ } else if(!haspointers(t1)) {
+ n = t->width;
+ n -= -*xoffset&(widthptr-1); // skip to next ptr boundary
+ proggenarray(g, (n+widthptr-1)/widthptr);
+ proggendata(g, BitsScalar);
+ proggenarrayend(g);
+ *xoffset -= (n+widthptr-1)/widthptr*widthptr - t->width;
} else {
- if(stack_size < GC_STACK_CAPACITY) {
- ot = duintptr(s, ot, GC_ARRAY_START); // a stack push during GC
- ot = duintptr(s, ot, *off);
- ot = duintptr(s, ot, t->bound);
- ot = duintptr(s, ot, t->type->width);
- off2 = 0;
- ot = dgcsym1(s, ot, t->type, &off2, stack_size+1); // recursive call of dgcsym1
- ot = duintptr(s, ot, GC_ARRAY_NEXT); // a stack pop during GC
- } else {
- ot = duintptr(s, ot, GC_REGION);
- ot = duintptr(s, ot, *off);
- ot = duintptr(s, ot, t->width);
- ot = dsymptr(s, ot, dgcsym(t), 0);
- }
- *off += t->width;
+ proggenarray(g, t->bound);
+ gengcprog1(g, t1, xoffset);
+ *xoffset += (t->bound-1)*t1->width;
+ proggenarrayend(g);
}
}
break;
-
case TSTRUCT:
o = 0;
- for(t1=t->type; t1!=T; t1=t1->down) {
+ for(t1 = t->type; t1 != T; t1 = t1->down) {
fieldoffset = t1->width;
- *off += fieldoffset - o;
- ot = dgcsym1(s, ot, t1->type, off, stack_size); // recursive call of dgcsym1
+ proggenskip(g, *xoffset, fieldoffset - o);
+ *xoffset += fieldoffset - o;
+ gengcprog1(g, t1->type, xoffset);
o = fieldoffset + t1->type->width;
}
- *off += t->width - o;
+ proggenskip(g, *xoffset, t->width - o);
+ *xoffset += t->width - o;
break;
-
default:
- fatal("dgcsym1: unexpected type %T", t);
+ fatal("gengcprog1: unexpected type, %T", t);
}
-
- return ot;
-}
-
-static Sym*
-dgcsym(Type *t)
-{
- int ot;
- vlong off;
- Sym *s;
-
- s = typesymprefix(".gc", t);
- if(s->flags & SymGcgen)
- return s;
- s->flags |= SymGcgen;
-
- if(t->width == BADWIDTH)
- dowidth(t);
-
- ot = 0;
- off = 0;
- ot = duintptr(s, ot, t->width);
- ot = dgcsym1(s, ot, t, &off, 0);
- ot = duintptr(s, ot, GC_END);
- ggloblsym(s, ot, DUPOK|RODATA);
-
- if(t->align > 0)
- off = rnd(off, t->align);
- if(off != t->width)
- fatal("dgcsym: off=%lld, size=%lld, type %T", off, t->width, t);
-
- return s;
}