From c17ee649b04ecdbe973d13f4b6ef087947ba80ea Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Wed, 1 Oct 2014 17:38:09 -0400 Subject: [dev.garbage] cmd/gc: never generate BitsMultiWord LGTM=rlh R=rlh, minux CC=golang-codereviews https://codereview.appspot.com/151940043 --- src/cmd/gc/reflect.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/cmd/gc/reflect.c b/src/cmd/gc/reflect.c index 4892ab757..e229b3075 100644 --- a/src/cmd/gc/reflect.c +++ b/src/cmd/gc/reflect.c @@ -1506,11 +1506,9 @@ gengcprog1(ProgGen *g, Type *t, vlong *xoffset) *xoffset += t->width; break; case TINTER: - proggendata(g, BitsMultiWord); - if(isnilinter(t)) - proggendata(g, BitsEface); - else - proggendata(g, BitsIface); + // Assuming IfacePointerOnly=1. + proggendata(g, BitsPointer); + proggendata(g, BitsPointer); *xoffset += t->width; break; case TARRAY: -- cgit v1.2.1 From 59a42ca068d7cbe0b92f2f68768129599a6c42f8 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 2 Oct 2014 14:26:04 -0400 Subject: [dev.garbage] runtime: remove another BitsMultiWord Not found because it was not used by name. Add name in comments for what's left behind. LGTM=rlh R=rlh CC=golang-codereviews https://codereview.appspot.com/148430043 --- src/cmd/gc/plive.c | 19 ++++++------------- src/runtime/gcinfo_test.go | 4 ++-- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/cmd/gc/plive.c b/src/cmd/gc/plive.c index 0feb2c710..3bfa69b1f 100644 --- a/src/cmd/gc/plive.c +++ b/src/cmd/gc/plive.c @@ -1092,7 +1092,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) case TCOMPLEX64: case TCOMPLEX128: for(i = 0; i < t->width; i++) { - bvset(bv, ((*xoffset + i) / widthptr) * BitsPerPointer); // 1 = live scalar + bvset(bv, ((*xoffset + i) / widthptr) * BitsPerPointer); // 1 = live scalar (BitsScalar) } *xoffset += t->width; break; @@ -1105,7 +1105,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) case TMAP: if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid alignment, %T", t); - bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 1); // 2 = live ptr + bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 1); // 2 = live ptr (BitsPointer) *xoffset += t->width; break; @@ -1113,7 +1113,7 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) // struct { byte *str; intgo len; } if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid alignment, %T", t); - bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 1); // 2 = live ptr in first slot + bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 1); // 2 = live ptr in first slot (BitsPointer) *xoffset += t->width; break; @@ -1123,15 +1123,8 @@ twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) // struct { Type *type; union { void *ptr, uintptr val } data; } if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid alignment, %T", t); - bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 0); - bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 1); // 3 = multiword - // next word contains 2 = Iface, 3 = Eface - if(isnilinter(t)) { - bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 2); - bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 3); - } else { - bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 3); - } + bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 1); // 2 = live ptr in first slot (BitsPointer) + bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 3); // 2 = live ptr in second slot (BitsPointer) *xoffset += t->width; break; @@ -1144,7 +1137,7 @@ twobitwalktype1(Type *t, 
vlong *xoffset, Bvec *bv) // struct { byte *array; uintgo len; uintgo cap; } if((*xoffset & (widthptr-1)) != 0) fatal("twobitwalktype1: invalid TARRAY alignment, %T", t); - bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 1); // 2 = live ptr in first slot + bvset(bv, (*xoffset / widthptr) * BitsPerPointer + 1); // 2 = live ptr in first slot (BitsPointer) *xoffset += t->width; } else for(i = 0; i < t->bound; i++) diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index 88f6703f9..e74d8c2c0 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -188,6 +188,6 @@ var ( infoString = []byte{BitsPointer, BitsDead} infoSlice = []byte{BitsPointer, BitsDead, BitsDead} - infoEface = []byte{BitsMultiWord, BitsEface} - infoIface = []byte{BitsMultiWord, BitsIface} + infoEface = []byte{BitsPointer, BitsPointer} + infoIface = []byte{BitsPointer, BitsPointer} ) -- cgit v1.2.1 From 5fc8be8db6a4e7a0e43718e6b21d2b78e5aacf5f Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 2 Oct 2014 16:49:11 -0400 Subject: [dev.garbage] runtime: make sure G.param and SudoG.elem do not hold stale pointers In old conservative Go, this could cause memory leaks. A new pickier collector might reasonably crash when it saw one of these. LGTM=rlh R=rlh CC=golang-codereviews https://codereview.appspot.com/147480043 --- src/runtime/chan.go | 11 +++++++++-- src/runtime/proc.go | 13 +++++++++++++ src/runtime/select.go | 7 +++++++ src/runtime/sema.go | 1 + 4 files changed, 30 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/runtime/chan.go b/src/runtime/chan.go index 48925b2e3..10503f4e1 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -140,10 +140,11 @@ func chansend(t *chantype, c *hchan, ep unsafe.Pointer, block bool, callerpc uin unlock(&c.lock) recvg := sg.g - recvg.param = unsafe.Pointer(sg) if sg.elem != nil { memmove(unsafe.Pointer(sg.elem), ep, uintptr(c.elemsize)) + sg.elem = nil } + recvg.param = unsafe.Pointer(sg) if sg.releasetime != 0 { sg.releasetime = cputicks() } @@ -179,6 +180,7 @@ func chansend(t *chantype, c *hchan, ep unsafe.Pointer, block bool, callerpc uin } panic("send on closed channel") } + gp.param = nil if mysg.releasetime > 0 { blockevent(int64(mysg.releasetime)-t0, 2) } @@ -278,6 +280,7 @@ func closechan(c *hchan) { break } gp := sg.g + sg.elem = nil gp.param = nil if sg.releasetime != 0 { sg.releasetime = cputicks() @@ -292,6 +295,7 @@ func closechan(c *hchan) { break } gp := sg.g + sg.elem = nil gp.param = nil if sg.releasetime != 0 { sg.releasetime = cputicks() @@ -372,6 +376,7 @@ func chanrecv(t *chantype, c *hchan, ep unsafe.Pointer, block bool) (selected, r if ep != nil { memmove(ep, sg.elem, uintptr(c.elemsize)) } + sg.elem = nil gp := sg.g gp.param = unsafe.Pointer(sg) if sg.releasetime != 0 { @@ -409,9 +414,11 @@ func chanrecv(t *chantype, c *hchan, ep unsafe.Pointer, block bool) (selected, r if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } + haveData := gp.param != nil + gp.param = nil releaseSudog(mysg) - if gp.param != nil { + if haveData { // a sender sent us some data. It already wrote to ep. 
selected = true received = true diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 9b9586859..eefe8239f 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -148,6 +148,9 @@ func acquireSudog() *sudog { c := gomcache() s := c.sudogcache if s != nil { + if s.elem != nil { + gothrow("acquireSudog: found s.elem != nil in cache") + } c.sudogcache = s.next return s } @@ -162,12 +165,22 @@ func acquireSudog() *sudog { // which keeps the garbage collector from being invoked. mp := acquirem() p := new(sudog) + if p.elem != nil { + gothrow("acquireSudog: found p.elem != nil after new") + } releasem(mp) return p } //go:nosplit func releaseSudog(s *sudog) { + if s.elem != nil { + gothrow("runtime: sudog with non-nil elem") + } + gp := getg() + if gp.param != nil { + gothrow("runtime: releaseSudog with non-nil gp.param") + } c := gomcache() s.next = c.sudogcache c.sudogcache = s diff --git a/src/runtime/select.go b/src/runtime/select.go index 7716d2d4b..1bcea8c4b 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -368,6 +368,7 @@ loop: // someone woke us up sellock(sel) sg = (*sudog)(gp.param) + gp.param = nil // pass 3 - dequeue from unsuccessful chans // otherwise they stack up on quiet channels @@ -376,6 +377,10 @@ loop: // iterating through the linked list they are in reverse order. cas = nil sglist = gp.waiting + // Clear all elem before unlinking from gp.waiting. + for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { + sg1.elem = nil + } gp.waiting = nil for i := int(sel.ncase) - 1; i >= 0; i-- { k = &scases[pollorder[i]] @@ -506,6 +511,7 @@ syncrecv: if cas.elem != nil { memmove(cas.elem, sg.elem, uintptr(c.elemsize)) } + sg.elem = nil gp = sg.g gp.param = unsafe.Pointer(sg) if sg.releasetime != 0 { @@ -541,6 +547,7 @@ syncsend: if sg.elem != nil { memmove(sg.elem, cas.elem, uintptr(c.elemsize)) } + sg.elem = nil gp = sg.g gp.param = unsafe.Pointer(sg) if sg.releasetime != 0 { diff --git a/src/runtime/sema.go b/src/runtime/sema.go index beacd6716..142d3082c 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -168,6 +168,7 @@ func (root *semaRoot) dequeue(s *sudog) { } else { root.head = s.next } + s.elem = nil s.next = nil s.prev = nil } -- cgit v1.2.1 From 4eb6792aa572c7e6d3448d4cf22223b61b65724f Mon Sep 17 00:00:00 2001 From: Rick Hudson Date: Fri, 3 Oct 2014 11:33:57 -0400 Subject: [dev.garbage] runtime: scan and mark phase refactoring Refactoring of the scan and mark phase so that concurrent GC, in particular the write barrier, can share a common infrastructure. Now that the scan and mark phases have been separated we will be able to scan stacks without blackening any objects. This in turn will allow us to delay installing expensive write barrier code. 
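To make the refactoring concrete: "greying" an object means marking it and queueing it for later scanning, while "blackening" means actually scanning its pointers. The toy Go sketch below (invented names, not the runtime's code) shows that split; greyObject plays the role of greyobject() and the Workbuf queues, and drain plays the role of the scanblock() loop. Separating the two steps is what lets a scan phase grey roots without blackening anything, so the expensive write barrier can be installed later.

package main

type object struct {
	marked   bool
	children []*object
}

// greyObject marks obj and queues it for scanning, mirroring how
// greyobject() sets bitMarked and appends the pointer to a Workbuf.
func greyObject(obj *object, work *[]*object) {
	if obj == nil || obj.marked {
		return
	}
	obj.marked = true
	*work = append(*work, obj)
}

// drain scans queued (grey) objects until none remain, the same loop
// structure scanblock() uses over Workbufs; scanning an object greys
// its children, and a fully scanned object is "black".
func drain(work *[]*object) {
	for len(*work) > 0 {
		obj := (*work)[len(*work)-1]
		*work = (*work)[:len(*work)-1]
		for _, child := range obj.children {
			greyObject(child, work)
		}
	}
}

func main() {
	leaf := &object{}
	root := &object{children: []*object{leaf}}
	var work []*object
	greyObject(root, &work) // scan phase: grey the roots only
	drain(&work)            // mark phase: blacken until no grey remains
}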
LGTM=rsc R=rsc, khr, dvyukov CC=golang-codereviews https://codereview.appspot.com/145640044 Committer: Russ Cox --- src/runtime/malloc.h | 10 +- src/runtime/mgc0.c | 627 ++++++++++++++++++++++++++++++-------------------- src/runtime/proc.c | 3 +- src/runtime/runtime.h | 27 ++- 4 files changed, 404 insertions(+), 263 deletions(-) (limited to 'src') diff --git a/src/runtime/malloc.h b/src/runtime/malloc.h index 3f1981f70..413870c9f 100644 --- a/src/runtime/malloc.h +++ b/src/runtime/malloc.h @@ -86,6 +86,7 @@ typedef struct MSpan MSpan; typedef struct MStats MStats; typedef struct MLink MLink; typedef struct GCStats GCStats; +typedef struct Workbuf Workbuf; enum { @@ -337,8 +338,11 @@ struct MCache StackFreeList stackcache[NumStackOrders]; SudoG* sudogcache; - - void* gcworkbuf; + // Cached P local buffer holding grey objects (marked by not yet scanned) + // Used by mutator for write barrier work. + // GC uses the mcache of the P it is running on for stack and global scanning + // work as well marking. + Workbuf* gcworkbuf; // Local allocator stats, flushed during GC. uintptr local_nlookup; // number of pointer lookups @@ -350,7 +354,7 @@ struct MCache MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass); void runtime·MCache_ReleaseAll(MCache *c); void runtime·stackcache_clear(MCache *c); -void runtime·gcworkbuffree(void *b); +void runtime·gcworkbuffree(Workbuf *b); enum { diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c index 7a3498ae1..b4cd3474d 100644 --- a/src/runtime/mgc0.c +++ b/src/runtime/mgc0.c @@ -66,7 +66,6 @@ enum { Debug = 0, ConcurrentSweep = 1, - WorkbufSize = 4*1024, FinBlockSize = 4*1024, RootData = 0, RootBss = 1, @@ -97,12 +96,12 @@ extern int32 runtime·gcpercent; // uint32 runtime·worldsema = 1; -typedef struct Workbuf Workbuf; -struct Workbuf -{ - LFNode node; // must be first - uintptr nobj; - byte* obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize]; +typedef struct Markbits Markbits; +struct Markbits { + byte *bitp; // pointer to the byte holding xbits + byte shift; // bits xbits needs to be shifted to get bits + byte xbits; // byte holding all the bits from *bitp + byte bits; // bits relevant to corresponding slot. }; extern byte runtime·data[]; @@ -127,15 +126,22 @@ BitVector runtime·gcbssmask; Mutex runtime·gclock; +static Workbuf* getpartial(void); +static void putpartial(Workbuf*); static Workbuf* getempty(Workbuf*); static Workbuf* getfull(Workbuf*); static void putempty(Workbuf*); static Workbuf* handoff(Workbuf*); static void gchelperstart(void); static void flushallmcaches(void); -static bool scanframe(Stkframe *frame, void *unused); -static void scanstack(G *gp); -static BitVector unrollglobgcprog(byte *prog, uintptr size); +static bool scanframe(Stkframe*, void*); +static void scanstack(G*); +static BitVector unrollglobgcprog(byte*, uintptr); +static void scanblock(byte*, uintptr, byte*); +static byte* objectstart(byte*, Markbits*); +static Workbuf* greyobject(byte*, Markbits*, Workbuf*); +static bool inheap(byte*); +static void slottombits(byte*, Markbits*); void runtime·bgsweep(void); static FuncVal bgsweepv = {runtime·bgsweep}; @@ -156,258 +162,279 @@ static struct { uint32 nspan; } work; -// scanblock scans a block of n bytes starting at pointer b for references -// to other objects, scanning any it finds recursively until there are no -// unscanned objects left. Instead of using an explicit recursion, it keeps -// a work list in the Workbuf* structures and loops in the main function -// body. 
Keeping an explicit work list is easier on the stack allocator and -// more efficient. +// Is address b in the known heap. If it doesn't have a valid gcmap +// returns false. For example pointers into stacks will return false. +static bool +inheap(byte *b) +{ + MSpan *s; + pageID k; + uintptr x; + + if(b == nil || b < runtime·mheap.arena_start || b >= runtime·mheap.arena_used) + return false; + // Not a beginning of a block, consult span table to find the block beginning. + k = (uintptr)b>>PageShift; + x = k; + x -= (uintptr)runtime·mheap.arena_start>>PageShift; + s = runtime·mheap.spans[x]; + if(s == nil || k < s->start || b >= s->limit || s->state != MSpanInUse) + return false; + return true; +} + +// Given an address in the heap return the relevant byte from the gcmap. This routine +// can be used on addresses to the start of an object or to the interior of an object. static void -scanblock(byte *b, uintptr n, byte *ptrmask) +slottombits(byte *obj, Markbits *mbits) { - byte *obj, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp, bits, xbits, shift, cached; - uintptr i, nobj, size, idx, x, off, scanbufpos; - intptr ncached; - Workbuf *wbuf; - Iface *iface; - Eface *eface; - Type *typ; + uintptr off; + + off = (uintptr*)((uintptr)obj&~(PtrSize-1)) - (uintptr*)runtime·mheap.arena_start; + mbits->bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1; + mbits->shift = (off % wordsPerBitmapByte) * gcBits; + mbits->xbits = *mbits->bitp; + mbits->bits = (mbits->xbits >> mbits->shift) & bitMask; +} + +// b is a pointer into the heap. +// Find the start of the object referred to by b. +// Set mbits to the associated bits from the bit map. +static byte* +objectstart(byte *b, Markbits *mbits) +{ + byte *obj, *p; MSpan *s; pageID k; - bool keepworking; + uintptr x, size, idx; - // Cache memory arena parameters in local vars. - arena_start = runtime·mheap.arena_start; - arena_used = runtime·mheap.arena_used; + obj = (byte*)((uintptr)b&~(PtrSize-1)); + for(;;) { + slottombits(obj, mbits); + if(mbits->bits&bitBoundary == bitBoundary) + break; + + // Not a beginning of a block, consult span table to find the block beginning. + k = (uintptr)obj>>PageShift; + x = k; + x -= (uintptr)runtime·mheap.arena_start>>PageShift; + s = runtime·mheap.spans[x]; + if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse){ + if(s->state == MSpanStack) + break; // This is legit. + + // The following is catching some bugs left over from + // us not being rigorous about what data structures + // hold valid pointers and different parts of the system + // considering different structures as roots. For example + // if there is a pointer into a stack that is left in + // a global data structure but that part of the runtime knows that + // those structures will be reinitialized before they are + // reused. Unfortunately the GC believes these roots are valid. + // Typically a stack gets moved and only the structures that part of + // the system knows are alive are updated. The span is freed + // after the stack copy and the pointer is still alive. This + // check is catching that bug but for now we will not throw, + // instead we will simply break out of this routine and depend + // on the caller to recognize that this pointer is not a valid + // heap pointer. I leave the code that catches the bug so that once + // resolved we can turn this check back on and throw. 
+ + //runtime·printf("Runtime: Span weird: obj=%p, k=%p", obj, k); + //if (s == nil) + // runtime·printf(" s=nil\n"); + //else + // runtime·printf(" s->start=%p s->limit=%p, s->state=%d\n", s->start*PageSize, s->limit, s->state); + //runtime·throw("Blowup on weird span"); + break; // We are not in a real block throw?? + } + p = (byte*)((uintptr)s->start<<PageShift); + if(s->sizeclass != 0) { + size = s->elemsize; + idx = ((byte*)obj - p)/size; + p = p+idx*size; + } + if(p == obj) { + runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n", + p, s->start*PageSize, s->limit); + runtime·throw("failed to find block beginning"); + } + obj = p; + } + // if size(obj.firstfield) < PtrSize, the &obj.secondfield could map to the boundary bit + // Clear any low bits to get to the start of the object. + // greyobject depends on this. + return obj; +} - wbuf = getempty(nil); - nobj = wbuf->nobj; - wp = &wbuf->obj[nobj]; - keepworking = b == nil; - scanbufpos = 0; - for(i = 0; i < nelem(scanbuf); i++) - scanbuf[i] = nil; +// obj is the start of an object with mark mbits. +// If it isn't already marked, mark it and enqueue into workbuf. +// Return possibly new workbuf to use. +static Workbuf* +greyobject(byte *obj, Markbits *mbits, Workbuf *wbuf) +{ + // obj should be start of allocation, and so must be at least pointer-aligned. + if(((uintptr)obj & (PtrSize-1)) != 0) + runtime·throw("greyobject: obj not pointer-aligned"); + + // If marked we have nothing to do. + if((mbits->bits&bitMarked) != 0) + return wbuf; + + // Each byte of GC bitmap holds info for two words. + // If the current object is larger than two words, or if the object is one word + // but the object it shares the byte with is already marked, + // then all the possible concurrent updates are trying to set the same bit, + // so we can use a non-atomic update. + if((mbits->xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits))) + *mbits->bitp = mbits->xbits | (bitMarked<<mbits->shift); + else + runtime·atomicor8(mbits->bitp, bitMarked<<mbits->shift); + + if(((mbits->xbits>>(mbits->shift+2))&BitsMask) == BitsDead) + return wbuf; // noscan object + + // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but + // seems like a nice optimization that can be added back in. + // There needs to be time between the PREFETCH and the use. + // Previously we put the obj in an 8 element buffer that is drained at a rate + // to give the PREFETCH time to do its work. + // Use of PREFETCHNTA might be more appropriate than PREFETCH + + // If workbuf is full, obtain an empty one. + if(wbuf->nobj >= nelem(wbuf->obj)) { + wbuf = getempty(wbuf); + } + + wbuf->obj[wbuf->nobj] = obj; + wbuf->nobj++; + return wbuf; +} +// Scan the object b of size n, adding pointers to wbuf. +// Return possibly new wbuf to use. +// If ptrmask != nil, it specifies where pointers are in b. +// If ptrmask == nil, the GC bitmap should be consulted. +// In this case, n may be an overestimate of the size; the GC bitmap +// must also be used to make sure the scan stops at the end of b. +static Workbuf* +scanobject(byte *b, uintptr n, byte *ptrmask, Workbuf *wbuf) +{ + byte *obj, *arena_start, *arena_used, *ptrbitp, bits, cshift, cached; + uintptr i; + intptr ncached; + Markbits mbits; + + arena_start = (byte*)runtime·mheap.arena_start; + arena_used = runtime·mheap.arena_used; ptrbitp = nil; cached = 0; ncached = 0; + // Find bits of the beginning of the object. 
+ if(ptrmask == nil) { + b = objectstart(b, &mbits); + ptrbitp = mbits.bitp; //arena_start - off/wordsPerBitmapByte - 1; + cshift = mbits.shift; //(off % wordsPerBitmapByte) * gcBits; + cached = *ptrbitp >> cshift; + cached &= ~bitBoundary; + ncached = (8 - cshift)/gcBits; + } + for(i = 0; i < n; i += PtrSize) { + // Find bits for this word. + if(ptrmask != nil) { + // dense mask (stack or data) + bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask; + } else { + // Check if we have reached end of span. + if((((uintptr)b+i)%PageSize) == 0 && + runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift]) + break; + // Consult GC bitmap. + if(ncached <= 0) { + // Refill cache. + cached = *--ptrbitp; + ncached = 2; + } + bits = cached; + cached >>= gcBits; + ncached--; + + if((bits&bitBoundary) != 0) + break; // reached beginning of the next object + bits = (bits>>2)&BitsMask; + if(bits == BitsDead) + break; // reached no-scan part of the object + } + + if(bits == BitsScalar || bits == BitsDead) + continue; + if(bits != BitsPointer) + runtime·throw("unexpected garbage collection bits"); + + obj = *(byte**)(b+i); + // At this point we have extracted the next potential pointer. + // Check if it points into heap. + if(obj == nil || obj < arena_start || obj >= arena_used) + continue; + // Mark the object, return some important bits. + // If we combine the following two routines we don't have to pass mbits or obj around. + obj = objectstart(obj, &mbits); + wbuf = greyobject(obj, &mbits, wbuf); + } + return wbuf; +} + +// scanblock starts by scanning b as scanobject would. +// If the gcphase is GCscan, that's all scanblock does. +// Otherwise it traverses some fraction of the pointers it found in b, recursively. +// As a special case, scanblock(nil, 0, nil) means to scan previously queued work, +// stopping only when no work is left in the system. +static void +scanblock(byte *b, uintptr n, byte *ptrmask) +{ + Workbuf *wbuf; + bool keepworking; + + wbuf = getpartial(); + if(b != nil) { + wbuf = scanobject(b, n, ptrmask, wbuf); + if(runtime·gcphase == GCscan) { + putpartial(wbuf); + return; + } + } + + keepworking = b == nil; + // ptrmask can have 2 possible values: // 1. nil - obtain pointer mask from GC bitmap. // 2. pointer to a compact mask (for stacks and data). - if(b != nil) - goto scanobj; for(;;) { - if(nobj == 0) { - // Out of work in workbuf. - // First, see is there is any work in scanbuf. - for(i = 0; i < nelem(scanbuf); i++) { - b = scanbuf[scanbufpos]; - scanbuf[scanbufpos++] = nil; - if(scanbufpos == nelem(scanbuf)) - scanbufpos = 0; - if(b != nil) { - n = arena_used - b; // scan until bitBoundary or BitsDead - ptrmask = nil; // use GC bitmap for pointer info - goto scanobj; - } - } + if(wbuf->nobj == 0) { if(!keepworking) { putempty(wbuf); return; } // Refill workbuf from global queue. wbuf = getfull(wbuf); - if(wbuf == nil) + if(wbuf == nil) // nil means out of work barrier reached return; - nobj = wbuf->nobj; - wp = &wbuf->obj[nobj]; } // If another proc wants a pointer, give it some. - if(work.nwait > 0 && nobj > 4 && work.full == 0) { - wbuf->nobj = nobj; + if(work.nwait > 0 && wbuf->nobj > 4 && work.full == 0) { wbuf = handoff(wbuf); - nobj = wbuf->nobj; - wp = &wbuf->obj[nobj]; - } - - wp--; - nobj--; - b = *wp; - n = arena_used - b; // scan until next bitBoundary or BitsDead - ptrmask = nil; // use GC bitmap for pointer info - - scanobj: - // Find bits of the beginning of the object. 
- if(ptrmask == nil) { - off = (uintptr*)b - (uintptr*)arena_start; - ptrbitp = arena_start - off/wordsPerBitmapByte - 1; - shift = (off % wordsPerBitmapByte) * gcBits; - cached = *ptrbitp >> shift; - cached &= ~bitBoundary; - ncached = (8 - shift)/gcBits; - } - for(i = 0; i < n; i += PtrSize) { - obj = nil; - // Find bits for this word. - if(ptrmask == nil) { - // Check is we have reached end of span. - if((((uintptr)b+i)%PageSize) == 0 && - runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift]) - break; - // Consult GC bitmap. - if(ncached <= 0) { - // Refill cache. - cached = *--ptrbitp; - ncached = 2; - } - bits = cached; - cached >>= gcBits; - ncached--; - if((bits&bitBoundary) != 0) - break; // reached beginning of the next object - bits = (bits>>2)&BitsMask; - if(bits == BitsDead) - break; // reached no-scan part of the object - } else // dense mask (stack or data) - bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask; - - if(bits == BitsScalar || bits == BitsDead) - continue; - if(bits == BitsPointer) { - obj = *(byte**)(b+i); - goto markobj; - } - - // With those three out of the way, must be multi-word. - if(bits != BitsMultiWord) - runtime·throw("unexpected garbage collection bits"); - // Find the next pair of bits. - if(ptrmask == nil) { - if(ncached <= 0) { - // Refill cache. - cached = *--ptrbitp; - ncached = 2; - } - bits = (cached>>2)&BitsMask; - } else - bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask; - - switch(bits) { - default: - runtime·throw("unexpected garbage collection bits"); - case BitsIface: - iface = (Iface*)(b+i); - if(iface->tab != nil) { - typ = iface->tab->type; - if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers)) - obj = iface->data; - } - break; - case BitsEface: - eface = (Eface*)(b+i); - typ = eface->type; - if(typ != nil) { - if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers)) - obj = eface->data; - } - break; - } - - i += PtrSize; - cached >>= gcBits; - ncached--; - - markobj: - // At this point we have extracted the next potential pointer. - // Check if it points into heap. - if(obj == nil || obj < arena_start || obj >= arena_used) - continue; - // Mark the object. - off = (uintptr*)obj - (uintptr*)arena_start; - bitp = arena_start - off/wordsPerBitmapByte - 1; - shift = (off % wordsPerBitmapByte) * gcBits; - xbits = *bitp; - bits = (xbits >> shift) & bitMask; - if((bits&bitBoundary) == 0) { - // Not a beginning of a block, consult span table to find the block beginning. - k = (uintptr)obj>>PageShift; - x = k; - x -= (uintptr)arena_start>>PageShift; - s = runtime·mheap.spans[x]; - if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse) - continue; - p = (byte*)((uintptr)s->start<<PageShift); - if(s->sizeclass != 0) { - size = s->elemsize; - idx = ((byte*)obj - p)/size; - p = p+idx*size; - } - if(p == obj) { - runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n", - p, s->start*PageSize, s->limit); - runtime·throw("failed to find block beginning"); - } - obj = p; - goto markobj; - } - - // Now we have bits, bitp, and shift correct for - // obj pointing at the base of the object. - // Only care about not marked objects. - if((bits&bitMarked) != 0) - continue; - // If obj size is greater than 8, then each byte of GC bitmap - // contains info for at most one object. In such case we use - // non-atomic byte store to mark the object. 
This can lead - // to double enqueue of the object for scanning, but scanning - // is an idempotent operation, so it is OK. This cannot lead - // to bitmap corruption because the single marked bit is the - // only thing that can change in the byte. - // For 8-byte objects we use non-atomic store, if the other - // quadruple is already marked. Otherwise we resort to CAS - // loop for marking. - if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits))) - *bitp = xbits | (bitMarked<<shift); - else - runtime·atomicor8(bitp, bitMarked<<shift); - if(((xbits>>(shift+2))&BitsMask) == BitsDead) - continue; // noscan object - - // Queue the obj for scanning. - PREFETCH(obj); - obj = (byte*)((uintptr)obj & ~(PtrSize-1)); - p = scanbuf[scanbufpos]; - scanbuf[scanbufpos++] = obj; - if(scanbufpos == nelem(scanbuf)) - scanbufpos = 0; - if(p == nil) - continue; - - // If workbuf is full, obtain an empty one. - if(nobj >= nelem(wbuf->obj)) { - wbuf->nobj = nobj; - wbuf = getempty(wbuf); - nobj = wbuf->nobj; - wp = &wbuf->obj[nobj]; - } - *wp = p; - wp++; - nobj++; } - if(Debug && ptrmask == nil) { - // For heap objects ensure that we did not overscan. - n = 0; - p = nil; - if(!runtime·mlookup(b, &p, &n, nil) || b != p || i > n) { - runtime·printf("runtime: scanned (%p,%p), heap object (%p,%p)\n", b, i, p, n); - runtime·throw("scanblock: scanned invalid object"); - } - } + // This might be a good place to add prefetch code... + // if(wbuf->nobj > 4) { + // PREFETCH(wbuf->obj[wbuf->nobj - 3]; + // } + --wbuf->nobj; + b = wbuf->obj[wbuf->nobj]; + wbuf = scanobject(b, runtime·mheap.arena_used - b, nil, wbuf); } } @@ -460,7 +487,8 @@ markroot(ParFor *desc, uint32 i) spf = (SpecialFinalizer*)sp; // A finalizer can be set for an inner byte of an object, find object beginning. p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize); - scanblock(p, s->elemsize, nil); + if(runtime·gcphase != GCscan) + scanblock(p, s->elemsize, nil); // Scanned during mark phase scanblock((void*)&spf->fn, PtrSize, oneptr); } } @@ -477,7 +505,7 @@ markroot(ParFor *desc, uint32 i) gp = runtime·allg[i - RootCount]; // remember when we've first observed the G blocked // needed only to output in traceback - status = runtime·readgstatus(gp); + status = runtime·readgstatus(gp); // We are not in a scan state if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0) gp->waitsince = work.tstart; // Shrink a stack if not much of it is being used. @@ -487,7 +515,31 @@ markroot(ParFor *desc, uint32 i) else gp->gcworkdone = false; restart = runtime·stopg(gp); - scanstack(gp); + + // goroutine will scan its own stack when it stops running. + // Wait until it has. + while((status = runtime·readgstatus(gp)) == Grunning && !gp->gcworkdone) { + if(status == Gdead) { + // TBD you need to explain why Gdead without gp->gcworkdone + // being true. If there is a race then it needs to be + // explained here. + gp->gcworkdone = true; // scan is a noop + break; + //do nothing, scan not needed. + } + // try again + } + + // scanstack(gp); now done as part of gcphasework + // But to make sure we finished we need to make sure that + // the stack traps have all responded so drop into + // this while loop until they respond. + if(!gp->gcworkdone) + // For some reason a G has not completed its work. This is a bug that + // needs to be investigated. For now I'll just print this message in + // case the bug is benign. 
+ runtime·printf("runtime:markroot: post stack scan work not done gp=%p has status %x\n", gp, status); + if(restart) runtime·restartg(gp); break; @@ -511,8 +563,12 @@ getempty(Workbuf *b) } if(b == nil) b = (Workbuf*)runtime·lfstackpop(&work.empty); - if(b == nil) + if(b == nil) { b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys); + b->nobj = 0; + } + if(b->nobj != 0) + runtime·throw("getempty: b->nobj not 0/n"); b->nobj = 0; return b; } @@ -522,6 +578,8 @@ putempty(Workbuf *b) { MCache *c; + if(b->nobj != 0) + runtime·throw("putempty: b->nobj=%D not 0\n"); c = g->m->mcache; if(c->gcworkbuf == nil) { c->gcworkbuf = b; @@ -530,21 +588,70 @@ putempty(Workbuf *b) runtime·lfstackpush(&work.empty, &b->node); } +// Get an partially empty work buffer from the mcache structure +// and if non is available get an empty one. +static Workbuf* +getpartial(void) +{ + MCache *c; + Workbuf *b; + + c = g->m->mcache; + if(c->gcworkbuf != nil) { + b = c->gcworkbuf; + c->gcworkbuf = nil; + } else { + b = getempty(nil); + } + return b; +} + +static void +putpartial(Workbuf *b) +{ + MCache *c; + + c = g->m->mcache; + if(c->gcworkbuf == nil) { + c->gcworkbuf = b; + return; + } + + runtime·throw("putpartial: c->gcworkbuf is not nil\n"); + + runtime·lfstackpush(&work.full, &b->node); +} + void -runtime·gcworkbuffree(void *b) +runtime·gcworkbuffree(Workbuf *b) { - if(b != nil) + if(b != nil) { + if(b->nobj != 0) + runtime·throw("gcworkbufferfree: b->nobj not 0\n"); putempty(b); + } } + // Get a full work buffer off the work.full list, or return nil. +// getfull acts as a barrier for work.nproc helpers. As long as one +// gchelper is actively marking objects it +// may create a workbuffer that the other helpers can work on. +// The for loop either exits when a work buffer is found +// or when _all_ of the work.nproc gc helpers are in the loop +// looking for work and thus not capable of creating new work. +// This is in fact the termination condition for the STW mark +// phase. static Workbuf* getfull(Workbuf *b) { int32 i; - if(b != nil) + if(b != nil) { + if(b->nobj != 0) + runtime·printf("runtime:getfull: b->nobj=%D not 0.", b->nobj); runtime·lfstackpush(&work.empty, &b->node); + } b = (Workbuf*)runtime·lfstackpop(&work.full); if(b != nil || work.nproc == 1) return b; @@ -674,7 +781,7 @@ scanframe(Stkframe *frame, void *unused) } bv = runtime·stackmapdata(stackmap, pcdata); } - scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata); + scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata); } return true; } @@ -727,12 +834,23 @@ runtime·gcphasework(G *gp) case GCquiesce: case GCstw: case GCsweep: - // No work for now. + // No work. + break; + case GCscan: + // scan the stack, mark the objects, put pointers in work buffers + // hanging off the P where this is being run. + scanstack(gp); break; case GCmark: + case GCmarktermination: + // // Disabled until concurrent GC is implemented // but indicate the scan has been done. - // scanstack(gp); + scanstack(gp); + // scanstack will call shade which will populate + // the Workbuf. 
+ // emptywbuf(gp) will empty it before returning + // break; } gp->gcworkdone = true; @@ -1108,6 +1226,7 @@ runtime·gosweepdone(void) return runtime·mheap.sweepdone; } + void runtime·gchelper(void) { @@ -1118,10 +1237,8 @@ runtime·gchelper(void) // parallel mark for over gc roots runtime·parfordo(work.markfor); - - // help other threads scan secondary blocks - scanblock(nil, 0, nil); - + if(runtime·gcphase != GCscan) + scanblock(nil, 0, nil); // blocks in getfull nproc = work.nproc; // work.nproc can change right after we increment work.ndone if(runtime·xadd(&work.ndone, +1) == nproc-1) runtime·notewakeup(&work.alldone); @@ -1288,6 +1405,7 @@ runtime·gcinit(void) runtime·gcbssmask = unrollglobgcprog(runtime·gcbss, runtime·ebss - runtime·bss); } +// Called from malloc.go using onM, stopping and starting the world handled in caller. void runtime·gc_m(void) { @@ -1311,7 +1429,8 @@ gc(struct gc_args *args) int64 t0, t1, t2, t3, t4; uint64 heap0, heap1, obj; GCStats stats; - + uint32 oldphase; + if(runtime·debug.allocfreetrace) runtime·tracegc(); @@ -1327,7 +1446,7 @@ gc(struct gc_args *args) while(runtime·sweepone() != -1) runtime·sweep.npausesweep++; - // Cache runtime.mheap.allspans in work.spans to avoid conflicts with + // Cache runtime·mheap.allspans in work.spans to avoid conflicts with // resizing/freeing allspans. // New spans can be created while GC progresses, but they are not garbage for // this round: @@ -1344,10 +1463,13 @@ gc(struct gc_args *args) work.spans = runtime·mheap.allspans; work.nspan = runtime·mheap.nspan; runtime·unlock(&runtime·mheap.lock); + oldphase = runtime·gcphase; work.nwait = 0; work.ndone = 0; - work.nproc = runtime·gcprocs(); + work.nproc = runtime·gcprocs(); + runtime·gcphase = GCmark; //^^ vv + runtime·parforsetup(work.markfor, work.nproc, RootCount + runtime·allglen, nil, false, markroot); if(work.nproc > 1) { runtime·noteclear(&work.alldone); @@ -1360,8 +1482,9 @@ gc(struct gc_args *args) gchelperstart(); runtime·parfordo(work.markfor); - scanblock(nil, 0, nil); + scanblock(nil, 0, nil); + runtime·gcphase = oldphase; //^^ vv t3 = 0; if(runtime·debug.gctrace) t3 = runtime·nanotime(); diff --git a/src/runtime/proc.c b/src/runtime/proc.c index 25f916640..1f1044d1d 100644 --- a/src/runtime/proc.c +++ b/src/runtime/proc.c @@ -623,9 +623,10 @@ mquiesce(G *gpmaster) uint32 status; uint32 activeglen; - activeglen = runtime·allglen; // enqueue the calling goroutine. runtime·restartg(gpmaster); + + activeglen = runtime·allglen; for(i = 0; i < activeglen; i++) { gp = runtime·allg[i]; if(runtime·readgstatus(gp) == Gdead) diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index adc74cf41..74d7ba4f5 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -93,6 +93,7 @@ typedef struct PollDesc PollDesc; typedef struct DebugVars DebugVars; typedef struct ForceGCState ForceGCState; typedef struct Stack Stack; +typedef struct Workbuf Workbuf; /* * Per-CPU declaration. 
@@ -303,7 +304,7 @@ struct G bool paniconfault; // panic (instead of crash) on unexpected fault address bool preemptscan; // preempted g does scan for GC bool gcworkdone; // debug: cleared at begining of gc work phase cycle, set by gcphasework, tested at end of cycle - bool throwsplit; // must not split stack + bool throwsplit; // must not split stack int8 raceignore; // ignore race detection events M* m; // for debuggers, but offset not hard-coded M* lockedm; @@ -561,6 +562,16 @@ struct ParFor uint64 nsleep; }; +enum { + WorkbufSize = 4*1024, +}; +struct Workbuf +{ + LFNode node; // must be first + uintptr nobj; + byte* obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize]; +}; + // Track memory allocated by code not written in Go during a cgo call, // so that the garbage collector can see them. struct CgoMal @@ -583,12 +594,14 @@ struct DebugVars // Indicates to write barrier and sychronization task to preform. enum -{ // Synchronization Write barrier - GCoff, // stop and start nop - GCquiesce, // stop and start nop - GCstw, // stop the ps nop - GCmark, // scan the stacks and start no white to black - GCsweep, // stop and start nop +{ // Action WB installation + GCoff = 0, // stop and start no wb + GCquiesce, // stop and start no wb + GCstw, // stop the ps nop + GCscan, // scan the stacks prior to marking + GCmark, // mark use wbufs from GCscan and globals, scan the stacks, then go to GCtermination + GCmarktermination, // mark termination detection. Allocate black, Ps help out GC + GCsweep, // stop and start nop }; struct ForceGCState -- cgit v1.2.1 From af64ab79c0ad72decf083d78cf54257d009741b5 Mon Sep 17 00:00:00 2001 From: Rick Hudson Date: Tue, 14 Oct 2014 09:51:46 -0400 Subject: [dev.garbage] runtime: Write barrier code. Comments lay out the concurrent GC algorithms. This CL implements parts of the algorithm. The acknowledgement code has been removed from this CL LGTM=rsc, dvyukov R=dvyukov, rsc CC=golang-codereviews https://codereview.appspot.com/151540043 --- src/runtime/mgc0.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 129 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c index 39fae9bbe..dabd38a60 100644 --- a/src/runtime/mgc0.c +++ b/src/runtime/mgc0.c @@ -4,22 +4,73 @@ // Garbage collector (GC). // -// GC is: -// - mark&sweep -// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc) -// - parallel (up to MaxGcproc threads) -// - partially concurrent (mark is stop-the-world, while sweep is concurrent) -// - non-moving/non-compacting -// - full (non-partial) +// The GC runs concurrently with mutator threads, is type accurate (aka precise), allows multiple GC +// thread to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is +// non-generational and non-compacting. Allocation is done using size segregated per P allocation +// areas to minimize fragmentation while eliminating locks in the common case. // -// GC rate. -// Next GC is after we've allocated an extra amount of memory proportional to -// the amount already in use. The proportion is controlled by GOGC environment variable -// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M -// (this mark is tracked in next_gc variable). This keeps the GC cost in linear -// proportion to the allocation cost. Adjusting GOGC just changes the linear constant -// (and also the amount of extra memory used). 
+// The algorithm decomposes into several steps. +// This is a high level description of the algorithm being used. For an overview of GC a good +// place to start is Richard Jones' gchandbook.org. +// +// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see +// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978. +// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 966-975. +// For journal quality proofs that these steps are complete, correct, and terminate see +// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world. +// Concurrency and Computation: Practice and Experience 15(3-5), 2003. // +// 0. Set phase = GCscan from GCoff. +// 1. Wait for all P's to acknowledge phase change. +// At this point all goroutines have passed through a GC safepoint and +// know we are in the GCscan phase. +// 2. GC scans all goroutine stacks, marks and enqueues all encountered pointers +// (marking avoids most duplicate enqueuing but races may produce duplication which is benign). +// Preempted goroutines are scanned before P schedules next goroutine. +// 3. Set phase = GCmark. +// 4. Wait for all P's to acknowledge phase change. +// 5. Now write barrier marks and enqueues black or grey to white pointers. If a pointer is +// stored into a white slot, such pointer is not marked. +// Malloc still allocates white (non-marked) objects. +// 6. Meanwhile GC transitively walks the heap marking reachable objects. +// 7. When GC finishes marking heap, it preempts P's one-by-one and +// retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine +// currently scheduled on the P). +// 8. Once the GC has exhausted all available marking work it sets phase = marktermination. +// 9. Wait for all P's to acknowledge phase change. +// 10. Malloc now allocates black objects, so number of unmarked reachable objects +// monotonically decreases. +// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet reachable objects. +// 12. When GC completes a full cycle over P's and discovers no new grey +// objects, (which means all reachable objects are marked) set phase = GCsweep. +// 13. Wait for all P's to acknowledge phase change. +// 14. Now malloc allocates white (but sweeps spans before use). +// Write barrier becomes nop. +// 15. GC does background sweeping, see description below. +// 16. When sweeping is complete set phase to GCoff. +// 17. When sufficient allocation has taken place replay the sequence starting at 0 above, +// see discussion of GC rate below. + +// Changing phases. +// Phases are changed by setting the gcphase to the next phase and call ackgcphase. +// All phase action must be benign in the presence of a change. +// Starting with GCoff +// GCoff to GCscan +// GCscan scans stacks and globals greying them and never marks an object black. +// Once all the P's are aware of the new phase they will scan gs on preemption. +// This means that the scanning of preempted gs can't start until all the Ps +// have acknowledged. +// GCscan to GCmark +// GCMark turns on the write barrier which also only greys objects. No scanning +// of objects (making them black) can happen until all the Ps have acknowledged +// the phase change. 
+// GCmark to GCmarktermination +// The only change here is that we start allocating black so the Ps must acknowledge +// the change before we begin the termination algorithm. +// GCmarktermination to GCsweep +// Objects currently on the freelist must be marked black for this to work. +// Are things on the free lists black or white? How does the sweep phase work? // Concurrent sweep. // The sweep phase proceeds concurrently with normal program execution. // The heap is swept span-by-span both lazily (when a goroutine needs another span) @@ -50,6 +101,14 @@ // The finalizer goroutine is kicked off only when all spans are swept. // When the next GC starts, it sweeps all not-yet-swept spans (if any). +// GC rate. +// Next GC is after we've allocated an extra amount of memory proportional to +// the amount already in use. The proportion is controlled by GOGC environment variable +// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M +// (this mark is tracked in next_gc variable). This keeps the GC cost in linear +// proportion to the allocation cost. Adjusting GOGC just changes the linear constant +// (and also the amount of extra memory used). + #include "runtime.h" #include "arch_GOARCH.h" #include "malloc.h" @@ -141,6 +200,8 @@ static void scanblock(byte*, uintptr, byte*); static byte* objectstart(byte*, Markbits*); static Workbuf* greyobject(byte*, Markbits*, Workbuf*); static bool inheap(byte*); +static bool shaded(byte*); +static void shade(byte*); static void slottombits(byte*, Markbits*); void runtime·bgsweep(void); @@ -633,13 +694,12 @@ runtime·gcworkbuffree(Workbuf *b) } } - // Get a full work buffer off the work.full list, or return nil. // getfull acts as a barrier for work.nproc helpers. As long as one // gchelper is actively marking objects it // may create a workbuffer that the other helpers can work on. // The for loop either exits when a work buffer is found -// or when _all_ of the work.nproc gc helpers are in the loop +// or when _all_ of the work.nproc GC helpers are in the loop // looking for work and thus not capable of creating new work. // This is in fact the termination condition for the STW mark // phase. @@ -823,6 +883,59 @@ scanstack(G *gp) runtime·tracebackdefers(gp, &fn, nil); } +// If the slot is grey or black return true, if white return false. +// If the slot is not in the known heap and thus does not have a valid GC bitmap then +// it is considered grey. Globals and stacks can hold such slots. +// The slot is grey if its mark bit is set and it is enqueued to be scanned. +// The slot is black if it has already been scanned. +// It is white if it has a valid mark bit and the bit is not set. +static bool +shaded(byte *slot) +{ + Markbits mbits; + + if(!inheap(slot)) // non-heap slots considered grey + return true; + + objectstart(slot, &mbits); + return (mbits.bits&bitMarked) != 0; +} + +// Shade the object if it isn't already. +// The object is not nil and known to be in the heap. +static void +shade(byte *b) +{ + byte *obj; + Workbuf *wbuf; + Markbits mbits; + + if(!inheap(b)) + runtime·throw("shade: passed an address not in the heap"); + + wbuf = getpartial(); + // Mark the object, return some important bits. + // If we combine the following two routines we don't have to pass mbits or obj around. 
+ obj = objectstart(b, &mbits); + wbuf = greyobject(obj, &mbits, wbuf); // augments the wbuf + putpartial(wbuf); + return; +} + +// This is the Dijkstra barrier coarsened to shade grey to white whereas +// the original Dijkstra barrier only shaded black to white. +// +// Shade indicates that it has seen a white pointer by adding the referent +// to wbuf. +void +runtime·markwb(void **slot, void *ptr) +{ + // initial nil check avoids some needless loads + if(ptr != nil && inheap(ptr) && shaded((void*)slot)) + shade(ptr); + *slot = ptr; +} + // The gp has been moved to a gc safepoint. If there is gcphase specific // work it is done here. void -- cgit v1.2.1 From 978c971bacf92ee97b8e24ce45b485afa1c31fad Mon Sep 17 00:00:00 2001 From: Rick Hudson <rlh@golang.org> Date: Thu, 23 Oct 2014 15:51:17 -0400 Subject: [dev.garbage] runtime: simplify lfstack.c due to undiagnosed buffer corruption. The changes got rid of the problems we were seeing. We suspect the pushcnt field has a race. LGTM=rsc R=dvyukov, rsc CC=golang-codereviews https://codereview.appspot.com/159330043 Committer: Russ Cox <rsc@golang.org> --- src/runtime/lfstack.c | 14 ++++++-------- src/runtime/runtime.h | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/runtime/lfstack.c b/src/runtime/lfstack.c index 57e0af282..0ced839c2 100644 --- a/src/runtime/lfstack.c +++ b/src/runtime/lfstack.c @@ -46,7 +46,7 @@ runtime·lfstackpush(uint64 *head, LFNode *node) new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS); for(;;) { old = runtime·atomicload64(head); - node->next = (LFNode*)(uintptr)(old&PTR_MASK); + node->next = old; if(runtime·cas64(head, old, new)) break; } @@ -55,19 +55,17 @@ LFNode* runtime·lfstackpop(uint64 *head) { - LFNode *node, *node2; - uint64 old, new; + LFNode *node; + uint64 old, next; for(;;) { old = runtime·atomicload64(head); if(old == 0) return nil; node = (LFNode*)(uintptr)(old&PTR_MASK); - node2 = runtime·atomicloadp(&node->next); - new = 0; - if(node2 != nil) - new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt&CNT_MASK)<<PTR_BITS); - if(runtime·cas64(head, old, new)) + next = runtime·atomicload64(&node->next); + + if(runtime·cas64(head, old, next)) return node; } } diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index bea773799..37929c59c 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -573,7 +573,7 @@ enum { // Lock-free stack node. struct LFNode { - LFNode *next; + uint64 next; uintptr pushcnt; }; -- cgit v1.2.1 From 2c987a9ddefd9d256bf9e7e21396e2485a7d6514 Mon Sep 17 00:00:00 2001 From: Rick Hudson <rlh@golang.org> Date: Fri, 24 Oct 2014 11:07:16 -0400 Subject: [dev.garbage] runtime: Concurrent scan code Routines and logic to perform a concurrent stack scan of goroutines. This CL exercises most of the functionality needed. The major exception is that it does not scan running goroutines. After doing the scans it relies on a STW to finish the GC, including rescanning the stacks. It is intended to achieve correctness, performance will follow. 
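A rough Go sketch of the collection sequence this CL wires up (placeholder function names, not the runtime's API; the real hooks appear in the malloc.go hunk below): sweeping is finished under stop-the-world, the stacks are then scanned concurrently while mutators run, and a final stop-the-world pass rescans, marks, and sweeps.

package main

import "fmt"

// Hypothetical stand-ins for the runtime's phases; each just narrates
// the step it represents.
func stopTheWorld()    { fmt.Println("stop the world") }
func startTheWorld()   { fmt.Println("start the world") }
func finishSweep()     { fmt.Println("finish sweeping the previous cycle") }
func concurrentScan()  { fmt.Println("scan stacks, greying referents, mutators running") }
func stwMarkAndSweep() { fmt.Println("rescan, mark, and begin sweep under STW") }

func gcCycle() {
	stopTheWorld()
	finishSweep() // sweep must complete before a new scan begins
	startTheWorld()

	concurrentScan() // the new GCscan phase: grey only, no blackening

	stopTheWorld()
	stwMarkAndSweep() // correctness first; concurrent marking comes later
	startTheWorld()
}

func main() { gcCycle() }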
LGTM=rsc R=golang-codereviews, rsc CC=dvyukov, golang-codereviews https://codereview.appspot.com/156580043 --- src/runtime/malloc.go | 8 ++ src/runtime/malloc.h | 5 - src/runtime/mcache.c | 2 +- src/runtime/mgc0.c | 308 ++++++++++++++++++++++++++++++++++---------------- src/runtime/proc.c | 19 ++-- src/runtime/runtime.h | 2 + src/runtime/stack.c | 35 +++--- src/runtime/stubs.go | 2 + 8 files changed, 254 insertions(+), 127 deletions(-) (limited to 'src') diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 9b4264f2b..c56e03886 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -438,7 +438,15 @@ func gogc(force int32) { mp = acquirem() mp.gcing = 1 releasem(mp) + onM(stoptheworld) + onM(finishsweep_m) // finish sweep before we start concurrent scan. + onM(starttheworld) + + // Do a concurrent heap scan before we stop the world. + onM(gcscan_m) + onM(stoptheworld) + if mp != acquirem() { gothrow("gogc: rescheduled") } diff --git a/src/runtime/malloc.h b/src/runtime/malloc.h index edcd0be77..e606b0c7a 100644 --- a/src/runtime/malloc.h +++ b/src/runtime/malloc.h @@ -343,11 +343,6 @@ struct MCache StackFreeList stackcache[NumStackOrders]; SudoG* sudogcache; - // Cached P local buffer holding grey objects (marked by not yet scanned) - // Used by mutator for write barrier work. - // GC uses the mcache of the P it is running on for stack and global scanning - // work as well marking. - Workbuf* gcworkbuf; // Local allocator stats, flushed during GC. uintptr local_nlookup; // number of pointer lookups diff --git a/src/runtime/mcache.c b/src/runtime/mcache.c index 5fdbe3266..95ddced3e 100644 --- a/src/runtime/mcache.c +++ b/src/runtime/mcache.c @@ -39,12 +39,12 @@ runtime·allocmcache(void) return c; } +// mheap.lock needs to be held to release the gcworkbuf. static void freemcache(MCache *c) { runtime·MCache_ReleaseAll(c); runtime·stackcache_clear(c); - runtime·gcworkbuffree(c->gcworkbuf); runtime·lock(&runtime·mheap.lock); runtime·purgecachedstats(c); runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c); diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c index 8620f47af..c385d51cf 100644 --- a/src/runtime/mgc0.c +++ b/src/runtime/mgc0.c @@ -52,7 +52,7 @@ // see discussion of GC rate below. // Changing phases. -// Phases are changed by setting the gcphase to the next phase and call ackgcphase. +// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase. // All phase action must be benign in the presence of a change. // Starting with GCoff // GCoff to GCscan @@ -137,7 +137,7 @@ enum { // ptrmask for an allocation containing a single pointer. static byte oneptr[] = {BitsPointer}; -// Initialized from $GOGC. GOGC=off means no gc. +// Initialized from $GOGC. GOGC=off means no GC. extern int32 runtime·gcpercent; // Holding worldsema grants an M the right to try to stop the world. 
@@ -185,11 +185,12 @@ BitVector runtime·gcbssmask; Mutex runtime·gclock; -static Workbuf* getpartial(void); +static Workbuf* getpartialorempty(void); static void putpartial(Workbuf*); static Workbuf* getempty(Workbuf*); static Workbuf* getfull(Workbuf*); static void putempty(Workbuf*); +static void putfull(Workbuf*); static Workbuf* handoff(Workbuf*); static void gchelperstart(void); static void flushallmcaches(void); @@ -205,12 +206,14 @@ static void shade(byte*); static void slottombits(byte*, Markbits*); void runtime·bgsweep(void); +void runtime·finishsweep_m(void); static FuncVal bgsweepv = {runtime·bgsweep}; typedef struct WorkData WorkData; struct WorkData { - uint64 full; // lock-free list of full blocks - uint64 empty; // lock-free list of empty blocks + uint64 full; // lock-free list of full blocks + uint64 empty; // lock-free list of empty blocks + uint64 partial; // lock-free list of partially filled blocks byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait uint32 nproc; int64 tstart; @@ -455,15 +458,22 @@ scanblock(byte *b, uintptr n, byte *ptrmask) Workbuf *wbuf; bool keepworking; - wbuf = getpartial(); + wbuf = getpartialorempty(); if(b != nil) { wbuf = scanobject(b, n, ptrmask, wbuf); if(runtime·gcphase == GCscan) { + if(inheap(b) && !ptrmask) + // b is in heap, we are in GCscan so there should be a ptrmask. + runtime·throw("scanblock: In GCscan phase and inheap is true."); + // GCscan only goes one level deep since mark wb not turned on. putpartial(wbuf); return; } } - + if(runtime·gcphase == GCscan) { + runtime·throw("scanblock: In GCscan phase but no b passed in."); + } + keepworking = b == nil; // ptrmask can have 2 possible values: @@ -479,6 +489,11 @@ scanblock(byte *b, uintptr n, byte *ptrmask) wbuf = getfull(wbuf); if(wbuf == nil) // nil means out of work barrier reached return; + + if(wbuf->nobj<=0) { + runtime·throw("runtime:scanblock getfull returns empty buffer"); + } + } // If another proc wants a pointer, give it some. @@ -506,7 +521,7 @@ markroot(ParFor *desc, uint32 i) void *p; uint32 status; bool restart; - + USED(&desc); // Note: if you add a case here, please also update heapdump.c:dumproots. switch(i) { @@ -553,7 +568,8 @@ markroot(ParFor *desc, uint32 i) break; case RootFlushCaches: - flushallmcaches(); + if (runtime·gcphase != GCscan) // Do not flush mcaches during GCscan phase. + flushallmcaches(); break; default: @@ -566,9 +582,10 @@ markroot(ParFor *desc, uint32 i) status = runtime·readgstatus(gp); // We are not in a scan state if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0) gp->waitsince = runtime·work.tstart; - // Shrink a stack if not much of it is being used. - runtime·shrinkstack(gp); - if(runtime·readgstatus(gp) == Gdead) + // Shrink a stack if not much of it is being used but not in the scan phase. + if (runtime·gcphase != GCscan) // Do not shrink during GCscan phase. + runtime·shrinkstack(gp); + if(runtime·readgstatus(gp) == Gdead) gp->gcworkdone = true; else gp->gcworkdone = false; @@ -576,121 +593,120 @@ markroot(ParFor *desc, uint32 i) // goroutine will scan its own stack when it stops running. // Wait until it has. - while((status = runtime·readgstatus(gp)) == Grunning && !gp->gcworkdone) { + while(runtime·readgstatus(gp) == Grunning && !gp->gcworkdone) { + } + + // scanstack(gp) is done as part of gcphasework + // But to make sure we finished we need to make sure that + // the stack traps have all responded so drop into + // this while loop until they respond. 
+ while(!gp->gcworkdone){ + status = runtime·readgstatus(gp); if(status == Gdead) { - // TBD you need to explain why Gdead without gp->gcworkdone - // being true. If there is a race then it needs to be - // explained here. gp->gcworkdone = true; // scan is a noop break; //do nothing, scan not needed. } - // try again + if(status == Gwaiting || status == Grunnable) + restart = runtime·stopg(gp); } - - // scanstack(gp); now done as part of gcphasework - // But to make sure we finished we need to make sure that - // the stack traps have all responded so drop into - // this while loop until they respond. - if(!gp->gcworkdone) - // For some reason a G has not completed its work. This is a bug that - // needs to be investigated. For now I'll just print this message in - // case the bug is benign. - runtime·printf("runtime:markroot: post stack scan work not done gp=%p has status %x\n", gp, status); - if(restart) runtime·restartg(gp); break; } } +// wblock is used for creating new empty work buffer blocks. +static Mutex wblock; + // Get an empty work buffer off the work.empty list, // allocating new buffers as needed. static Workbuf* getempty(Workbuf *b) { - MCache *c; - - if(b != nil) - runtime·lfstackpush(&runtime·work.full, &b->node); - b = nil; - c = g->m->mcache; - if(c->gcworkbuf != nil) { - b = c->gcworkbuf; - c->gcworkbuf = nil; + if(b != nil) { + putfull(b); + b = nil; } - if(b == nil) + if(runtime·work.empty) b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty); + + if(b && b->nobj != 0) { + runtime·printf("m%d: getempty: popped b=%p with non-zero b->nobj=%D\n", g->m->id, b, b->nobj); + runtime·throw("getempty: workbuffer not empty, b->nobj not 0"); + } if(b == nil) { + runtime·lock(&wblock); b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys); b->nobj = 0; + runtime·unlock(&wblock); } - if(b->nobj != 0) - runtime·throw("getempty: b->nobj not 0/n"); - b->nobj = 0; return b; } static void putempty(Workbuf *b) { - MCache *c; - - if(b->nobj != 0) - runtime·throw("putempty: b->nobj=%D not 0\n"); - c = g->m->mcache; - if(c->gcworkbuf == nil) { - c->gcworkbuf = b; - return; + if(b->nobj != 0) { + runtime·throw("putempty: b->nobj not 0\n"); } runtime·lfstackpush(&runtime·work.empty, &b->node); } -// Get an partially empty work buffer from the mcache structure -// and if non is available get an empty one. +// Put a full or partially full workbuf on the full list. +static void +putfull(Workbuf *b) +{ + if(b->nobj <= 0) { + runtime·throw("putfull: b->nobj <= 0\n"); + } + runtime·lfstackpush(&runtime·work.full, &b->node); +} + +// Get an partially empty work buffer +// if none are available get an empty one. 
 static Workbuf*
-getpartial(void)
+getpartialorempty(void)
 {
-	MCache *c;
 	Workbuf *b;
 
-	c = g->m->mcache;
-	if(c->gcworkbuf != nil) {
-		b = c->gcworkbuf;
-		c->gcworkbuf = nil;
-	} else {
+	b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
+	if(b == nil)
 		b = getempty(nil);
-	}
 	return b;
 }
 
 static void
 putpartial(Workbuf *b)
 {
-	MCache *c;
-
-	c = g->m->mcache;
-	if(c->gcworkbuf == nil) {
-		c->gcworkbuf = b;
-		return;
+	if(b->nobj == 0)
+		runtime·lfstackpush(&runtime·work.empty, &b->node);
+	else if (b->nobj < nelem(b->obj))
+		runtime·lfstackpush(&runtime·work.partial, &b->node);
+	else if (b->nobj == nelem(b->obj))
+		runtime·lfstackpush(&runtime·work.full, &b->node);
+	else {
+		runtime·printf("b=%p, b->nobj=%D, nelem(b->obj)=%d\n", b, b->nobj, nelem(b->obj));
+		runtime·throw("putpartial: bad Workbuf b->nobj");
 	}
-
-	runtime·throw("putpartial: c->gcworkbuf is not nil\n");
-
-	runtime·lfstackpush(&runtime·work.full, &b->node);
 }
 
 void
 runtime·gcworkbuffree(Workbuf *b)
 {
-	if(b != nil) {
-		if(b->nobj != 0)
-			runtime·throw("gcworkbufferfree: b->nobj not 0\n");
+	if(b == nil)
+		return;
+	if(b->nobj == 0)
 		putempty(b);
-	}
+	else
+		putfull(b);
 }
 
-// Get a full work buffer off the work.full list, or return nil.
+// Get a full work buffer off the work.full or a partially
+// filled one off the work.partial list. If nothing is available
+// wait until all the other gc helpers have finished and then
+// return nil.
 // getfull acts as a barrier for work.nproc helpers. As long as one
 // gchelper is actively marking objects it
 // may create a workbuffer that the other helpers can work on.
@@ -704,12 +720,12 @@ getfull(Workbuf *b)
 {
 	int32 i;
 
-	if(b != nil) {
-		if(b->nobj != 0)
-			runtime·printf("runtime:getfull: b->nobj=%D not 0.", b->nobj);
-		runtime·lfstackpush(&runtime·work.empty, &b->node);
-	}
+	if(b != nil)
+		putempty(b);
+
 	b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
+	if(b==nil)
+		b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
 	if(b != nil || runtime·work.nproc == 1)
 		return b;
 
@@ -718,7 +734,9 @@ getfull(Workbuf *b)
 		if(runtime·work.full != 0) {
 			runtime·xadd(&runtime·work.nwait, -1);
 			b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
-			if(b != nil)
+			if(b==nil)
+				b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
+			if(b != nil)
 				return b;
 			runtime·xadd(&runtime·work.nwait, +1);
 		}
@@ -861,8 +879,7 @@ scanstack(G *gp)
 	case Gdead:
 		return;
 	case Grunning:
-		runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
-		runtime·throw("mark - world not stopped");
+		runtime·throw("scanstack: goroutine not stopped");
 	case Grunnable:
 	case Gsyscall:
 	case Gwaiting:
@@ -909,7 +926,7 @@ shade(byte *b)
 	if(!inheap(b))
 		runtime·throw("shade: passed an address not in the heap");
 
-	wbuf = getpartial();
+	wbuf = getpartialorempty();
 	// Mark the object, return some important bits.
 	// If we combine the following two routines we don't have to pass mbits or obj around.
 	obj = objectstart(b, &mbits);
@@ -932,8 +949,8 @@ runtime·markwb(void **slot, void *ptr)
 	*slot = ptr;
 }
 
-// The gp has been moved to a gc safepoint. If there is gcphase specific
-// work it is done here.
+// The gp has been moved to a GC safepoint. GC phase specific
+// work is done here.
 void
 runtime·gcphasework(G *gp)
 {
@@ -953,14 +970,8 @@ runtime·gcphasework(G *gp)
 		break;
 	case GCmark:
 	case GCmarktermination:
-		//
-		// Disabled until concurrent GC is implemented
-		// but indicate the scan has been done.
 		scanstack(gp);
-		// scanstack will call shade which will populate
-		// the Workbuf.
-		// emptywbuf(gp) will empty it before returning
-		//
+		// All available mark work will be emptied before returning.
 		break;
 	}
 	gp->gcworkdone = true;
@@ -1050,6 +1061,7 @@ runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*
 	}
 }
 
+// Returns only when span s has been swept.
 void
 runtime·MSpan_EnsureSwept(MSpan *s)
 {
@@ -1064,6 +1076,7 @@ runtime·MSpan_EnsureSwept(MSpan *s)
 	sg = runtime·mheap.sweepgen;
 	if(runtime·atomicload(&s->sweepgen) == sg)
 		return;
+	// The caller must be sure that the span is a MSpanInUse span.
 	if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
 		runtime·MSpan_Sweep(s, false);
 		return;
@@ -1347,7 +1360,7 @@ runtime·gchelper(void)
 	g->m->traceback = 2;
 	gchelperstart();
 
-	// parallel mark for over gc roots
+	// parallel mark for over GC roots
 	runtime·parfordo(runtime·work.markfor);
 	if(runtime·gcphase != GCscan)
 		scanblock(nil, 0, nil); // blocks in getfull
@@ -1531,10 +1544,93 @@ runtime·gc_m(void)
 	a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
 	a.eagersweep = g->m->scalararg[2];
 	gc(&a);
-
 	runtime·casgstatus(gp, Gwaiting, Grunning);
 }
 
+void
+runtime·finishsweep_m(void)
+{
+	uint32 i, sg;
+	MSpan *s;
+
+	// The world is stopped so we should be able to complete the sweeps
+	// quickly.
+	while(runtime·sweepone() != -1)
+		runtime·sweep.npausesweep++;
+
+	// There may be some other spans being swept concurrently that
+	// we need to wait for. If finishsweep_m is done with the world stopped
+	// this code is not required.
+	sg = runtime·mheap.sweepgen;
+	for(i=0; i<runtime·mheap.nspan; i++) {
+		s = runtime·mheap.allspans[i];
+		if(s->sweepgen == sg) {
+			continue;
+		}
+		if(s->state != MSpanInUse) // Span is not part of the GCed heap so no need to ensure it is swept.
+			continue;
+		runtime·MSpan_EnsureSwept(s);
+	}
+}
+
+// Scan all of the stacks, greying (or graying if in America) the referents
+// but not blackening them since the mark write barrier isn't installed.
+void
+runtime·gcscan_m(void)
+{
+	uint32 i, allglen, oldphase;
+	G *gp, *mastergp, **allg;
+
+	// Grab the g that called us and potentially allow rescheduling.
+	// This allows it to be scanned like other goroutines.
+	mastergp = g->m->curg;
+
+	runtime·casgstatus(mastergp, Grunning, Gwaiting);
+	mastergp->waitreason = runtime·gostringnocopy((byte*)"garbage collection scan");
+
+	// Span sweeping has been done by finishsweep_m.
+	// Long term we will want to make this goroutine runnable
+	// by placing it onto a scanenqueue state and then calling
+	// runtime·restartg(mastergp) to make it Grunnable.
+	// At the bottom we will want to return this p back to the scheduler.
+
+	oldphase = runtime·gcphase;
+
+	runtime·lock(&runtime·allglock);
+	allglen = runtime·allglen;
+	allg = runtime·allg;
+	// Prepare flag indicating that the scan has not been completed.
+	for(i = 0; i < allglen; i++) {
+		gp = allg[i];
+		gp->gcworkdone = false; // set to true in gcphasework
+	}
+	runtime·unlock(&runtime·allglock);
+
+	runtime·work.nwait = 0;
+	runtime·work.ndone = 0;
+	runtime·work.nproc = 1; // For now do not do this in parallel.
+	runtime·gcphase = GCscan;
+	// ackgcphase is not needed since we are not scanning running goroutines.
+	runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + allglen, nil, false, markroot);
+	runtime·parfordo(runtime·work.markfor);
+
+	runtime·lock(&runtime·allglock);
+
+	allg = runtime·allg;
+	// Check that gc work is done.
+	for(i = 0; i < allglen; i++) {
+		gp = allg[i];
+		if(!gp->gcworkdone) {
+			runtime·throw("scan missed a g");
+		}
+	}
+	runtime·unlock(&runtime·allglock);
+
+	runtime·gcphase = oldphase;
+	runtime·casgstatus(mastergp, Gwaiting, Grunning);
+	// Let the g that called us continue to run.
+}
+
 static void
 gc(struct gc_args *args)
 {
@@ -1542,7 +1638,9 @@ gc(struct gc_args *args)
 	uint64 heap0, heap1, obj;
 	GCStats stats;
 	uint32 oldphase;
-	
+	uint32 i;
+	G *gp;
+	
 	if(runtime·debug.allocfreetrace)
 		runtime·tracegc();
 
@@ -1554,9 +1652,7 @@ gc(struct gc_args *args)
 	if(runtime·debug.gctrace)
 		t1 = runtime·nanotime();
 
-	// Sweep what is not sweeped by bgsweep.
-	while(runtime·sweepone() != -1)
-		runtime·sweep.npausesweep++;
+	runtime·finishsweep_m();
 
 	// Cache runtime·mheap.allspans in work.spans to avoid conflicts with
 	// resizing/freeing allspans.
@@ -1580,7 +1676,13 @@ gc(struct gc_args *args)
 	runtime·work.nwait = 0;
 	runtime·work.ndone = 0;
 	runtime·work.nproc = runtime·gcprocs();
-	runtime·gcphase = GCmark; //^^  vv
+	runtime·gcphase = GCmark;
+
+	// World is stopped so allglen will not change.
+	for(i = 0; i < runtime·allglen; i++) {
+		gp = runtime·allg[i];
+		gp->gcworkdone = false; // set to true in gcphasework
+	}
 	runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + runtime·allglen, nil, false, markroot);
 	if(runtime·work.nproc > 1) {
@@ -1596,7 +1698,13 @@ gc(struct gc_args *args)
 	runtime·parfordo(runtime·work.markfor);
 	scanblock(nil, 0, nil);
-	runtime·gcphase = oldphase; //^^  vv
+
+	if(runtime·work.full)
+		runtime·throw("runtime·work.full != nil");
+	if(runtime·work.partial)
+		runtime·throw("runtime·work.partial != nil");
+
+	runtime·gcphase = oldphase;
 	t3 = 0;
 	if(runtime·debug.gctrace)
 		t3 = runtime·nanotime();
@@ -1735,7 +1843,7 @@ readgcstats_m(void)
 	if(pauses->cap < nelem(mstats.pause_ns)+3)
 		runtime·throw("runtime: short slice passed to readGCStats");
 
-	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
+	// Pass back: pauses, last GC (absolute time), number of GC, total pause ns.
 	p = (uint64*)pauses->array;
 	runtime·lock(&runtime·mheap.lock);
 	n = mstats.numgc;
diff --git a/src/runtime/proc.c b/src/runtime/proc.c
index 9643abcc6..b824f574d 100644
--- a/src/runtime/proc.c
+++ b/src/runtime/proc.c
@@ -423,13 +423,7 @@ runtime·casgstatus(G *gp, uint32 oldval, uint32 newval)
 	// loop if gp->atomicstatus is in a scan state giving
 	// GC time to finish and change the state to oldval.
 	while(!runtime·cas(&gp->atomicstatus, oldval, newval)) {
-		// Help GC if needed.
-		if(gp->preemptscan && !gp->gcworkdone && (oldval == Grunning || oldval == Gsyscall)) {
-			gp->preemptscan = false;
-			g->m->ptrarg[0] = gp;
-			fn = helpcasgstatus;
-			runtime·onM(&fn);
-		}
+
 	}	
 }
 
@@ -504,6 +498,13 @@ runtime·stopg(G *gp)
 		return false;
 
 	case Grunning:
+		if(runtime·gcphase == GCscan) {
+			gp->gcworkdone = true;
+			return false;
+			// Running routines are not scanned during the
+			// GCscan phase; we only scan non-running routines.
+		}
+
 		// Claim goroutine, so we aren't racing with a status
 		// transition away from Grunning.
 		if(!runtime·castogscanstatus(gp, Grunning, Gscanrunning))
@@ -1918,6 +1919,7 @@ exitsyscallfast(void)
 
 	// Freezetheworld sets stopwait but does not retake P's.
 	if(runtime·sched.stopwait) {
+		g->m->mcache = nil;
 		g->m->p = nil;
 		return false;
 	}
@@ -1930,6 +1932,7 @@ exitsyscallfast(void)
 		return true;
 	}
 	// Try to get any other idle P.
+	g->m->mcache = nil;
 	g->m->p = nil;
 	if(runtime·sched.pidle) {
 		fn = exitsyscallfast_pidle;
@@ -2617,6 +2620,8 @@ runtime·setcpuprofilerate_m(void)
 P *runtime·newP(void);
 
 // Change number of processors. The world is stopped, sched is locked.
+// gcworkbufs are not being modified by either the GC or
+// the write barrier code.
 static void
 procresize(int32 new)
 {
diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h
index 37929c59c..cbbf6b3fc 100644
--- a/src/runtime/runtime.h
+++ b/src/runtime/runtime.h
@@ -649,6 +649,7 @@ struct ForceGCState
 };
 
 extern uint32 runtime·gcphase;
+extern Mutex runtime·allglock;
 
 /*
  * defined macros
@@ -677,6 +678,7 @@ enum {
 
 uint32	runtime·readgstatus(G*);
 void	runtime·casgstatus(G*, uint32, uint32);
+bool	runtime·castogscanstatus(G*, uint32, uint32);
 void	runtime·quiesce(G*);
 bool	runtime·stopg(G*);
 void	runtime·restartg(G*);
diff --git a/src/runtime/stack.c b/src/runtime/stack.c
index e402691f4..e06e48a93 100644
--- a/src/runtime/stack.c
+++ b/src/runtime/stack.c
@@ -587,13 +587,13 @@ adjustsudogs(G *gp, AdjustInfo *adjinfo)
 }
 
 // Copies gp's stack to a new stack of a different size.
+// Caller must have changed gp status to Gcopystack.
 static void
 copystack(G *gp, uintptr newsize)
 {
 	Stack old, new;
 	uintptr used;
 	AdjustInfo adjinfo;
-	uint32 oldstatus;
 	bool (*cb)(Stkframe*, void*);
 	byte *p, *ep;
 
@@ -637,20 +637,11 @@ copystack(G *gp, uintptr newsize)
 	}
 	runtime·memmove((byte*)new.hi - used, (byte*)old.hi - used, used);
 
-	oldstatus = runtime·readgstatus(gp);
-	oldstatus &= ~Gscan;
-	if(oldstatus == Gwaiting || oldstatus == Grunnable)
-		runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
-	else
-		runtime·throw("copystack: bad status, not Gwaiting or Grunnable");
-
 	// Swap out old stack for new one
 	gp->stack = new;
 	gp->stackguard0 = new.lo + StackGuard; // NOTE: might clobber a preempt request
 	gp->sched.sp = new.hi - used;
 
-	runtime·casgstatus(gp, Gcopystack, oldstatus); // oldstatus is Gwaiting or Grunnable
-
 	// free old stack
 	if(StackPoisonCopy) {
 		p = (byte*)old.lo;
@@ -700,6 +691,7 @@ void
 runtime·newstack(void)
 {
 	int32 oldsize, newsize;
+	uint32 oldstatus;
 	uintptr sp;
 	G *gp;
 	Gobuf morebuf;
@@ -789,12 +781,15 @@ runtime·newstack(void)
 		runtime·throw("stack overflow");
 	}
 
-	// Note that the concurrent GC might be scanning the stack as we try to replace it.
-	// copystack takes care of the appropriate coordination with the stack scanner.
+	oldstatus = runtime·readgstatus(gp);
+	oldstatus &= ~Gscan;
+	runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
+	// The concurrent GC will not scan the stack while we are doing the copy since
+	// the gp is in a Gcopystack status.
 	copystack(gp, newsize);
 	if(StackDebug >= 1)
 		runtime·printf("stack grow done\n");
-	runtime·casgstatus(gp, Gwaiting, Grunning);
+	runtime·casgstatus(gp, Gcopystack, Grunning);
 	runtime·gogo(&gp->sched);
 }
 
@@ -825,6 +820,7 @@ void
 runtime·shrinkstack(G *gp)
 {
 	uintptr used, oldsize, newsize;
+	uint32 oldstatus;
 
 	if(runtime·readgstatus(gp) == Gdead) {
 		if(gp->stack.lo != 0) {
@@ -858,8 +854,19 @@ runtime·shrinkstack(G *gp)
 #endif
 	if(StackDebug > 0)
 		runtime·printf("shrinking stack %D->%D\n", (uint64)oldsize, (uint64)newsize);
+	// This is being done in a Gscan state and was initiated by the GC so no need to move to
+	// the Gcopystack state.
+	// The world is stopped, so the goroutine must be Gwaiting or Grunnable,
+	// and what it is is not changing underfoot.
+ + oldstatus = runtime·readgstatus(gp); + oldstatus &= ~Gscan; + if(oldstatus != Gwaiting && oldstatus != Grunnable) + runtime·throw("status is not Gwaiting or Grunnable"); + runtime·casgstatus(gp, oldstatus, Gcopystack); copystack(gp, newsize); -} + runtime·casgstatus(gp, Gcopystack, oldstatus); + } // Do any delayed stack freeing that was queued up during GC. void diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index 6561094ff..32dfed7d3 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -106,6 +106,8 @@ func recovery_m(*g) func mcacheRefill_m() func largeAlloc_m() func gc_m() +func gcscan_m() +func finishsweep_m() func scavenge_m() func setFinalizer_m() func removeFinalizer_m() -- cgit v1.2.1 From 297ae6e1545a6982878a82114d5d2d40da373ca6 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 27 Oct 2014 15:57:07 -0400 Subject: [dev.garbage] runtime: fix TestLFStack on 386 LGTM=rlh R=rlh, dvyukov CC=golang-codereviews https://codereview.appspot.com/157430044 --- src/runtime/export_test.go | 2 +- src/runtime/lfstack_test.go | 2 +- src/runtime/runtime.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index be352557f..65e918e84 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -26,7 +26,7 @@ var Exitsyscall = exitsyscall var LockedOSThread = lockedOSThread type LFNode struct { - Next *LFNode + Next uint64 Pushcnt uintptr } diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go index e51877704..68f221d6e 100644 --- a/src/runtime/lfstack_test.go +++ b/src/runtime/lfstack_test.go @@ -121,7 +121,7 @@ func TestLFStackStress(t *testing.T) { } cnt++ sum2 += node.data - node.Next = nil + node.Next = 0 } } if cnt != K { diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index cbbf6b3fc..c1bba423a 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -571,6 +571,7 @@ enum { #endif // Lock-free stack node. +// Also known to export_test.go. struct LFNode { uint64 next; -- cgit v1.2.1 From 9232cbb6a6aaa8a4197b22819406128ff0f99265 Mon Sep 17 00:00:00 2001 From: Rick Hudson Date: Mon, 27 Oct 2014 17:07:53 -0400 Subject: [dev.garbage] runtime: Fix 386 compiler warnings. LGTM=rsc R=rsc CC=golang-codereviews https://codereview.appspot.com/163390043 --- src/runtime/mgc0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c index c385d51cf..cc1f81123 100644 --- a/src/runtime/mgc0.c +++ b/src/runtime/mgc0.c @@ -632,7 +632,7 @@ getempty(Workbuf *b) b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty); if(b && b->nobj != 0) { - runtime·printf("m%d: getempty: popped b=%p with non-zero b->nobj=%D\n", g->m->id, b, b->nobj); + runtime·printf("m%d: getempty: popped b=%p with non-zero b->nobj=%d\n", g->m->id, b, (uint32)b->nobj); runtime·throw("getempty: workbuffer not empty, b->nobj not 0"); } if(b == nil) { @@ -687,7 +687,7 @@ putpartial(Workbuf *b) else if (b->nobj == nelem(b->obj)) runtime·lfstackpush(&runtime·work.full, &b->node); else { - runtime·printf("b=%p, b->nobj=%D, nelem(b->obj)=%d\n", b, b->nobj, nelem(b->obj)); + runtime·printf("b=%p, b->nobj=%d, nelem(b->obj)=%d\n", b, b->nobj, (uint32)nelem(b->obj)); runtime·throw("putpartial: bad Workbuf b->nobj"); } } -- cgit v1.2.1
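
The work-buffer patches above replace the per-m gcworkbuf cache with three lock-free lists, work.empty, work.partial, and work.full: putpartial routes a buffer by its nobj count, and getpartialorempty prefers half-finished buffers. The Go sketch below illustrates only that triage rule; the workbuf type, the small bufCap constant, and the slice-backed lists are illustrative assumptions standing in for the runtime's C Workbuf and lock-free stacks.

	package main

	import "fmt"

	// bufCap stands in for nelem(b->obj); the real Workbuf holds many
	// more object pointers.
	const bufCap = 4

	type workbuf struct {
		obj  [bufCap]uintptr // pointers to objects awaiting marking
		nobj int
	}

	// lists models work.empty, work.partial and work.full. Plain slices
	// replace the runtime's lock-free stacks for clarity.
	type lists struct {
		empty, partial, full []*workbuf
	}

	// putpartial mirrors the triage in the patch: an empty buffer goes
	// on the empty list, a completely full one on the full list, and
	// anything in between on the partial list.
	func (l *lists) putpartial(b *workbuf) {
		switch {
		case b.nobj == 0:
			l.empty = append(l.empty, b)
		case b.nobj > 0 && b.nobj < bufCap:
			l.partial = append(l.partial, b)
		case b.nobj == bufCap:
			l.full = append(l.full, b)
		default:
			panic(fmt.Sprintf("putpartial: bad nobj %d", b.nobj))
		}
	}

	// getpartialorempty prefers a partially filled buffer so callers
	// resume half-done work, falling back to an empty one.
	func (l *lists) getpartialorempty() *workbuf {
		if n := len(l.partial); n > 0 {
			b := l.partial[n-1]
			l.partial = l.partial[:n-1]
			return b
		}
		if n := len(l.empty); n > 0 {
			b := l.empty[n-1]
			l.empty = l.empty[:n-1]
			return b
		}
		return &workbuf{} // the runtime allocates via persistentalloc
	}

	func main() {
		var l lists
		l.putpartial(&workbuf{nobj: 2})
		fmt.Println("got buffer with", l.getpartialorempty().nobj, "objects")
	}

Sorting buffers by fullness this way is what lets getfull drain work.full before falling back to work.partial, so mark work is consumed in the largest available batches.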
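The TestLFStack fix changes the exported LFNode.Next field from *LFNode to uint64 because lfstack does not store plain pointers: a push packs the node address together with an ABA counter into one uint64 so that a single CAS covers both. The sketch below shows the packing idea under assumed constants; ptrBits, cntBits, and the pack/unpack helpers are illustrative names, not the runtime's actual layout.

	package main

	import "fmt"

	const (
		ptrBits = 47           // illustrative: addresses assumed to fit in 47 bits
		cntBits = 64 - ptrBits // remaining bits hold a push counter
		cntMask = 1<<cntBits - 1
	)

	// pack stores the node address in the high bits and the ABA counter
	// in the low bits, so a stale pointer reused at the same address
	// still fails the CAS because its counter has moved on.
	func pack(node uintptr, cnt uintptr) uint64 {
		return uint64(node)<<cntBits | uint64(cnt)&cntMask
	}

	// unpack recovers the node address; the packed value is not a valid
	// pointer, which is why the test-side Next field must be a uint64.
	func unpack(val uint64) uintptr {
		return uintptr(val >> cntBits)
	}

	func main() {
		v := pack(0x7f0000001000, 42)
		fmt.Printf("node=%#x cnt=%d\n", unpack(v), v&cntMask)
	}

Even on 386, where the address itself fits in 32 bits, the packed word stays 64 bits wide, so the Go mirror struct must declare Next as uint64 rather than as a pointer; declaring it *LFNode also made the garbage collector treat a packed word as a live pointer.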