author     Rick Hudson <rlh@golang.org>    2014-10-24 11:07:16 -0400
committer  Rick Hudson <rlh@golang.org>    2014-10-24 11:07:16 -0400
commit     2c987a9ddefd9d256bf9e7e21396e2485a7d6514 (patch)
tree       856e4b1fb53139120ce60e1724361a749c7e2afe
parent     978c971bacf92ee97b8e24ce45b485afa1c31fad (diff)
download   go-2c987a9ddefd9d256bf9e7e21396e2485a7d6514.tar.gz
[dev.garbage] runtime: Concurrent scan code
Routines and logic to perform a concurrent stack scan of goroutines. This CL exercises most of the functionality needed. The major exception is that it does not scan running goroutines. After doing the scans it relies on a STW to finish the GC, including rescanning the stacks. It is intended to achieve correctness; performance will follow.

LGTM=rsc
R=golang-codereviews, rsc
CC=dvyukov, golang-codereviews
https://codereview.appspot.com/156580043
-rw-r--r--  src/runtime/malloc.go     8
-rw-r--r--  src/runtime/malloc.h      5
-rw-r--r--  src/runtime/mcache.c      2
-rw-r--r--  src/runtime/mgc0.c      308
-rw-r--r--  src/runtime/proc.c       19
-rw-r--r--  src/runtime/runtime.h     2
-rw-r--r--  src/runtime/stack.c      35
-rw-r--r--  src/runtime/stubs.go      2
8 files changed, 254 insertions(+), 127 deletions(-)
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 9b4264f2b..c56e03886 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -438,7 +438,15 @@ func gogc(force int32) {
mp = acquirem()
mp.gcing = 1
releasem(mp)
+
onM(stoptheworld)
+ onM(finishsweep_m) // finish sweep before we start the concurrent scan.
+ onM(starttheworld)
+
+ // Do a concurrent heap scan before we stop the world.
+ onM(gcscan_m)
+ onM(stoptheworld)
+
if mp != acquirem() {
gothrow("gogc: rescheduled")
}
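
Taken together, the gogc change above establishes the new collection sequence: an initial stop-the-world to finish sweeping, a restart so the scan can run concurrently with mutators, then a second stop-the-world for the mark and termination phases. A minimal Go sketch of that sequence (illustrative names only, not the runtime's code):

package main

import "fmt"

// Illustrative stand-ins for the phases gogc sequences via onM; none
// of these are the runtime's real functions.
func stopTheWorld()    { fmt.Println("world stopped") }
func startTheWorld()   { fmt.Println("world restarted") }
func finishSweep()     { fmt.Println("sweep finished") }
func concurrentScan()  { fmt.Println("stacks and globals greyed while mutators run") }
func markTermination() { fmt.Println("mark and rescan under STW") }

func collect() {
	stopTheWorld()
	finishSweep() // sweeping must complete before the scan phase starts
	startTheWorld()

	concurrentScan() // runs alongside mutators; running goroutines skipped

	stopTheWorld()
	markTermination() // the STW finish rescans stacks for correctness
	startTheWorld()
}

func main() { collect() }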
diff --git a/src/runtime/malloc.h b/src/runtime/malloc.h
index edcd0be77..e606b0c7a 100644
--- a/src/runtime/malloc.h
+++ b/src/runtime/malloc.h
@@ -343,11 +343,6 @@ struct MCache
StackFreeList stackcache[NumStackOrders];
SudoG* sudogcache;
- // Cached P local buffer holding grey objects (marked by not yet scanned)
- // Used by mutator for write barrier work.
- // GC uses the mcache of the P it is running on for stack and global scanning
- // work as well marking.
- Workbuf* gcworkbuf;
// Local allocator stats, flushed during GC.
uintptr local_nlookup; // number of pointer lookups
diff --git a/src/runtime/mcache.c b/src/runtime/mcache.c
index 5fdbe3266..95ddced3e 100644
--- a/src/runtime/mcache.c
+++ b/src/runtime/mcache.c
@@ -39,12 +39,12 @@ runtime·allocmcache(void)
return c;
}
+// mheap.lock needs to be held to release the gcworkbuf.
static void
freemcache(MCache *c)
{
runtime·MCache_ReleaseAll(c);
runtime·stackcache_clear(c);
- runtime·gcworkbuffree(c->gcworkbuf);
runtime·lock(&runtime·mheap.lock);
runtime·purgecachedstats(c);
runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
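
The two hunks above (malloc.h and mcache.c) remove the per-P cached gcworkbuf: work buffers no longer hide in an mcache but always live on the shared lists, so freeing an mcache no longer hands a buffer back. A small sketch of the resulting free-side routing, using hypothetical names and a mutex-guarded slice where the runtime uses lock-free lfstacks:

package main

import "sync"

// Sketch of post-change buffer freeing: with the per-P gcworkbuf gone,
// a freed buffer is routed straight onto a shared list by occupancy.
const workbufSize = 4 // tiny for illustration

type workbuf struct {
	obj  [workbufSize]uintptr
	nobj int // number of valid entries in obj
}

type buflist struct {
	mu   sync.Mutex
	bufs []*workbuf
}

func (l *buflist) push(b *workbuf) {
	l.mu.Lock()
	l.bufs = append(l.bufs, b)
	l.mu.Unlock()
}

var emptyList, fullList buflist

func workbufFree(b *workbuf) {
	if b == nil {
		return
	}
	if b.nobj == 0 {
		emptyList.push(b) // nothing queued: reusable as an empty buffer
	} else {
		fullList.push(b) // queued objects must not be lost: keep as work
	}
}

func main() {
	workbufFree(&workbuf{})        // lands on the empty list
	workbufFree(&workbuf{nobj: 2}) // lands on the full list
}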
diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c
index 8620f47af..c385d51cf 100644
--- a/src/runtime/mgc0.c
+++ b/src/runtime/mgc0.c
@@ -52,7 +52,7 @@
// see discussion of GC rate below.
// Changing phases.
-// Phases are changed by setting the gcphase to the next phase and call ackgcphase.
+// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
// All phase action must be benign in the presence of a change.
// Starting with GCoff
// GCoff to GCscan
@@ -137,7 +137,7 @@ enum {
// ptrmask for an allocation containing a single pointer.
static byte oneptr[] = {BitsPointer};
-// Initialized from $GOGC. GOGC=off means no gc.
+// Initialized from $GOGC. GOGC=off means no GC.
extern int32 runtime·gcpercent;
// Holding worldsema grants an M the right to try to stop the world.
@@ -185,11 +185,12 @@ BitVector runtime·gcbssmask;
Mutex runtime·gclock;
-static Workbuf* getpartial(void);
+static Workbuf* getpartialorempty(void);
static void putpartial(Workbuf*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
+static void putfull(Workbuf*);
static Workbuf* handoff(Workbuf*);
static void gchelperstart(void);
static void flushallmcaches(void);
@@ -205,12 +206,14 @@ static void shade(byte*);
static void slottombits(byte*, Markbits*);
void runtime·bgsweep(void);
+void runtime·finishsweep_m(void);
static FuncVal bgsweepv = {runtime·bgsweep};
typedef struct WorkData WorkData;
struct WorkData {
- uint64 full; // lock-free list of full blocks
- uint64 empty; // lock-free list of empty blocks
+ uint64 full; // lock-free list of full blocks
+ uint64 empty; // lock-free list of empty blocks
+ uint64 partial; // lock-free list of partially filled blocks
byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
uint32 nproc;
int64 tstart;
@@ -455,15 +458,22 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
Workbuf *wbuf;
bool keepworking;
- wbuf = getpartial();
+ wbuf = getpartialorempty();
if(b != nil) {
wbuf = scanobject(b, n, ptrmask, wbuf);
if(runtime·gcphase == GCscan) {
+ if(inheap(b) && !ptrmask)
+ // b is in heap, we are in GCscan so there should be a ptrmask.
+ runtime·throw("scanblock: In GCscan phase and inheap is true.");
+ // GCscan only goes one level deep since mark wb not turned on.
putpartial(wbuf);
return;
}
}
-
+ if(runtime·gcphase == GCscan) {
+ runtime·throw("scanblock: In GCscan phase but no b passed in.");
+ }
+
keepworking = b == nil;
// ptrmask can have 2 possible values:
@@ -479,6 +489,11 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
wbuf = getfull(wbuf);
if(wbuf == nil) // nil means out of work barrier reached
return;
+
+ if(wbuf->nobj<=0) {
+ runtime·throw("runtime:scanblock getfull returns empty buffer");
+ }
+
}
// If another proc wants a pointer, give it some.
@@ -506,7 +521,7 @@ markroot(ParFor *desc, uint32 i)
void *p;
uint32 status;
bool restart;
-
+
USED(&desc);
// Note: if you add a case here, please also update heapdump.c:dumproots.
switch(i) {
@@ -553,7 +568,8 @@ markroot(ParFor *desc, uint32 i)
break;
case RootFlushCaches:
- flushallmcaches();
+ if (runtime·gcphase != GCscan) // Do not flush mcaches during GCscan phase.
+ flushallmcaches();
break;
default:
@@ -566,9 +582,10 @@ markroot(ParFor *desc, uint32 i)
status = runtime·readgstatus(gp); // We are not in a scan state
if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
gp->waitsince = runtime·work.tstart;
- // Shrink a stack if not much of it is being used.
- runtime·shrinkstack(gp);
- if(runtime·readgstatus(gp) == Gdead)
+ // Shrink a stack if not much of it is being used, but not during the scan phase.
+ if (runtime·gcphase != GCscan) // Do not shrink during GCscan phase.
+ runtime·shrinkstack(gp);
+ if(runtime·readgstatus(gp) == Gdead)
gp->gcworkdone = true;
else
gp->gcworkdone = false;
@@ -576,121 +593,120 @@ markroot(ParFor *desc, uint32 i)
// goroutine will scan its own stack when it stops running.
// Wait until it has.
- while((status = runtime·readgstatus(gp)) == Grunning && !gp->gcworkdone) {
+ while(runtime·readgstatus(gp) == Grunning && !gp->gcworkdone) {
+ }
+
+ // scanstack(gp) is done as part of gcphasework.
+ // But to make sure we finished, we need to be sure that
+ // the stack traps have all responded, so drop into
+ // this while loop until they respond.
+ while(!gp->gcworkdone){
+ status = runtime·readgstatus(gp);
if(status == Gdead) {
- // TBD you need to explain why Gdead without gp->gcworkdone
- // being true. If there is a race then it needs to be
- // explained here.
gp->gcworkdone = true; // scan is a noop
break;
//do nothing, scan not needed.
}
- // try again
+ if(status == Gwaiting || status == Grunnable)
+ restart = runtime·stopg(gp);
}
-
- // scanstack(gp); now done as part of gcphasework
- // But to make sure we finished we need to make sure that
- // the stack traps have all responded so drop into
- // this while loop until they respond.
- if(!gp->gcworkdone)
- // For some reason a G has not completed its work. This is a bug that
- // needs to be investigated. For now I'll just print this message in
- // case the bug is benign.
- runtime·printf("runtime:markroot: post stack scan work not done gp=%p has status %x\n", gp, status);
-
if(restart)
runtime·restartg(gp);
break;
}
}
+// wblock is used for creating new empty work buffer blocks.
+static Mutex wblock;
+
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
- MCache *c;
-
- if(b != nil)
- runtime·lfstackpush(&runtime·work.full, &b->node);
- b = nil;
- c = g->m->mcache;
- if(c->gcworkbuf != nil) {
- b = c->gcworkbuf;
- c->gcworkbuf = nil;
+ if(b != nil) {
+ putfull(b);
+ b = nil;
}
- if(b == nil)
+ if(runtime·work.empty)
b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty);
+
+ if(b && b->nobj != 0) {
+ runtime·printf("m%d: getempty: popped b=%p with non-zero b->nobj=%D\n", g->m->id, b, b->nobj);
+ runtime·throw("getempty: workbuffer not empty, b->nobj not 0");
+ }
if(b == nil) {
+ runtime·lock(&wblock);
b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
b->nobj = 0;
+ runtime·unlock(&wblock);
}
- if(b->nobj != 0)
- runtime·throw("getempty: b->nobj not 0/n");
- b->nobj = 0;
return b;
}
static void
putempty(Workbuf *b)
{
- MCache *c;
-
- if(b->nobj != 0)
- runtime·throw("putempty: b->nobj=%D not 0\n");
- c = g->m->mcache;
- if(c->gcworkbuf == nil) {
- c->gcworkbuf = b;
- return;
+ if(b->nobj != 0) {
+ runtime·throw("putempty: b->nobj not 0\n");
}
runtime·lfstackpush(&runtime·work.empty, &b->node);
}
-// Get an partially empty work buffer from the mcache structure
-// and if non is available get an empty one.
+// Put a full or partially full workbuf on the full list.
+static void
+putfull(Workbuf *b)
+{
+ if(b->nobj <= 0) {
+ runtime·throw("putfull: b->nobj <= 0\n");
+ }
+ runtime·lfstackpush(&runtime·work.full, &b->node);
+}
+
+// Get a partially empty work buffer;
+// if none is available, get an empty one.
static Workbuf*
-getpartial(void)
+getpartialorempty(void)
{
- MCache *c;
Workbuf *b;
- c = g->m->mcache;
- if(c->gcworkbuf != nil) {
- b = c->gcworkbuf;
- c->gcworkbuf = nil;
- } else {
+ b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
+ if(b == nil)
b = getempty(nil);
- }
return b;
}
static void
putpartial(Workbuf *b)
{
- MCache *c;
- c = g->m->mcache;
- if(c->gcworkbuf == nil) {
- c->gcworkbuf = b;
- return;
+ if(b->nobj == 0)
+ runtime·lfstackpush(&runtime·work.empty, &b->node);
+ else if (b->nobj < nelem(b->obj))
+ runtime·lfstackpush(&runtime·work.partial, &b->node);
+ else if (b->nobj == nelem(b->obj))
+ runtime·lfstackpush(&runtime·work.full, &b->node);
+ else {
+ runtime·printf("b=%p, b->nobj=%D, nelem(b->obj)=%d\n", b, b->nobj, nelem(b->obj));
+ runtime·throw("putpartial: bad Workbuf b->nobj");
}
-
- runtime·throw("putpartial: c->gcworkbuf is not nil\n");
-
- runtime·lfstackpush(&runtime·work.full, &b->node);
}
void
runtime·gcworkbuffree(Workbuf *b)
{
- if(b != nil) {
- if(b->nobj != 0)
- runtime·throw("gcworkbufferfree: b->nobj not 0\n");
+ if(b == nil)
+ return;
+ if(b->nobj == 0)
putempty(b);
- }
+ else
+ putfull(b);
}
-// Get a full work buffer off the work.full list, or return nil.
+// Get a full work buffer off the work.full list, or a partially
+// filled one off the work.partial list. If nothing is available,
+// wait until all the other GC helpers have finished and then
+// return nil.
// getfull acts as a barrier for work.nproc helpers. As long as one
// gchelper is actively marking objects it
// may create a workbuffer that the other helpers can work on.
@@ -704,12 +720,12 @@ getfull(Workbuf *b)
{
int32 i;
- if(b != nil) {
- if(b->nobj != 0)
- runtime·printf("runtime:getfull: b->nobj=%D not 0.", b->nobj);
- runtime·lfstackpush(&runtime·work.empty, &b->node);
- }
+ if(b != nil)
+ putempty(b);
+
b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
+ if(b==nil)
+ b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
if(b != nil || runtime·work.nproc == 1)
return b;
@@ -718,7 +734,9 @@ getfull(Workbuf *b)
if(runtime·work.full != 0) {
runtime·xadd(&runtime·work.nwait, -1);
b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
- if(b != nil)
+ if(b==nil)
+ b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
+ if(b != nil)
return b;
runtime·xadd(&runtime·work.nwait, +1);
}
@@ -861,8 +879,7 @@ scanstack(G *gp)
case Gdead:
return;
case Grunning:
- runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
- runtime·throw("mark - world not stopped");
+ runtime·throw("scanstack: - goroutine not stopped");
case Grunnable:
case Gsyscall:
case Gwaiting:
@@ -909,7 +926,7 @@ shade(byte *b)
if(!inheap(b))
runtime·throw("shade: passed an address not in the heap");
- wbuf = getpartial();
+ wbuf = getpartialorempty();
// Mark the object, return some important bits.
// If we combine the following two routines we don't have to pass mbits or obj around.
obj = objectstart(b, &mbits);
@@ -932,8 +949,8 @@ runtime·markwb(void **slot, void *ptr)
*slot = ptr;
}
-// The gp has been moved to a gc safepoint. If there is gcphase specific
-// work it is done here.
+// The gp has been moved to a GC safepoint. GC phase specific
+// work is done here.
void
runtime·gcphasework(G *gp)
{
@@ -953,14 +970,8 @@ runtime·gcphasework(G *gp)
break;
case GCmark:
case GCmarktermination:
- //
- // Disabled until concurrent GC is implemented
- // but indicate the scan has been done.
scanstack(gp);
- // scanstack will call shade which will populate
- // the Workbuf.
- // emptywbuf(gp) will empty it before returning
- //
+ // All available mark work will be emptied before returning.
break;
}
gp->gcworkdone = true;
@@ -1050,6 +1061,7 @@ runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*
}
}
+// Returns only when span s has been swept.
void
runtime·MSpan_EnsureSwept(MSpan *s)
{
@@ -1064,6 +1076,7 @@ runtime·MSpan_EnsureSwept(MSpan *s)
sg = runtime·mheap.sweepgen;
if(runtime·atomicload(&s->sweepgen) == sg)
return;
+ // The caller must be sure that the span is a MSpanInUse span.
if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
runtime·MSpan_Sweep(s, false);
return;
@@ -1347,7 +1360,7 @@ runtime·gchelper(void)
g->m->traceback = 2;
gchelperstart();
- // parallel mark for over gc roots
+ // parallel mark over GC roots
runtime·parfordo(runtime·work.markfor);
if(runtime·gcphase != GCscan)
scanblock(nil, 0, nil); // blocks in getfull
@@ -1531,10 +1544,93 @@ runtime·gc_m(void)
a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
a.eagersweep = g->m->scalararg[2];
gc(&a);
-
runtime·casgstatus(gp, Gwaiting, Grunning);
}
+void
+runtime·finishsweep_m(void)
+{
+ uint32 i, sg;
+ MSpan *s;
+
+ // The world is stopped so we should be able to complete the sweeps
+ // quickly.
+ while(runtime·sweepone() != -1)
+ runtime·sweep.npausesweep++;
+
+ // There may be some other spans being swept concurrently that
+ // we need to wait for. If finishsweep_m is done with the world stopped,
+ // this code is not required.
+ sg = runtime·mheap.sweepgen;
+ for(i=0; i<runtime·work.nspan; i++) {
+ s = runtime·work.spans[i];
+ if(s->sweepgen == sg) {
+ continue;
+ }
+ if(s->state != MSpanInUse) // Span is not part of the GCed heap so no need to ensure it is swept.
+ continue;
+ runtime·MSpan_EnsureSwept(s);
+ }
+}
+
+// Scan all of the stacks, greying (or graying if in America) the referents
+// but not blackening them since the mark write barrier isn't installed.
+void
+runtime·gcscan_m(void)
+{
+ uint32 i, allglen, oldphase;
+ G *gp, *mastergp, **allg;
+
+ // Grab the g that called us and potentially allow rescheduling.
+ // This allows it to be scanned like other goroutines.
+ mastergp = g->m->curg;
+
+ runtime·casgstatus(mastergp, Grunning, Gwaiting);
+ mastergp->waitreason = runtime·gostringnocopy((byte*)"garbage collection scan");
+
+ // Span sweeping has been done by finishsweep_m.
+ // Long term we will want to make this goroutine runnable
+ // by placing it onto a scanenqueue state and then calling
+ // runtime·restartg(mastergp) to make it Grunnable.
+ // At the bottom we will want to return this p back to the scheduler.
+
+ oldphase = runtime·gcphase;
+
+ runtime·lock(&runtime·allglock);
+ allglen = runtime·allglen;
+ allg = runtime·allg;
+ // Prepare flag indicating that the scan has not been completed.
+ for(i = 0; i < allglen; i++) {
+ gp = allg[i];
+ gp->gcworkdone = false; // set to true in gcphasework
+ }
+ runtime·unlock(&runtime·allglock);
+
+ runtime·work.nwait = 0;
+ runtime·work.ndone = 0;
+ runtime·work.nproc = 1; // For now do not do this in parallel.
+ runtime·gcphase = GCscan;
+ // ackgcphase is not needed since we are not scanning running goroutines.
+ runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + allglen, nil, false, markroot);
+ runtime·parfordo(runtime·work.markfor);
+
+ runtime·lock(&runtime·allglock);
+
+ allg = runtime·allg;
+ // Check that gc work is done.
+ for(i = 0; i < allglen; i++) {
+ gp = allg[i];
+ if(!gp->gcworkdone) {
+ runtime·throw("scan missed a g");
+ }
+ }
+ runtime·unlock(&runtime·allglock);
+
+ runtime·gcphase = oldphase;
+ runtime·casgstatus(mastergp, Gwaiting, Grunning);
+ // Let the g that called us continue to run.
+}
+
static void
gc(struct gc_args *args)
{
@@ -1542,7 +1638,9 @@ gc(struct gc_args *args)
uint64 heap0, heap1, obj;
GCStats stats;
uint32 oldphase;
-
+ uint32 i;
+ G *gp;
+
if(runtime·debug.allocfreetrace)
runtime·tracegc();
@@ -1554,9 +1652,7 @@ gc(struct gc_args *args)
if(runtime·debug.gctrace)
t1 = runtime·nanotime();
- // Sweep what is not sweeped by bgsweep.
- while(runtime·sweepone() != -1)
- runtime·sweep.npausesweep++;
+ runtime·finishsweep_m();
// Cache runtime·mheap.allspans in work.spans to avoid conflicts with
// resizing/freeing allspans.
@@ -1580,7 +1676,13 @@ gc(struct gc_args *args)
runtime·work.nwait = 0;
runtime·work.ndone = 0;
runtime·work.nproc = runtime·gcprocs();
- runtime·gcphase = GCmark; //^^ vv
+ runtime·gcphase = GCmark;
+
+ // World is stopped so allglen will not change.
+ for(i = 0; i < runtime·allglen; i++) {
+ gp = runtime·allg[i];
+ gp->gcworkdone = false; // set to true in gcphasework
+ }
runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + runtime·allglen, nil, false, markroot);
if(runtime·work.nproc > 1) {
@@ -1596,7 +1698,13 @@ gc(struct gc_args *args)
runtime·parfordo(runtime·work.markfor);
scanblock(nil, 0, nil);
- runtime·gcphase = oldphase; //^^ vv
+
+ if(runtime·work.full)
+ runtime·throw("runtime·work.full != nil");
+ if(runtime·work.partial)
+ runtime·throw("runtime·work.partial != nil");
+
+ runtime·gcphase = oldphase;
t3 = 0;
if(runtime·debug.gctrace)
t3 = runtime·nanotime();
@@ -1735,7 +1843,7 @@ readgcstats_m(void)
if(pauses->cap < nelem(mstats.pause_ns)+3)
runtime·throw("runtime: short slice passed to readGCStats");
- // Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
+ // Pass back: pauses, last GC (absolute time), number of GC, total pause ns.
p = (uint64*)pauses->array;
runtime·lock(&runtime·mheap.lock);
n = mstats.numgc;
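
The mgc0.c changes revolve around the new work.partial list and the getfull barrier described in the comments above: a helper that runs out of work registers itself as waiting, and only when all work.nproc helpers are waiting can everyone conclude that no more mark work will appear. A toy model of that barrier (illustrative only; a buffered channel stands in for the lock-free full/partial lists):

package main

import (
	"fmt"
	"sync/atomic"
)

// Toy model of the getfull barrier: a helper that finds no work
// registers as waiting; only when every one of nproc helpers is
// waiting can it conclude the mark phase is out of work and return nil.
var (
	nproc int32 = 1
	nwait int32
	workq = make(chan []uintptr, 4)
)

func getfull() []uintptr {
	select {
	case b := <-workq:
		return b // fast path: full/partial list had a buffer
	default:
	}
	atomic.AddInt32(&nwait, 1)
	for {
		select {
		case b := <-workq:
			atomic.AddInt32(&nwait, -1)
			return b // another helper published work while we waited
		default:
			if atomic.LoadInt32(&nwait) == atomic.LoadInt32(&nproc) {
				return nil // all helpers idle: work barrier reached
			}
		}
	}
}

func main() {
	workq <- []uintptr{0x1234}
	fmt.Println(getfull()) // drains the one buffer
	fmt.Println(getfull()) // sole helper waits, then hits the barrier: nil
}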
diff --git a/src/runtime/proc.c b/src/runtime/proc.c
index 9643abcc6..b824f574d 100644
--- a/src/runtime/proc.c
+++ b/src/runtime/proc.c
@@ -423,13 +423,7 @@ runtime·casgstatus(G *gp, uint32 oldval, uint32 newval)
// loop if gp->atomicstatus is in a scan state giving
// GC time to finish and change the state to oldval.
while(!runtime·cas(&gp->atomicstatus, oldval, newval)) {
- // Help GC if needed.
- if(gp->preemptscan && !gp->gcworkdone && (oldval == Grunning || oldval == Gsyscall)) {
- gp->preemptscan = false;
- g->m->ptrarg[0] = gp;
- fn = helpcasgstatus;
- runtime·onM(&fn);
- }
+
}
}
@@ -504,6 +498,13 @@ runtime·stopg(G *gp)
return false;
case Grunning:
+ if(runtime·gcphase == GCscan) {
+ gp->gcworkdone = true;
+ return false;
+ // Running routines are not scanned during the
+ // GCscan phase; we only scan non-running routines.
+ }
+
// Claim goroutine, so we aren't racing with a status
// transition away from Grunning.
if(!runtime·castogscanstatus(gp, Grunning, Gscanrunning))
@@ -1918,6 +1919,7 @@ exitsyscallfast(void)
// Freezetheworld sets stopwait but does not retake P's.
if(runtime·sched.stopwait) {
+ g->m->mcache = nil;
g->m->p = nil;
return false;
}
@@ -1930,6 +1932,7 @@ exitsyscallfast(void)
return true;
}
// Try to get any other idle P.
+ g->m->mcache = nil;
g->m->p = nil;
if(runtime·sched.pidle) {
fn = exitsyscallfast_pidle;
@@ -2617,6 +2620,8 @@ runtime·setcpuprofilerate_m(void)
P *runtime·newP(void);
// Change number of processors. The world is stopped, sched is locked.
+// gcworkbufs are not being modified by either the GC or
+// the write barrier code.
static void
procresize(int32 new)
{
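
The proc.c side makes stopg refuse to stop a Grunning goroutine while in the GCscan phase: it just marks the g's work done, since running goroutines are rescanned during the later stop-the-world. A sketch of that decision, with hypothetical names:

package main

import "fmt"

// Hypothetical status values standing in for the runtime's G statuses.
const (
	Grunnable = iota
	Grunning
	Gwaiting
)

// Sketch of the stopg decision added for the scan phase: a goroutine
// found Grunning is not stopped during GCscan; it is marked done and
// left for the later stop-the-world rescan.
func stopForScan(status int, gcworkdone *bool, phaseIsScan bool) bool {
	if status == Grunning && phaseIsScan {
		*gcworkdone = true // rescanned under STW later, so count it done
		return false       // do not claim or stop the goroutine
	}
	return true // otherwise proceed with the normal Gscan transition
}

func main() {
	done := false
	fmt.Println(stopForScan(Grunning, &done, true), done) // false true
}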
diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h
index 37929c59c..cbbf6b3fc 100644
--- a/src/runtime/runtime.h
+++ b/src/runtime/runtime.h
@@ -649,6 +649,7 @@ struct ForceGCState
};
extern uint32 runtime·gcphase;
+extern Mutex runtime·allglock;
/*
* defined macros
@@ -677,6 +678,7 @@ enum {
uint32 runtime·readgstatus(G*);
void runtime·casgstatus(G*, uint32, uint32);
+bool runtime·castogscanstatus(G*, uint32, uint32);
void runtime·quiesce(G*);
bool runtime·stopg(G*);
void runtime·restartg(G*);
diff --git a/src/runtime/stack.c b/src/runtime/stack.c
index e402691f4..e06e48a93 100644
--- a/src/runtime/stack.c
+++ b/src/runtime/stack.c
@@ -587,13 +587,13 @@ adjustsudogs(G *gp, AdjustInfo *adjinfo)
}
// Copies gp's stack to a new stack of a different size.
+// Caller must have changed gp status to Gcopystack.
static void
copystack(G *gp, uintptr newsize)
{
Stack old, new;
uintptr used;
AdjustInfo adjinfo;
- uint32 oldstatus;
bool (*cb)(Stkframe*, void*);
byte *p, *ep;
@@ -637,20 +637,11 @@ copystack(G *gp, uintptr newsize)
}
runtime·memmove((byte*)new.hi - used, (byte*)old.hi - used, used);
- oldstatus = runtime·readgstatus(gp);
- oldstatus &= ~Gscan;
- if(oldstatus == Gwaiting || oldstatus == Grunnable)
- runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
- else
- runtime·throw("copystack: bad status, not Gwaiting or Grunnable");
-
// Swap out old stack for new one
gp->stack = new;
gp->stackguard0 = new.lo + StackGuard; // NOTE: might clobber a preempt request
gp->sched.sp = new.hi - used;
- runtime·casgstatus(gp, Gcopystack, oldstatus); // oldstatus is Gwaiting or Grunnable
-
// free old stack
if(StackPoisonCopy) {
p = (byte*)old.lo;
@@ -700,6 +691,7 @@ void
runtime·newstack(void)
{
int32 oldsize, newsize;
+ uint32 oldstatus;
uintptr sp;
G *gp;
Gobuf morebuf;
@@ -789,12 +781,15 @@ runtime·newstack(void)
runtime·throw("stack overflow");
}
- // Note that the concurrent GC might be scanning the stack as we try to replace it.
- // copystack takes care of the appropriate coordination with the stack scanner.
+ oldstatus = runtime·readgstatus(gp);
+ oldstatus &= ~Gscan;
+ runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
+ // The concurrent GC will not scan the stack while we are doing the copy since
+ // the gp is in a Gcopystack status.
copystack(gp, newsize);
if(StackDebug >= 1)
runtime·printf("stack grow done\n");
- runtime·casgstatus(gp, Gwaiting, Grunning);
+ runtime·casgstatus(gp, Gcopystack, Grunning);
runtime·gogo(&gp->sched);
}
@@ -825,6 +820,7 @@ void
runtime·shrinkstack(G *gp)
{
uintptr used, oldsize, newsize;
+ uint32 oldstatus;
if(runtime·readgstatus(gp) == Gdead) {
if(gp->stack.lo != 0) {
@@ -858,8 +854,19 @@ runtime·shrinkstack(G *gp)
#endif
if(StackDebug > 0)
runtime·printf("shrinking stack %D->%D\n", (uint64)oldsize, (uint64)newsize);
+ // This is being done in a Gscan state and was initiated by the GC, so there is
+ // no need to move to the Gcopystack state first.
+ // The world is stopped, so the goroutine must be Gwaiting or Grunnable,
+ // and its status is not changing underfoot.
+
+ oldstatus = runtime·readgstatus(gp);
+ oldstatus &= ~Gscan;
+ if(oldstatus != Gwaiting && oldstatus != Grunnable)
+ runtime·throw("status is not Gwaiting or Grunnable");
+ runtime·casgstatus(gp, oldstatus, Gcopystack);
copystack(gp, newsize);
-}
+ runtime·casgstatus(gp, Gcopystack, oldstatus);
+}
// Do any delayed stack freeing that was queued up during GC.
void
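
The stack.c changes move the status dance out of copystack and into its callers: whoever copies a stack first puts the goroutine into Gcopystack, which keeps the concurrent scanner off the stack for the duration, and restores the old status afterwards. A compact model of that protocol (illustrative statuses; the runtime's also carry Gscan bits):

package main

import (
	"fmt"
	"sync/atomic"
)

// Hypothetical statuses standing in for the runtime's.
const (
	Grunnable uint32 = iota
	Grunning
	Gwaiting
	Gcopystack
)

// Model of the new protocol: the caller of copystack moves the
// goroutine into Gcopystack first, which keeps the concurrent scanner
// off the stack during the copy, then restores the old status.
func withStackCopy(status *uint32, do func()) {
	old := atomic.LoadUint32(status)
	if !atomic.CompareAndSwapUint32(status, old, Gcopystack) {
		panic("copystack: status changed underfoot")
	}
	do() // the stack is private while the g is in Gcopystack
	atomic.StoreUint32(status, old)
}

func main() {
	s := Gwaiting
	withStackCopy(&s, func() { fmt.Println("copying stack") })
	fmt.Println(s == Gwaiting) // true: old status restored after the copy
}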
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index 6561094ff..32dfed7d3 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -106,6 +106,8 @@ func recovery_m(*g)
func mcacheRefill_m()
func largeAlloc_m()
func gc_m()
+func gcscan_m()
+func finishsweep_m()
func scavenge_m()
func setFinalizer_m()
func removeFinalizer_m()
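
These stub declarations are how Go code names the new C routines; gogc then runs them on the M's g0 stack via onM, as seen in the malloc.go hunk. A toy model of that kind of dispatch, funneling calls onto one dedicated goroutine (purely illustrative, not the runtime's mechanism):

package main

import "fmt"

// Toy model of onM-style dispatch: funnel work onto a single
// "system" goroutine and wait for it, as onM funnels calls onto g0.
var sysq = make(chan func())

func onM(fn func()) {
	done := make(chan struct{})
	sysq <- func() { fn(); close(done) }
	<-done
}

func main() {
	go func() {
		for fn := range sysq {
			fn()
		}
	}()
	onM(func() { fmt.Println("finishsweep_m (model)") })
	onM(func() { fmt.Println("gcscan_m (model)") })
}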