Diffstat (limited to 'libgo/runtime/proc.c')
-rw-r--r-- | libgo/runtime/proc.c | 2977
1 file changed, 539 insertions, 2438 deletions
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c index c6ac972bd4..06a9c2ad6b 100644 --- a/libgo/runtime/proc.c +++ b/libgo/runtime/proc.c @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include <errno.h> #include <limits.h> #include <signal.h> #include <stdlib.h> @@ -19,7 +20,6 @@ #include "defs.h" #include "malloc.h" #include "go-type.h" -#include "go-defer.h" #ifdef USING_SPLIT_STACK @@ -62,7 +62,6 @@ static void gtraceback(G*); #endif static __thread G *g; -static __thread M *m; #ifndef SETCONTEXT_CLOBBERS_TLS @@ -158,6 +157,26 @@ fixcontext(ucontext_t *c) #endif +// ucontext_arg returns a properly aligned ucontext_t value. On some +// systems a ucontext_t value must be aligned to a 16-byte boundary. +// The g structure that has fields of type ucontext_t is defined in +// Go, and Go has no simple way to align a field to such a boundary. +// So we make the field larger in runtime2.go and pick an appropriate +// offset within the field here. +static ucontext_t* +ucontext_arg(void** go_ucontext) +{ + uintptr_t p = (uintptr_t)go_ucontext; + size_t align = __alignof__(ucontext_t); + if(align > 16) { + // We only ensured space for up to a 16 byte alignment + // in libgo/go/runtime/runtime2.go. + runtime_throw("required alignment of ucontext_t too large"); + } + p = (p + align - 1) &~ (uintptr_t)(align - 1); + return (ucontext_t*)p; +} + // We can not always refer to the TLS variables directly. The // compiler will call tls_get_addr to get the address of the variable, // and it may hold it in a register across a call to schedule. When @@ -179,14 +198,15 @@ M* runtime_m(void) __attribute__ ((noinline, no_split_stack)); M* runtime_m(void) { - return m; + if(g == nil) + return nil; + return g->m; } -// Set m and g. +// Set g. void -runtime_setmg(M* mp, G* gp) +runtime_setg(G* gp) { - m = mp; g = gp; } @@ -197,6 +217,7 @@ runtime_newosproc(M *mp) pthread_attr_t attr; sigset_t clear, old; pthread_t tid; + int tries; int ret; if(pthread_attr_init(&attr) != 0) @@ -215,11 +236,21 @@ runtime_newosproc(M *mp) sigemptyset(&old); pthread_sigmask(SIG_BLOCK, &clear, &old); - ret = pthread_create(&tid, &attr, runtime_mstart, mp); + + for (tries = 0; tries < 20; tries++) { + ret = pthread_create(&tid, &attr, runtime_mstart, mp); + if (ret != EAGAIN) { + break; + } + runtime_usleep((tries + 1) * 1000); // Milliseconds. + } + pthread_sigmask(SIG_SETMASK, &old, nil); - if (ret != 0) + if (ret != 0) { + runtime_printf("pthread_create failed: %d\n", ret); runtime_throw("pthread_create"); + } } // First function run by a new goroutine. This replaces gogocall. @@ -227,13 +258,17 @@ static void kickoff(void) { void (*fn)(void*); + void *param; if(g->traceback != nil) gtraceback(g); fn = (void (*)(void*))(g->entry); - fn(g->param); - runtime_goexit(); + param = g->param; + g->entry = nil; + g->param = nil; + fn(param); + runtime_goexit1(); } // Switch context to a different goroutine. This is like longjmp. 
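The rounding step in ucontext_arg above is the usual align-up-to-a-power-of-two idiom. As a rough standalone sketch (not part of the patch; the buffer, helper name and printf are invented for illustration), the same trick places a properly aligned ucontext_t inside a deliberately oversized buffer, which is essentially what the runtime does with the enlarged context field, assuming the required alignment is at most the 16 bytes of slack:

#include <stdint.h>
#include <stdio.h>
#include <ucontext.h>

/* Round p up to the next multiple of align (a power of two), the same
 * idiom ucontext_arg uses on the oversized context field in the g struct. */
static void *
align_up(void *p, uintptr_t align)
{
	uintptr_t u = (uintptr_t)p;
	u = (u + align - 1) & ~(align - 1);
	return (void *)u;
}

int
main(void)
{
	/* Hypothetical backing store: sizeof(ucontext_t) plus 16 bytes of
	 * slack, mirroring the extra space reserved in runtime2.go. */
	char buf[sizeof(ucontext_t) + 16];
	ucontext_t *uc = align_up(buf, __alignof__(ucontext_t));

	printf("buffer at %p, aligned ucontext_t at %p\n",
	       (void *)buf, (void *)uc);
	return 0;
}

At most align - 1 bytes are lost to the rounding, so 16 bytes of slack always suffice, which is why ucontext_arg throws if __alignof__(ucontext_t) exceeds 16.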
@@ -242,12 +277,12 @@ void runtime_gogo(G* newg) { #ifdef USING_SPLIT_STACK - __splitstack_setcontext(&newg->stack_context[0]); + __splitstack_setcontext(&newg->stackcontext[0]); #endif g = newg; newg->fromgogo = true; - fixcontext(&newg->context); - setcontext(&newg->context); + fixcontext(ucontext_arg(&newg->context[0])); + setcontext(ucontext_arg(&newg->context[0])); runtime_throw("gogo setcontext returned"); } @@ -261,42 +296,47 @@ runtime_mcall(void (*pfn)(G*)) { M *mp; G *gp; +#ifndef USING_SPLIT_STACK + void *afterregs; +#endif // Ensure that all registers are on the stack for the garbage // collector. __builtin_unwind_init(); - mp = m; gp = g; + mp = gp->m; if(gp == mp->g0) runtime_throw("runtime: mcall called on m->g0 stack"); if(gp != nil) { #ifdef USING_SPLIT_STACK - __splitstack_getcontext(&g->stack_context[0]); + __splitstack_getcontext(&g->stackcontext[0]); #else - gp->gcnext_sp = &pfn; + // We have to point to an address on the stack that is + // below the saved registers. + gp->gcnextsp = &afterregs; #endif gp->fromgogo = false; - getcontext(&gp->context); + getcontext(ucontext_arg(&gp->context[0])); // When we return from getcontext, we may be running - // in a new thread. That means that m and g may have - // changed. They are global variables so we will - // reload them, but the addresses of m and g may be - // cached in our local stack frame, and those - // addresses may be wrong. Call functions to reload - // the values for this thread. - mp = runtime_m(); + // in a new thread. That means that g may have + // changed. It is a global variables so we will + // reload it, but the address of g may be cached in + // our local stack frame, and that address may be + // wrong. Call the function to reload the value for + // this thread. gp = runtime_g(); + mp = gp->m; if(gp->traceback != nil) gtraceback(gp); } if (gp == nil || !gp->fromgogo) { #ifdef USING_SPLIT_STACK - __splitstack_setcontext(&mp->g0->stack_context[0]); + __splitstack_setcontext(&mp->g0->stackcontext[0]); #endif mp->g0->entry = (byte*)pfn; mp->g0->param = gp; @@ -306,8 +346,8 @@ runtime_mcall(void (*pfn)(G*)) // the getcontext call just above. g = mp->g0; - fixcontext(&mp->g0->context); - setcontext(&mp->g0->context); + fixcontext(ucontext_arg(&mp->g0->context[0])); + setcontext(ucontext_arg(&mp->g0->context[0])); runtime_throw("runtime: mcall function returned"); } } @@ -324,110 +364,92 @@ runtime_mcall(void (*pfn)(G*)) // // Design doc at http://golang.org/s/go11sched. -typedef struct Sched Sched; -struct Sched { - Lock; - - uint64 goidgen; - M* midle; // idle m's waiting for work - int32 nmidle; // number of idle m's waiting for work - int32 nmidlelocked; // number of locked m's waiting for work - int32 mcount; // number of m's that have been created - int32 maxmcount; // maximum number of m's allowed (or die) - - P* pidle; // idle P's - uint32 npidle; - uint32 nmspinning; - - // Global runnable queue. - G* runqhead; - G* runqtail; - int32 runqsize; - - // Global cache of dead G's. - Lock gflock; - G* gfree; - - uint32 gcwaiting; // gc is waiting to run - int32 stopwait; - Note stopnote; - uint32 sysmonwait; - Note sysmonnote; - uint64 lastpoll; - - int32 profilehz; // cpu profiling rate -}; - enum { - // The max value of GOMAXPROCS. - // There are no fundamental restrictions on the value. - MaxGomaxprocs = 1<<8, - - // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once. + // Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once. 
// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. GoidCacheBatch = 16, }; -Sched runtime_sched; -int32 runtime_gomaxprocs; -uint32 runtime_needextram = 1; +extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched"); +extern bool* runtime_getCgoHasExtraM() + __asm__ (GOSYM_PREFIX "runtime.getCgoHasExtraM"); +extern P** runtime_getAllP() + __asm__ (GOSYM_PREFIX "runtime.getAllP"); +extern G* allocg(void) + __asm__ (GOSYM_PREFIX "runtime.allocg"); +extern bool needaddgcproc(void) + __asm__ (GOSYM_PREFIX "runtime.needaddgcproc"); +extern void startm(P*, bool) + __asm__(GOSYM_PREFIX "runtime.startm"); +extern void newm(void(*)(void), P*) + __asm__(GOSYM_PREFIX "runtime.newm"); + +Sched* runtime_sched; M runtime_m0; G runtime_g0; // idle goroutine for m0 G* runtime_lastg; -M* runtime_allm; P** runtime_allp; -M* runtime_extram; int8* runtime_goos; int32 runtime_ncpu; bool runtime_precisestack; -static int32 newprocs; - -static Lock allglock; // the following vars are protected by this lock or by stoptheworld -G** runtime_allg; -uintptr runtime_allglen; -static uintptr allgcap; bool runtime_isarchive; void* runtime_mstart(void*); -static void runqput(P*, G*); -static G* runqget(P*); -static bool runqputslow(P*, G*, uint32, uint32); -static G* runqsteal(P*, P*); -static void mput(M*); -static M* mget(void); -static void mcommoninit(M*); -static void schedule(void); -static void procresize(int32); -static void acquirep(P*); -static P* releasep(void); -static void newm(void(*)(void), P*); -static void stopm(void); -static void startm(P*, bool); -static void handoffp(P*); -static void wakep(void); -static void stoplockedm(void); -static void startlockedm(G*); -static void sysmon(void); -static uint32 retake(int64); -static void incidlelocked(int32); -static void checkdead(void); static void exitsyscall0(G*); static void park0(G*); static void goexit0(G*); -static void gfput(P*, G*); -static G* gfget(P*); -static void gfpurge(P*); -static void globrunqput(G*); -static void globrunqputbatch(G*, G*, int32); -static G* globrunqget(P*, int32); -static P* pidleget(void); -static void pidleput(P*); -static void injectglist(G*); -static bool preemptall(void); static bool exitsyscallfast(void); -static void allgadd(G*); + +extern void setncpu(int32) + __asm__(GOSYM_PREFIX "runtime.setncpu"); +extern void setpagesize(uintptr_t) + __asm__(GOSYM_PREFIX "runtime.setpagesize"); +extern void allgadd(G*) + __asm__(GOSYM_PREFIX "runtime.allgadd"); +extern void mcommoninit(M*) + __asm__(GOSYM_PREFIX "runtime.mcommoninit"); +extern void stopm(void) + __asm__(GOSYM_PREFIX "runtime.stopm"); +extern void handoffp(P*) + __asm__(GOSYM_PREFIX "runtime.handoffp"); +extern void wakep(void) + __asm__(GOSYM_PREFIX "runtime.wakep"); +extern void stoplockedm(void) + __asm__(GOSYM_PREFIX "runtime.stoplockedm"); +extern void schedule(void) + __asm__(GOSYM_PREFIX "runtime.schedule"); +extern void execute(G*, bool) + __asm__(GOSYM_PREFIX "runtime.execute"); +extern void gfput(P*, G*) + __asm__(GOSYM_PREFIX "runtime.gfput"); +extern G* gfget(P*) + __asm__(GOSYM_PREFIX "runtime.gfget"); +extern void procresize(int32) + __asm__(GOSYM_PREFIX "runtime.procresize"); +extern void acquirep(P*) + __asm__(GOSYM_PREFIX "runtime.acquirep"); +extern P* releasep(void) + __asm__(GOSYM_PREFIX "runtime.releasep"); +extern void incidlelocked(int32) + __asm__(GOSYM_PREFIX "runtime.incidlelocked"); +extern void checkdead(void) + __asm__(GOSYM_PREFIX "runtime.checkdead"); +extern void sysmon(void) 
+ __asm__(GOSYM_PREFIX "runtime.sysmon"); +extern void mput(M*) + __asm__(GOSYM_PREFIX "runtime.mput"); +extern M* mget(void) + __asm__(GOSYM_PREFIX "runtime.mget"); +extern void globrunqput(G*) + __asm__(GOSYM_PREFIX "runtime.globrunqput"); +extern P* pidleget(void) + __asm__(GOSYM_PREFIX "runtime.pidleget"); +extern bool runqempty(P*) + __asm__(GOSYM_PREFIX "runtime.runqempty"); +extern void runqput(P*, G*, bool) + __asm__(GOSYM_PREFIX "runtime.runqput"); bool runtime_isstarted; @@ -442,11 +464,16 @@ bool runtime_isstarted; void runtime_schedinit(void) { + M *m; int32 n, procs; String s; const byte *p; Eface i; + setncpu(runtime_ncpu); + setpagesize(getpagesize()); + runtime_sched = runtime_getsched(); + m = &runtime_m0; g = &runtime_g0; m->g0 = g; @@ -455,13 +482,14 @@ runtime_schedinit(void) initcontext(); - runtime_sched.maxmcount = 10000; + runtime_sched->maxmcount = 10000; runtime_precisestack = 0; // runtime_symtabinit(); runtime_mallocinit(); mcommoninit(m); - + runtime_alginit(); // maps must not be used before this call + // Initialize the itable value for newErrorCString, // so that the next time it gets called, possibly // in a fault during a garbage collection, it will not @@ -476,20 +504,20 @@ runtime_schedinit(void) runtime_goenvs(); runtime_parsedebugvars(); - runtime_sched.lastpoll = runtime_nanotime(); + runtime_sched->lastpoll = runtime_nanotime(); procs = 1; s = runtime_getenv("GOMAXPROCS"); p = s.str; if(p != nil && (n = runtime_atoi(p, s.len)) > 0) { - if(n > MaxGomaxprocs) - n = MaxGomaxprocs; + if(n > _MaxGomaxprocs) + n = _MaxGomaxprocs; procs = n; } - runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0])); + runtime_allp = runtime_getAllP(); procresize(procs); // Can not enable GC until all roots are registered. - // mstats.enablegc = 1; + // mstats()->enablegc = 1; } extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main"); @@ -503,54 +531,6 @@ struct field_align Hchan *p; }; -// main_init_done is a signal used by cgocallbackg that initialization -// has been completed. It is made before _cgo_notify_runtime_init_done, -// so all cgo calls can rely on it existing. When main_init is -// complete, it is closed, meaning cgocallbackg can reliably receive -// from it. -Hchan *runtime_main_init_done; - -// The chan bool type, for runtime_main_init_done. - -extern const struct __go_type_descriptor bool_type_descriptor - __asm__ (GOSYM_PREFIX "__go_tdn_bool"); - -static struct __go_channel_type chan_bool_type_descriptor = - { - /* __common */ - { - /* __code */ - GO_CHAN, - /* __align */ - __alignof (Hchan *), - /* __field_align */ - offsetof (struct field_align, p) - 1, - /* __size */ - sizeof (Hchan *), - /* __hash */ - 0, /* This value doesn't matter. */ - /* __hashfn */ - &__go_type_hash_error_descriptor, - /* __equalfn */ - &__go_type_equal_error_descriptor, - /* __gc */ - NULL, /* This value doesn't matter */ - /* __reflection */ - NULL, /* This value doesn't matter */ - /* __uncommon */ - NULL, - /* __pointer_to_this */ - NULL - }, - /* __element_type */ - &bool_type_descriptor, - /* __dir */ - CHANNEL_BOTH_DIR - }; - -extern Hchan *__go_new_channel (ChanType *, uintptr); -extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan"); - static void initDone(void *arg __attribute__ ((unused))) { runtime_unlockOSThread(); @@ -583,37 +563,37 @@ runtime_main(void* dummy __attribute__((unused))) runtime_lockOSThread(); // Defer unlock so that runtime.Goexit during init does the unlock too. 
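The block of extern declarations above replaces static C helpers with functions now written in Go; each declaration is bound to the Go symbol with GCC's asm-label extension (GOSYM_PREFIX plus the dotted, package-qualified name). A minimal single-file sketch of that linkage mechanism, with invented names and assuming an ELF target whose assembler accepts dots in symbol names:

#include <stdio.h>

/* Definition emitted under an explicit assembler name, standing in for a
 * function implemented on the Go side and exported under its mangled name. */
int provider(int x) __asm__ ("demo.Double");
int provider(int x) { return 2 * x; }

/* C-side declaration bound to the same symbol, the way proc.c declares
 * runtime.startm, runtime.schedule, and friends. */
extern int consumer(int) __asm__ ("demo.Double");

int
main(void)
{
	printf("%d\n", consumer(21));   /* prints 42 */
	return 0;
}

In the real patch the definition lives in the Go part of the runtime and only the extern declaration appears in proc.c; the sketch folds both sides into one file so it can be compiled on its own.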
- d.__pfn = initDone; - d.__next = g->defer; - d.__arg = (void*)-1; - d.__panic = g->panic; - d.__retaddr = nil; - d.__makefunc_can_recover = 0; - d.__frame = &frame; - d.__special = true; - g->defer = &d; - - if(m != &runtime_m0) + d.pfn = (uintptr)(void*)initDone; + d.link = g->_defer; + d.arg = (void*)-1; + d._panic = g->_panic; + d.retaddr = 0; + d.makefunccanrecover = 0; + d.frame = &frame; + d.special = true; + g->_defer = &d; + + if(g->m != &runtime_m0) runtime_throw("runtime_main not on m0"); __go_go(runtime_MHeap_Scavenger, nil); - runtime_main_init_done = __go_new_channel(&chan_bool_type_descriptor, 0); + makeMainInitDone(); _cgo_notify_runtime_init_done(); main_init(); - closechan(runtime_main_init_done); + closeMainInitDone(); - if(g->defer != &d || d.__pfn != initDone) + if(g->_defer != &d || (void*)d.pfn != initDone) runtime_throw("runtime: bad defer entry after init"); - g->defer = d.__next; + g->_defer = d.link; runtime_unlockOSThread(); // For gccgo we have to wait until after main is initialized // to enable GC, because initializing main registers the GC // roots. - mstats.enablegc = 1; + mstats()->enablegc = 1; if(runtime_isarchive) { // This is not a complete program, but is instead a @@ -629,7 +609,7 @@ runtime_main(void* dummy __attribute__((unused))) // another goroutine at the same time as main returns, // let the other goroutine finish printing the panic trace. // Once it does, it will exit. See issue 3934. - if(runtime_panicking) + if(runtime_panicking()) runtime_park(nil, nil, "panicwait"); runtime_exit(0); @@ -637,150 +617,24 @@ runtime_main(void* dummy __attribute__((unused))) *(int32*)0 = 0; } -void -runtime_goroutineheader(G *gp) -{ - const char *status; - int64 waitfor; - - switch(gp->status) { - case Gidle: - status = "idle"; - break; - case Grunnable: - status = "runnable"; - break; - case Grunning: - status = "running"; - break; - case Gsyscall: - status = "syscall"; - break; - case Gwaiting: - if(gp->waitreason) - status = gp->waitreason; - else - status = "waiting"; - break; - default: - status = "???"; - break; - } - - // approx time the G is blocked, in minutes - waitfor = 0; - if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0) - waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000); - - if(waitfor < 1) - runtime_printf("goroutine %D [%s]:\n", gp->goid, status); - else - runtime_printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor); -} - -void -runtime_printcreatedby(G *g) -{ - if(g != nil && g->gopc != 0 && g->goid != 1) { - String fn; - String file; - intgo line; - - if(__go_file_line(g->gopc - 1, &fn, &file, &line)) { - runtime_printf("created by %S\n", fn); - runtime_printf("\t%S:%D\n", file, (int64) line); - } - } -} - -struct Traceback -{ - G* gp; - Location locbuf[TracebackMaxFrames]; - int32 c; -}; +void getTraceback(G*, G*) __asm__(GOSYM_PREFIX "runtime.getTraceback"); -void -runtime_tracebackothers(G * volatile me) +// getTraceback stores a traceback of gp in the g's traceback field +// and then returns to me. We expect that gp's traceback is not nil. +// It works by saving me's current context, and checking gp's traceback field. +// If gp's traceback field is not nil, it starts running gp. +// In places where we call getcontext, we check the traceback field. +// If it is not nil, we collect a traceback, and then return to the +// goroutine stored in the traceback field, which is me. 
+void getTraceback(G* me, G* gp) { - G * volatile gp; - Traceback tb; - int32 traceback; - volatile uintptr i; - - tb.gp = me; - traceback = runtime_gotraceback(nil); - - // Show the current goroutine first, if we haven't already. - if((gp = m->curg) != nil && gp != me) { - runtime_printf("\n"); - runtime_goroutineheader(gp); - gp->traceback = &tb; - -#ifdef USING_SPLIT_STACK - __splitstack_getcontext(&me->stack_context[0]); -#endif - getcontext(&me->context); - - if(gp->traceback != nil) { - runtime_gogo(gp); - } - - runtime_printtrace(tb.locbuf, tb.c, false); - runtime_printcreatedby(gp); - } - - runtime_lock(&allglock); - for(i = 0; i < runtime_allglen; i++) { - gp = runtime_allg[i]; - if(gp == me || gp == m->curg || gp->status == Gdead) - continue; - if(gp->issystem && traceback < 2) - continue; - runtime_printf("\n"); - runtime_goroutineheader(gp); - - // Our only mechanism for doing a stack trace is - // _Unwind_Backtrace. And that only works for the - // current thread, not for other random goroutines. - // So we need to switch context to the goroutine, get - // the backtrace, and then switch back. - - // This means that if g is running or in a syscall, we - // can't reliably print a stack trace. FIXME. - - if(gp->status == Grunning) { - runtime_printf("\tgoroutine running on other thread; stack unavailable\n"); - runtime_printcreatedby(gp); - } else if(gp->status == Gsyscall) { - runtime_printf("\tgoroutine in C code; stack unavailable\n"); - runtime_printcreatedby(gp); - } else { - gp->traceback = &tb; - #ifdef USING_SPLIT_STACK - __splitstack_getcontext(&me->stack_context[0]); + __splitstack_getcontext(&me->stackcontext[0]); #endif - getcontext(&me->context); - - if(gp->traceback != nil) { - runtime_gogo(gp); - } + getcontext(ucontext_arg(&me->context[0])); - runtime_printtrace(tb.locbuf, tb.c, false); - runtime_printcreatedby(gp); - } - } - runtime_unlock(&allglock); -} - -static void -checkmcount(void) -{ - // sched lock is held - if(runtime_sched.mcount > runtime_sched.maxmcount) { - runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount); - runtime_throw("thread exhaustion"); + if (gp->traceback != nil) { + runtime_gogo(gp); } } @@ -794,286 +648,56 @@ gtraceback(G* gp) traceback = gp->traceback; gp->traceback = nil; + if(gp->m != nil) + runtime_throw("gtraceback: m is not nil"); + gp->m = traceback->gp->m; traceback->c = runtime_callers(1, traceback->locbuf, sizeof traceback->locbuf / sizeof traceback->locbuf[0], false); + gp->m = nil; runtime_gogo(traceback->gp); } -static void -mcommoninit(M *mp) -{ - // If there is no mcache runtime_callers() will crash, - // and we are most likely in sysmon thread so the stack is senseless anyway. - if(m->mcache) - runtime_callers(1, mp->createstack, nelem(mp->createstack), false); - - mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks(); - - runtime_lock(&runtime_sched); - mp->id = runtime_sched.mcount++; - checkmcount(); - runtime_mpreinit(mp); - - // Add to runtime_allm so garbage collector doesn't free m - // when it is just in a register or thread-local storage. - mp->alllink = runtime_allm; - // runtime_NumCgoCall() iterates over allm w/o schedlock, - // so we need to publish it safely. - runtime_atomicstorep(&runtime_allm, mp); - runtime_unlock(&runtime_sched); -} - -// Mark gp ready to run. -void -runtime_ready(G *gp) -{ - // Mark runnable. 
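getTraceback, like runtime_mcall and runtime_mstart, relies on getcontext returning a second time when some other code later jumps back to the saved context, with a field (here gp->traceback, elsewhere fromgogo or g->entry) used to tell the two returns apart. A small standalone sketch of that control flow, with names invented for the demo:

#include <stdio.h>
#include <ucontext.h>

static ucontext_t saved;
static volatile int resumed;

int
main(void)
{
	/* First return: fall through, set the flag, and jump back. */
	getcontext(&saved);
	if (!resumed) {
		resumed = 1;
		printf("first return from getcontext\n");
		setcontext(&saved);   /* resumes at getcontext; never returns */
	}
	/* Second return: reached via setcontext. */
	printf("second return, via setcontext\n");
	return 0;
}

In the runtime the second return happens on behalf of another goroutine, which is why the code rechecks the traceback or entry fields rather than a simple local flag.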
- m->locks++; // disable preemption because it can be holding p in a local var - if(gp->status != Gwaiting) { - runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status); - runtime_throw("bad g->status in ready"); - } - gp->status = Grunnable; - runqput(m->p, gp); - if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic - wakep(); - m->locks--; -} - -int32 -runtime_gcprocs(void) -{ - int32 n; - - // Figure out how many CPUs to use during GC. - // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. - runtime_lock(&runtime_sched); - n = runtime_gomaxprocs; - if(n > runtime_ncpu) - n = runtime_ncpu > 0 ? runtime_ncpu : 1; - if(n > MaxGcproc) - n = MaxGcproc; - if(n > runtime_sched.nmidle+1) // one M is currently running - n = runtime_sched.nmidle+1; - runtime_unlock(&runtime_sched); - return n; -} - -static bool -needaddgcproc(void) -{ - int32 n; - - runtime_lock(&runtime_sched); - n = runtime_gomaxprocs; - if(n > runtime_ncpu) - n = runtime_ncpu; - if(n > MaxGcproc) - n = MaxGcproc; - n -= runtime_sched.nmidle+1; // one M is currently running - runtime_unlock(&runtime_sched); - return n > 0; -} - -void -runtime_helpgc(int32 nproc) -{ - M *mp; - int32 n, pos; - - runtime_lock(&runtime_sched); - pos = 0; - for(n = 1; n < nproc; n++) { // one M is currently running - if(runtime_allp[pos]->mcache == m->mcache) - pos++; - mp = mget(); - if(mp == nil) - runtime_throw("runtime_gcprocs inconsistency"); - mp->helpgc = n; - mp->mcache = runtime_allp[pos]->mcache; - pos++; - runtime_notewakeup(&mp->park); - } - runtime_unlock(&runtime_sched); -} - -// Similar to stoptheworld but best-effort and can be called several times. -// There is no reverse operation, used during crashing. -// This function must not lock any mutexes. 
-void -runtime_freezetheworld(void) -{ - int32 i; - - if(runtime_gomaxprocs == 1) - return; - // stopwait and preemption requests can be lost - // due to races with concurrently executing threads, - // so try several times - for(i = 0; i < 5; i++) { - // this should tell the scheduler to not start any new goroutines - runtime_sched.stopwait = 0x7fffffff; - runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1); - // this should stop running goroutines - if(!preemptall()) - break; // no running goroutines - runtime_usleep(1000); - } - // to be sure - runtime_usleep(1000); - preemptall(); - runtime_usleep(1000); -} - -void -runtime_stoptheworld(void) -{ - int32 i; - uint32 s; - P *p; - bool wait; - - runtime_lock(&runtime_sched); - runtime_sched.stopwait = runtime_gomaxprocs; - runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1); - preemptall(); - // stop current P - m->p->status = Pgcstop; - runtime_sched.stopwait--; - // try to retake all P's in Psyscall status - for(i = 0; i < runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - s = p->status; - if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop)) - runtime_sched.stopwait--; - } - // stop idle P's - while((p = pidleget()) != nil) { - p->status = Pgcstop; - runtime_sched.stopwait--; - } - wait = runtime_sched.stopwait > 0; - runtime_unlock(&runtime_sched); - - // wait for remaining P's to stop voluntarily - if(wait) { - runtime_notesleep(&runtime_sched.stopnote); - runtime_noteclear(&runtime_sched.stopnote); - } - if(runtime_sched.stopwait) - runtime_throw("stoptheworld: not stopped"); - for(i = 0; i < runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - if(p->status != Pgcstop) - runtime_throw("stoptheworld: not stopped"); - } -} - -static void -mhelpgc(void) -{ - m->helpgc = -1; -} - -void -runtime_starttheworld(void) -{ - P *p, *p1; - M *mp; - G *gp; - bool add; - - m->locks++; // disable preemption because it can be holding p in a local var - gp = runtime_netpoll(false); // non-blocking - injectglist(gp); - add = needaddgcproc(); - runtime_lock(&runtime_sched); - if(newprocs) { - procresize(newprocs); - newprocs = 0; - } else - procresize(runtime_gomaxprocs); - runtime_sched.gcwaiting = 0; - - p1 = nil; - while((p = pidleget()) != nil) { - // procresize() puts p's with work at the beginning of the list. - // Once we reach a p without a run queue, the rest don't have one either. - if(p->runqhead == p->runqtail) { - pidleput(p); - break; - } - p->m = mget(); - p->link = p1; - p1 = p; - } - if(runtime_sched.sysmonwait) { - runtime_sched.sysmonwait = false; - runtime_notewakeup(&runtime_sched.sysmonnote); - } - runtime_unlock(&runtime_sched); - - while(p1) { - p = p1; - p1 = p1->link; - if(p->m) { - mp = p->m; - p->m = nil; - if(mp->nextp) - runtime_throw("starttheworld: inconsistent mp->nextp"); - mp->nextp = p; - runtime_notewakeup(&mp->park); - } else { - // Start M to run P. Do not start another M below. - newm(nil, p); - add = false; - } - } - - if(add) { - // If GC could have used another helper proc, start one now, - // in the hope that it will be available next time. - // It would have been even better to start it before the collection, - // but doing so requires allocating memory, so it's tricky to - // coordinate. This lazy approach works out in practice: - // we don't mind if the first couple gc rounds don't have quite - // the maximum number of procs. - newm(mhelpgc, nil); - } - m->locks--; -} - // Called to start an M. 
void* runtime_mstart(void* mp) { + M *m; + G *gp; + m = (M*)mp; g = m->g0; + g->m = m; + gp = g; initcontext(); - g->entry = nil; - g->param = nil; + gp->entry = nil; + gp->param = nil; // Record top of stack for use by mcall. // Once we call schedule we're never coming back, // so other calls can reuse this stack space. #ifdef USING_SPLIT_STACK - __splitstack_getcontext(&g->stack_context[0]); + __splitstack_getcontext(&g->stackcontext[0]); #else - g->gcinitial_sp = ∓ - // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp + gp->gcinitialsp = ∓ + // Setting gcstacksize to 0 is a marker meaning that gcinitialsp // is the top of the stack, not the bottom. - g->gcstack_size = 0; - g->gcnext_sp = ∓ + gp->gcstacksize = 0; + gp->gcnextsp = ∓ #endif - getcontext(&g->context); + getcontext(ucontext_arg(&gp->context[0])); + + if(gp->traceback != nil) + gtraceback(gp); - if(g->entry != nil) { + if(gp->entry != nil) { // Got here from mcall. - void (*pfn)(G*) = (void (*)(G*))g->entry; - G* gp = (G*)g->param; - pfn(gp); + void (*pfn)(G*) = (void (*)(G*))gp->entry; + G* gp1 = (G*)gp->param; + gp->entry = nil; + gp->param = nil; + pfn(gp1); *(int*)0x21 = 0x21; } runtime_minit(); @@ -1088,23 +712,25 @@ runtime_mstart(void* mp) // Install signal handlers; after minit so that minit can // prepare the thread to be able to handle the signals. if(m == &runtime_m0) { - if(runtime_iscgo && !runtime_cgoHasExtraM) { - runtime_cgoHasExtraM = true; - runtime_newextram(); - runtime_needextram = 0; + if(runtime_iscgo) { + bool* cgoHasExtraM = runtime_getCgoHasExtraM(); + if(!*cgoHasExtraM) { + *cgoHasExtraM = true; + runtime_newextram(); + } } runtime_initsig(false); } if(m->mstartfn) - m->mstartfn(); + ((void (*)(void))m->mstartfn)(); if(m->helpgc) { m->helpgc = 0; stopm(); } else if(m != &runtime_m0) { - acquirep(m->nextp); - m->nextp = nil; + acquirep((P*)m->nextp); + m->nextp = 0; } schedule(); @@ -1124,15 +750,18 @@ struct CgoThreadStart void (*fn)(void); }; +M* runtime_allocm(P*, bool, byte**, uintptr*) + __asm__(GOSYM_PREFIX "runtime.allocm"); + // Allocate a new m unassociated with any thread. // Can use p for allocation context if needed. 
M* -runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize) +runtime_allocm(P *p, bool allocatestack, byte** ret_g0_stack, uintptr* ret_g0_stacksize) { M *mp; - m->locks++; // disable GC because it can be called from sysmon - if(m->p == nil) + g->m->locks++; // disable GC because it can be called from sysmon + if(g->m->p == 0) acquirep(p); // temporarily borrow p for mallocs in this function #if 0 if(mtype == nil) { @@ -1144,355 +773,106 @@ runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacks mp = runtime_mal(sizeof *mp); mcommoninit(mp); - mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize); + mp->g0 = runtime_malg(allocatestack, false, ret_g0_stack, ret_g0_stacksize); + mp->g0->m = mp; - if(p == m->p) + if(p == (P*)g->m->p) releasep(); - m->locks--; + g->m->locks--; return mp; } -static G* -allocg(void) -{ - G *gp; - // static Type *gtype; - - // if(gtype == nil) { - // Eface e; - // runtime_gc_g_ptr(&e); - // gtype = ((PtrType*)e.__type_descriptor)->__element_type; - // } - // gp = runtime_cnew(gtype); - gp = runtime_malloc(sizeof(G)); - return gp; -} +void setGContext(void) __asm__ (GOSYM_PREFIX "runtime.setGContext"); -static M* lockextra(bool nilokay); -static void unlockextra(M*); - -// needm is called when a cgo callback happens on a -// thread without an m (a thread not created by Go). -// In this case, needm is expected to find an m to use -// and return with m, g initialized correctly. -// Since m and g are not set now (likely nil, but see below) -// needm is limited in what routines it can call. In particular -// it can only call nosplit functions (textflag 7) and cannot -// do any scheduling that requires an m. -// -// In order to avoid needing heavy lifting here, we adopt -// the following strategy: there is a stack of available m's -// that can be stolen. Using compare-and-swap -// to pop from the stack has ABA races, so we simulate -// a lock by doing an exchange (via casp) to steal the stack -// head and replace the top pointer with MLOCKED (1). -// This serves as a simple spin lock that we can use even -// without an m. The thread that locks the stack in this way -// unlocks the stack by storing a valid stack head pointer. -// -// In order to make sure that there is always an m structure -// available to be stolen, we maintain the invariant that there -// is always one more than needed. At the beginning of the -// program (if cgo is in use) the list is seeded with a single m. -// If needm finds that it has taken the last m off the list, its job -// is - once it has installed its own m so that it can do things like -// allocate memory - to create a spare m and put it on the list. -// -// Each of these extra m's also has a g0 and a curg that are -// pressed into service as the scheduling stack and current -// goroutine for the duration of the cgo callback. -// -// When the callback is done with the m, it calls dropm to -// put the m back on the list. -// -// Unlike the gc toolchain, we start running on curg, since we are -// just going to return and let the caller continue. +// setGContext sets up a new goroutine context for the current g. void -runtime_needm(void) +setGContext() { - M *mp; - - if(runtime_needextram) { - // Can happen if C/C++ code calls Go from a global ctor. - // Can not throw, because scheduler is not initialized yet. 
- int rv __attribute__((unused)); - rv = runtime_write(2, "fatal error: cgo callback before cgo call\n", - sizeof("fatal error: cgo callback before cgo call\n")-1); - runtime_exit(1); - } + int val; + G *gp; - // Lock extra list, take head, unlock popped list. - // nilokay=false is safe here because of the invariant above, - // that the extra list always contains or will soon contain - // at least one m. - mp = lockextra(false); - - // Set needextram when we've just emptied the list, - // so that the eventual call into cgocallbackg will - // allocate a new m for the extra list. We delay the - // allocation until then so that it can be done - // after exitsyscall makes sure it is okay to be - // running at all (that is, there's no garbage collection - // running right now). - mp->needextram = mp->schedlink == nil; - unlockextra(mp->schedlink); - - // Install m and g (= m->curg). - runtime_setmg(mp, mp->curg); - - // Initialize g's context as in mstart. initcontext(); - g->status = Gsyscall; - g->entry = nil; - g->param = nil; + gp = g; + gp->entry = nil; + gp->param = nil; #ifdef USING_SPLIT_STACK - __splitstack_getcontext(&g->stack_context[0]); + __splitstack_getcontext(&gp->stackcontext[0]); + val = 0; + __splitstack_block_signals(&val, nil); #else - g->gcinitial_sp = ∓ - g->gcstack = nil; - g->gcstack_size = 0; - g->gcnext_sp = ∓ + gp->gcinitialsp = &val; + gp->gcstack = nil; + gp->gcstacksize = 0; + gp->gcnextsp = &val; #endif - getcontext(&g->context); + getcontext(ucontext_arg(&gp->context[0])); - if(g->entry != nil) { + if(gp->entry != nil) { // Got here from mcall. - void (*pfn)(G*) = (void (*)(G*))g->entry; - G* gp = (G*)g->param; - pfn(gp); + void (*pfn)(G*) = (void (*)(G*))gp->entry; + G* gp1 = (G*)gp->param; + gp->entry = nil; + gp->param = nil; + pfn(gp1); *(int*)0x22 = 0x22; } - - // Initialize this thread to use the m. - runtime_minit(); - -#ifdef USING_SPLIT_STACK - { - int dont_block_signals = 0; - __splitstack_block_signals(&dont_block_signals, nil); - } -#endif } -// newextram allocates an m and puts it on the extra list. -// It is called with a working local m, so that it can do things -// like call schedlock and allocate. -void -runtime_newextram(void) -{ - M *mp, *mnext; - G *gp; - byte *g0_sp, *sp; - size_t g0_spsize, spsize; - - // Create extra goroutine locked to extra m. - // The goroutine is the context in which the cgo callback will run. - // The sched.pc will never be returned to, but setting it to - // runtime.goexit makes clear to the traceback routines where - // the goroutine stack ends. - mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize); - gp = runtime_malg(StackMin, &sp, &spsize); - gp->status = Gdead; - mp->curg = gp; - mp->locked = LockInternal; - mp->lockedg = gp; - gp->lockedm = mp; - gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1); - // put on allg for garbage collector - allgadd(gp); - - // The context for gp will be set up in runtime_needm. But - // here we need to set up the context for g0. - getcontext(&mp->g0->context); - mp->g0->context.uc_stack.ss_sp = g0_sp; - mp->g0->context.uc_stack.ss_size = g0_spsize; - makecontext(&mp->g0->context, kickoff, 0); - - // Add m to the extra list. - mnext = lockextra(true); - mp->schedlink = mnext; - unlockextra(mp); -} +void makeGContext(G*, byte*, uintptr) + __asm__(GOSYM_PREFIX "runtime.makeGContext"); -// dropm is called when a cgo callback has called needm but is now -// done with the callback and returning back into the non-Go thread. -// It puts the current m back onto the extra list. 
-// -// The main expense here is the call to signalstack to release the -// m's signal stack, and then the call to needm on the next callback -// from this thread. It is tempting to try to save the m for next time, -// which would eliminate both these costs, but there might not be -// a next time: the current thread (which Go does not control) might exit. -// If we saved the m for that thread, there would be an m leak each time -// such a thread exited. Instead, we acquire and release an m on each -// call. These should typically not be scheduling operations, just a few -// atomics, so the cost should be small. -// -// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread -// variable using pthread_key_create. Unlike the pthread keys we already use -// on OS X, this dummy key would never be read by Go code. It would exist -// only so that we could register at thread-exit-time destructor. -// That destructor would put the m back onto the extra list. -// This is purely a performance optimization. The current version, -// in which dropm happens on each cgo call, is still correct too. -// We may have to keep the current version on systems with cgo -// but without pthreads, like Windows. +// makeGContext makes a new context for a g. void -runtime_dropm(void) -{ - M *mp, *mnext; - - // Undo whatever initialization minit did during needm. - runtime_unminit(); +makeGContext(G* gp, byte* sp, uintptr spsize) { + ucontext_t *uc; - // Clear m and g, and return m to the extra list. - // After the call to setmg we can only call nosplit functions. - mp = m; - runtime_setmg(nil, nil); - - mp->curg->status = Gdead; - mp->curg->gcstack = nil; - mp->curg->gcnext_sp = nil; - - mnext = lockextra(true); - mp->schedlink = mnext; - unlockextra(mp); -} - -#define MLOCKED ((M*)1) - -// lockextra locks the extra list and returns the list head. -// The caller must unlock the list by storing a new list head -// to runtime.extram. If nilokay is true, then lockextra will -// return a nil list head if that's what it finds. If nilokay is false, -// lockextra will keep waiting until the list head is no longer nil. -static M* -lockextra(bool nilokay) -{ - M *mp; - void (*yield)(void); - - for(;;) { - mp = runtime_atomicloadp(&runtime_extram); - if(mp == MLOCKED) { - yield = runtime_osyield; - yield(); - continue; - } - if(mp == nil && !nilokay) { - runtime_usleep(1); - continue; - } - if(!runtime_casp(&runtime_extram, mp, MLOCKED)) { - yield = runtime_osyield; - yield(); - continue; - } - break; - } - return mp; -} - -static void -unlockextra(M *mp) -{ - runtime_atomicstorep(&runtime_extram, mp); -} - -static int32 -countextra() -{ - M *mp, *mc; - int32 c; - - for(;;) { - mp = runtime_atomicloadp(&runtime_extram); - if(mp == MLOCKED) { - runtime_osyield(); - continue; - } - if(!runtime_casp(&runtime_extram, mp, MLOCKED)) { - runtime_osyield(); - continue; - } - c = 0; - for(mc = mp; mc != nil; mc = mc->schedlink) - c++; - runtime_atomicstorep(&runtime_extram, mp); - return c; - } + uc = ucontext_arg(&gp->context[0]); + getcontext(uc); + uc->uc_stack.ss_sp = sp; + uc->uc_stack.ss_size = (size_t)spsize; + makecontext(uc, kickoff, 0); } // Create a new m. It will start off with a call to fn, or else the scheduler. 
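makeGContext above is a thin wrapper around the classic getcontext / uc_stack / makecontext sequence: point the saved context at a separately allocated stack and at the kickoff entry point. A rough standalone sketch of that sequence follows; the entry function, stack size, and the use of uc_link with swapcontext are demo choices (the runtime itself enters the new context with setcontext and never returns):

#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static ucontext_t mainctx, goctx;

/* Stands in for kickoff: whatever the new context should start running. */
static void
entry(void)
{
	printf("running on the freshly allocated stack\n");
	/* Returning from entry resumes uc_link, i.e. mainctx. */
}

int
main(void)
{
	size_t stacksize = 64 * 1024;
	char *stack = malloc(stacksize);

	if (stack == NULL)
		return 1;

	getcontext(&goctx);
	goctx.uc_stack.ss_sp = stack;
	goctx.uc_stack.ss_size = stacksize;
	goctx.uc_link = &mainctx;          /* where to go when entry returns */
	makecontext(&goctx, entry, 0);

	swapcontext(&mainctx, &goctx);     /* run entry, then come back */
	printf("back on the original stack\n");
	free(stack);
	return 0;
}

In proc.c the context pointer additionally goes through ucontext_arg so that it is properly aligned inside the Go-side g structure.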
-static void +void newm(void(*fn)(void), P *p) { M *mp; - mp = runtime_allocm(p, -1, nil, nil); - mp->nextp = p; - mp->mstartfn = fn; + mp = runtime_allocm(p, false, nil, nil); + mp->nextp = (uintptr)p; + mp->mstartfn = (uintptr)(void*)fn; runtime_newosproc(mp); } -// Stops execution of the current m until new work is available. -// Returns with acquired P. -static void -stopm(void) -{ - if(m->locks) - runtime_throw("stopm holding locks"); - if(m->p) - runtime_throw("stopm holding p"); - if(m->spinning) { - m->spinning = false; - runtime_xadd(&runtime_sched.nmspinning, -1); - } - -retry: - runtime_lock(&runtime_sched); - mput(m); - runtime_unlock(&runtime_sched); - runtime_notesleep(&m->park); - runtime_noteclear(&m->park); - if(m->helpgc) { - runtime_gchelper(); - m->helpgc = 0; - m->mcache = nil; - goto retry; - } - acquirep(m->nextp); - m->nextp = nil; -} - static void mspinning(void) { - m->spinning = true; + g->m->spinning = true; } // Schedules some M to run the p (creates an M if necessary). // If p==nil, tries to get an idle P, if no idle P's does nothing. -static void +void startm(P *p, bool spinning) { M *mp; void (*fn)(void); - runtime_lock(&runtime_sched); + runtime_lock(&runtime_sched->lock); if(p == nil) { p = pidleget(); if(p == nil) { - runtime_unlock(&runtime_sched); + runtime_unlock(&runtime_sched->lock); if(spinning) - runtime_xadd(&runtime_sched.nmspinning, -1); + runtime_xadd(&runtime_sched->nmspinning, -1); return; } } mp = mget(); - runtime_unlock(&runtime_sched); + runtime_unlock(&runtime_sched->lock); if(mp == nil) { fn = nil; if(spinning) @@ -1504,369 +884,39 @@ startm(P *p, bool spinning) runtime_throw("startm: m is spinning"); if(mp->nextp) runtime_throw("startm: m has p"); - mp->spinning = spinning; - mp->nextp = p; - runtime_notewakeup(&mp->park); -} - -// Hands off P from syscall or locked M. -static void -handoffp(P *p) -{ - // if it has local work, start it straight away - if(p->runqhead != p->runqtail || runtime_sched.runqsize) { - startm(p, false); - return; - } - // no local work, check that there are no spinning/idle M's, - // otherwise our help is not required - if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic - runtime_cas(&runtime_sched.nmspinning, 0, 1)) { - startm(p, true); - return; - } - runtime_lock(&runtime_sched); - if(runtime_sched.gcwaiting) { - p->status = Pgcstop; - if(--runtime_sched.stopwait == 0) - runtime_notewakeup(&runtime_sched.stopnote); - runtime_unlock(&runtime_sched); - return; - } - if(runtime_sched.runqsize) { - runtime_unlock(&runtime_sched); - startm(p, false); - return; - } - // If this is the last running P and nobody is polling network, - // need to wakeup another M to poll network. - if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) { - runtime_unlock(&runtime_sched); - startm(p, false); - return; - } - pidleput(p); - runtime_unlock(&runtime_sched); -} - -// Tries to add one more P to execute G's. -// Called when a G is made runnable (newproc, ready). -static void -wakep(void) -{ - // be conservative about spinning threads - if(!runtime_cas(&runtime_sched.nmspinning, 0, 1)) - return; - startm(nil, true); -} - -// Stops execution of the current m that is locked to a g until the g is runnable again. -// Returns with acquired P. 
-static void -stoplockedm(void) -{ - P *p; - - if(m->lockedg == nil || m->lockedg->lockedm != m) - runtime_throw("stoplockedm: inconsistent locking"); - if(m->p) { - // Schedule another M to run this p. - p = releasep(); - handoffp(p); + if(spinning && !runqempty(p)) { + runtime_throw("startm: p has runnable gs"); } - incidlelocked(1); - // Wait until another thread schedules lockedg again. - runtime_notesleep(&m->park); - runtime_noteclear(&m->park); - if(m->lockedg->status != Grunnable) - runtime_throw("stoplockedm: not runnable"); - acquirep(m->nextp); - m->nextp = nil; -} - -// Schedules the locked m to run the locked gp. -static void -startlockedm(G *gp) -{ - M *mp; - P *p; - - mp = gp->lockedm; - if(mp == m) - runtime_throw("startlockedm: locked to me"); - if(mp->nextp) - runtime_throw("startlockedm: m has p"); - // directly handoff current P to the locked m - incidlelocked(-1); - p = releasep(); - mp->nextp = p; + mp->spinning = spinning; + mp->nextp = (uintptr)p; runtime_notewakeup(&mp->park); - stopm(); -} - -// Stops the current m for stoptheworld. -// Returns when the world is restarted. -static void -gcstopm(void) -{ - P *p; - - if(!runtime_sched.gcwaiting) - runtime_throw("gcstopm: not waiting for gc"); - if(m->spinning) { - m->spinning = false; - runtime_xadd(&runtime_sched.nmspinning, -1); - } - p = releasep(); - runtime_lock(&runtime_sched); - p->status = Pgcstop; - if(--runtime_sched.stopwait == 0) - runtime_notewakeup(&runtime_sched.stopnote); - runtime_unlock(&runtime_sched); - stopm(); -} - -// Schedules gp to run on the current M. -// Never returns. -static void -execute(G *gp) -{ - int32 hz; - - if(gp->status != Grunnable) { - runtime_printf("execute: bad g status %d\n", gp->status); - runtime_throw("execute: bad g status"); - } - gp->status = Grunning; - gp->waitsince = 0; - m->p->schedtick++; - m->curg = gp; - gp->m = m; - - // Check whether the profiler needs to be turned on or off. - hz = runtime_sched.profilehz; - if(m->profilehz != hz) - runtime_resetcpuprofiler(hz); - - runtime_gogo(gp); } -// Finds a runnable goroutine to execute. -// Tries to steal from other P's, get g from global queue, poll network. -static G* -findrunnable(void) -{ - G *gp; - P *p; - int32 i; - -top: - if(runtime_sched.gcwaiting) { - gcstopm(); - goto top; - } - if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil) - runtime_ready(gp); - // local runq - gp = runqget(m->p); - if(gp) - return gp; - // global runq - if(runtime_sched.runqsize) { - runtime_lock(&runtime_sched); - gp = globrunqget(m->p, 0); - runtime_unlock(&runtime_sched); - if(gp) - return gp; - } - // poll network - gp = runtime_netpoll(false); // non-blocking - if(gp) { - injectglist(gp->schedlink); - gp->status = Grunnable; - return gp; - } - // If number of spinning M's >= number of busy P's, block. - // This is necessary to prevent excessive CPU consumption - // when GOMAXPROCS>>1 but the program parallelism is low. 
- if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic - goto stop; - if(!m->spinning) { - m->spinning = true; - runtime_xadd(&runtime_sched.nmspinning, 1); - } - // random steal from other P's - for(i = 0; i < 2*runtime_gomaxprocs; i++) { - if(runtime_sched.gcwaiting) - goto top; - p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs]; - if(p == m->p) - gp = runqget(p); - else - gp = runqsteal(m->p, p); - if(gp) - return gp; - } -stop: - // return P and block - runtime_lock(&runtime_sched); - if(runtime_sched.gcwaiting) { - runtime_unlock(&runtime_sched); - goto top; - } - if(runtime_sched.runqsize) { - gp = globrunqget(m->p, 0); - runtime_unlock(&runtime_sched); - return gp; - } - p = releasep(); - pidleput(p); - runtime_unlock(&runtime_sched); - if(m->spinning) { - m->spinning = false; - runtime_xadd(&runtime_sched.nmspinning, -1); - } - // check all runqueues once again - for(i = 0; i < runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - if(p && p->runqhead != p->runqtail) { - runtime_lock(&runtime_sched); - p = pidleget(); - runtime_unlock(&runtime_sched); - if(p) { - acquirep(p); - goto top; - } - break; - } - } - // poll network - if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) { - if(m->p) - runtime_throw("findrunnable: netpoll with p"); - if(m->spinning) - runtime_throw("findrunnable: netpoll with spinning"); - gp = runtime_netpoll(true); // block until new work is available - runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime()); - if(gp) { - runtime_lock(&runtime_sched); - p = pidleget(); - runtime_unlock(&runtime_sched); - if(p) { - acquirep(p); - injectglist(gp->schedlink); - gp->status = Grunnable; - return gp; - } - injectglist(gp); - } - } - stopm(); - goto top; -} - -static void -resetspinning(void) -{ - int32 nmspinning; - - if(m->spinning) { - m->spinning = false; - nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1); - if(nmspinning < 0) - runtime_throw("findrunnable: negative nmspinning"); - } else - nmspinning = runtime_atomicload(&runtime_sched.nmspinning); - - // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), - // so see if we need to wakeup another P here. - if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0) - wakep(); -} - -// Injects the list of runnable G's into the scheduler. -// Can run concurrently with GC. -static void -injectglist(G *glist) +// Puts the current goroutine into a waiting state and calls unlockf. +// If unlockf returns false, the goroutine is resumed. +void +runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason) { - int32 n; - G *gp; - - if(glist == nil) - return; - runtime_lock(&runtime_sched); - for(n = 0; glist; n++) { - gp = glist; - glist = gp->schedlink; - gp->status = Grunnable; - globrunqput(gp); - } - runtime_unlock(&runtime_sched); - - for(; n && runtime_sched.npidle; n--) - startm(nil, false); + if(g->atomicstatus != _Grunning) + runtime_throw("bad g status"); + g->m->waitlock = lock; + g->m->waitunlockf = unlockf; + g->waitreason = runtime_gostringnocopy((const byte*)reason); + runtime_mcall(park0); } -// One round of scheduler: find a runnable goroutine and execute it. -// Never returns. 
-static void -schedule(void) -{ - G *gp; - uint32 tick; - - if(m->locks) - runtime_throw("schedule: holding locks"); - -top: - if(runtime_sched.gcwaiting) { - gcstopm(); - goto top; - } +void gopark(FuncVal *, void *, String, byte, int) + __asm__ ("runtime.gopark"); - gp = nil; - // Check the global runnable queue once in a while to ensure fairness. - // Otherwise two goroutines can completely occupy the local runqueue - // by constantly respawning each other. - tick = m->p->schedtick; - // This is a fancy way to say tick%61==0, - // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors. - if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) { - runtime_lock(&runtime_sched); - gp = globrunqget(m->p, 1); - runtime_unlock(&runtime_sched); - if(gp) - resetspinning(); - } - if(gp == nil) { - gp = runqget(m->p); - if(gp && m->spinning) - runtime_throw("schedule: spinning with local work"); - } - if(gp == nil) { - gp = findrunnable(); // blocks until work is available - resetspinning(); - } - - if(gp->lockedm) { - // Hands off own p to the locked m, - // then blocks waiting for a new p. - startlockedm(gp); - goto top; - } - - execute(gp); -} - -// Puts the current goroutine into a waiting state and calls unlockf. -// If unlockf returns false, the goroutine is resumed. void -runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason) +gopark(FuncVal *unlockf, void *lock, String reason, + byte traceEv __attribute__ ((unused)), + int traceskip __attribute__ ((unused))) { - if(g->status != Grunning) + if(g->atomicstatus != _Grunning) runtime_throw("bad g status"); - m->waitlock = lock; - m->waitunlockf = unlockf; + g->m->waitlock = lock; + g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn; g->waitreason = reason; runtime_mcall(park0); } @@ -1887,27 +937,44 @@ runtime_parkunlock(Lock *lock, const char *reason) runtime_park(parkunlock, lock, reason); } +void goparkunlock(Lock *, String, byte, int) + __asm__ (GOSYM_PREFIX "runtime.goparkunlock"); + +void +goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)), + int traceskip __attribute__ ((unused))) +{ + if(g->atomicstatus != _Grunning) + runtime_throw("bad g status"); + g->m->waitlock = lock; + g->m->waitunlockf = parkunlock; + g->waitreason = reason; + runtime_mcall(park0); +} + // runtime_park continuation on g0. static void park0(G *gp) { + M *m; bool ok; - gp->status = Gwaiting; + m = g->m; + gp->atomicstatus = _Gwaiting; gp->m = nil; m->curg = nil; if(m->waitunlockf) { - ok = m->waitunlockf(gp, m->waitlock); + ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock); m->waitunlockf = nil; m->waitlock = nil; if(!ok) { - gp->status = Grunnable; - execute(gp); // Schedule it back, never returns. + gp->atomicstatus = _Grunnable; + execute(gp, true); // Schedule it back, never returns. } } if(m->lockedg) { stoplockedm(); - execute(gp); // Never returns. + execute(gp, true); // Never returns. 
} schedule(); } @@ -1916,7 +983,7 @@ park0(G *gp) void runtime_gosched(void) { - if(g->status != Grunning) + if(g->atomicstatus != _Grunning) runtime_throw("bad g status"); runtime_mcall(runtime_gosched0); } @@ -1925,15 +992,18 @@ runtime_gosched(void) void runtime_gosched0(G *gp) { - gp->status = Grunnable; + M *m; + + m = g->m; + gp->atomicstatus = _Grunnable; gp->m = nil; m->curg = nil; - runtime_lock(&runtime_sched); + runtime_lock(&runtime_sched->lock); globrunqput(gp); - runtime_unlock(&runtime_sched); + runtime_unlock(&runtime_sched->lock); if(m->lockedg) { stoplockedm(); - execute(gp); // Never returns. + execute(gp, true); // Never returns. } schedule(); } @@ -1942,38 +1012,43 @@ runtime_gosched0(G *gp) // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack). // Since it does not return it does not matter. But if it is preempted // at the split stack check, GC will complain about inconsistent sp. -void runtime_goexit(void) __attribute__ ((noinline)); +void runtime_goexit1(void) __attribute__ ((noinline)); void -runtime_goexit(void) +runtime_goexit1(void) { - if(g->status != Grunning) + if(g->atomicstatus != _Grunning) runtime_throw("bad g status"); runtime_mcall(goexit0); } -// runtime_goexit continuation on g0. +// runtime_goexit1 continuation on g0. static void goexit0(G *gp) { - gp->status = Gdead; + M *m; + + m = g->m; + gp->atomicstatus = _Gdead; gp->entry = nil; gp->m = nil; gp->lockedm = nil; gp->paniconfault = 0; - gp->defer = nil; // should be true already but just in case. - gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data. - gp->writenbuf = 0; - gp->writebuf = nil; - gp->waitreason = nil; + gp->_defer = nil; // should be true already but just in case. + gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data. + gp->writebuf.__values = nil; + gp->writebuf.__count = 0; + gp->writebuf.__capacity = 0; + gp->waitreason = runtime_gostringnocopy(nil); gp->param = nil; + m->curg->m = nil; m->curg = nil; m->lockedg = nil; - if(m->locked & ~LockExternal) { + if(m->locked & ~_LockExternal) { runtime_printf("invalid m->locked = %d\n", m->locked); runtime_throw("internal lockOSThread error"); } m->locked = 0; - gfput(m->p, gp); + gfput((P*)m->p, gp); schedule(); } @@ -1986,15 +1061,16 @@ goexit0(G *gp) // make g->sched refer to the caller's stack segment, because // entersyscall is going to return immediately after. -void runtime_entersyscall(void) __attribute__ ((no_split_stack)); -static void doentersyscall(void) __attribute__ ((no_split_stack, noinline)); +void runtime_entersyscall(int32) __attribute__ ((no_split_stack)); +static void doentersyscall(uintptr, uintptr) + __attribute__ ((no_split_stack, noinline)); void -runtime_entersyscall() +runtime_entersyscall(int32 dummy __attribute__ ((unused))) { // Save the registers in the g structure so that any pointers // held in registers will be seen by the garbage collector. - getcontext(&g->gcregs); + getcontext(ucontext_arg(&g->gcregs[0])); // Do the work in a separate function, so that this function // doesn't save any registers on its own stack. If this @@ -2005,84 +1081,99 @@ runtime_entersyscall() // callee-saved registers to access the TLS variable g. We // don't want to put the ucontext_t on the stack because it is // large and we can not split the stack here. 
- doentersyscall(); + doentersyscall((uintptr)runtime_getcallerpc(&dummy), + (uintptr)runtime_getcallersp(&dummy)); } static void -doentersyscall() +doentersyscall(uintptr pc, uintptr sp) { - // Disable preemption because during this function g is in Gsyscall status, + // Disable preemption because during this function g is in _Gsyscall status, // but can have inconsistent g->sched, do not let GC observe it. - m->locks++; + g->m->locks++; // Leave SP around for GC and traceback. #ifdef USING_SPLIT_STACK - g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size, - &g->gcnext_segment, &g->gcnext_sp, - &g->gcinitial_sp); + { + size_t gcstacksize; + g->gcstack = __splitstack_find(nil, nil, &gcstacksize, + &g->gcnextsegment, &g->gcnextsp, + &g->gcinitialsp); + g->gcstacksize = (uintptr)gcstacksize; + } #else { void *v; - g->gcnext_sp = (byte *) &v; + g->gcnextsp = (byte *) &v; } #endif - g->status = Gsyscall; + g->syscallsp = sp; + g->syscallpc = pc; - if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic - runtime_lock(&runtime_sched); - if(runtime_atomicload(&runtime_sched.sysmonwait)) { - runtime_atomicstore(&runtime_sched.sysmonwait, 0); - runtime_notewakeup(&runtime_sched.sysmonnote); + g->atomicstatus = _Gsyscall; + + if(runtime_atomicload(&runtime_sched->sysmonwait)) { // TODO: fast atomic + runtime_lock(&runtime_sched->lock); + if(runtime_atomicload(&runtime_sched->sysmonwait)) { + runtime_atomicstore(&runtime_sched->sysmonwait, 0); + runtime_notewakeup(&runtime_sched->sysmonnote); } - runtime_unlock(&runtime_sched); + runtime_unlock(&runtime_sched->lock); } - m->mcache = nil; - m->p->m = nil; - runtime_atomicstore(&m->p->status, Psyscall); - if(runtime_atomicload(&runtime_sched.gcwaiting)) { - runtime_lock(&runtime_sched); - if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) { - if(--runtime_sched.stopwait == 0) - runtime_notewakeup(&runtime_sched.stopnote); + g->m->mcache = nil; + ((P*)(g->m->p))->m = 0; + runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall); + if(runtime_atomicload(&runtime_sched->gcwaiting)) { + runtime_lock(&runtime_sched->lock); + if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) { + if(--runtime_sched->stopwait == 0) + runtime_notewakeup(&runtime_sched->stopnote); } - runtime_unlock(&runtime_sched); + runtime_unlock(&runtime_sched->lock); } - m->locks--; + g->m->locks--; } // The same as runtime_entersyscall(), but with a hint that the syscall is blocking. void -runtime_entersyscallblock(void) +runtime_entersyscallblock(int32 dummy __attribute__ ((unused))) { P *p; - m->locks++; // see comment in entersyscall + g->m->locks++; // see comment in entersyscall // Leave SP around for GC and traceback. #ifdef USING_SPLIT_STACK - g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size, - &g->gcnext_segment, &g->gcnext_sp, - &g->gcinitial_sp); + { + size_t gcstacksize; + g->gcstack = __splitstack_find(nil, nil, &gcstacksize, + &g->gcnextsegment, &g->gcnextsp, + &g->gcinitialsp); + g->gcstacksize = (uintptr)gcstacksize; + } #else - g->gcnext_sp = (byte *) &p; + g->gcnextsp = (byte *) &p; #endif // Save the registers in the g structure so that any pointers // held in registers will be seen by the garbage collector. 
- getcontext(&g->gcregs); + getcontext(ucontext_arg(&g->gcregs[0])); + + g->syscallpc = (uintptr)runtime_getcallerpc(&dummy); + g->syscallsp = (uintptr)runtime_getcallersp(&dummy); - g->status = Gsyscall; + g->atomicstatus = _Gsyscall; p = releasep(); handoffp(p); if(g->isbackground) // do not consider blocked scavenger for deadlock detection incidlelocked(1); - m->locks--; + g->m->locks--; } // The goroutine g exited its system call. @@ -2090,33 +1181,34 @@ runtime_entersyscallblock(void) // This is called only from the go syscall library, not // from the low-level system calls used by the runtime. void -runtime_exitsyscall(void) +runtime_exitsyscall(int32 dummy __attribute__ ((unused))) { G *gp; - m->locks++; // see comment in entersyscall - gp = g; + gp->m->locks++; // see comment in entersyscall + if(gp->isbackground) // do not consider blocked scavenger for deadlock detection incidlelocked(-1); - g->waitsince = 0; + gp->waitsince = 0; if(exitsyscallfast()) { // There's a cpu for us, so we can run. - m->p->syscalltick++; - gp->status = Grunning; + ((P*)gp->m->p)->syscalltick++; + gp->atomicstatus = _Grunning; // Garbage collector isn't running (since we are), // so okay to clear gcstack and gcsp. #ifdef USING_SPLIT_STACK gp->gcstack = nil; #endif - gp->gcnext_sp = nil; - runtime_memclr(&gp->gcregs, sizeof gp->gcregs); - m->locks--; + gp->gcnextsp = nil; + runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs); + gp->syscallsp = 0; + gp->m->locks--; return; } - m->locks--; + gp->m->locks--; // Call the scheduler. runtime_mcall(exitsyscall0); @@ -2130,42 +1222,47 @@ runtime_exitsyscall(void) #ifdef USING_SPLIT_STACK gp->gcstack = nil; #endif - gp->gcnext_sp = nil; - runtime_memclr(&gp->gcregs, sizeof gp->gcregs); + gp->gcnextsp = nil; + runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs); + + gp->syscallsp = 0; - // Don't refer to m again, we might be running on a different - // thread after returning from runtime_mcall. - runtime_m()->p->syscalltick++; + // Note that this gp->m might be different than the earlier + // gp->m after returning from runtime_mcall. + ((P*)gp->m->p)->syscalltick++; } static bool exitsyscallfast(void) { + G *gp; P *p; + gp = g; + // Freezetheworld sets stopwait but does not retake P's. - if(runtime_sched.stopwait) { - m->p = nil; + if(runtime_sched->stopwait) { + gp->m->p = 0; return false; } // Try to re-acquire the last P. - if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) { + if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) { // There's a cpu for us, so we can run. - m->mcache = m->p->mcache; - m->p->m = m; + gp->m->mcache = ((P*)gp->m->p)->mcache; + ((P*)gp->m->p)->m = (uintptr)gp->m; return true; } // Try to get any other idle P. 
- m->p = nil; - if(runtime_sched.pidle) { - runtime_lock(&runtime_sched); + gp->m->p = 0; + if(runtime_sched->pidle) { + runtime_lock(&runtime_sched->lock); p = pidleget(); - if(p && runtime_atomicload(&runtime_sched.sysmonwait)) { - runtime_atomicstore(&runtime_sched.sysmonwait, 0); - runtime_notewakeup(&runtime_sched.sysmonnote); + if(p && runtime_atomicload(&runtime_sched->sysmonwait)) { + runtime_atomicstore(&runtime_sched->sysmonwait, 0); + runtime_notewakeup(&runtime_sched->sysmonnote); } - runtime_unlock(&runtime_sched); + runtime_unlock(&runtime_sched->lock); if(p) { acquirep(p); return true; @@ -2179,75 +1276,93 @@ exitsyscallfast(void) static void exitsyscall0(G *gp) { + M *m; P *p; - gp->status = Grunnable; + m = g->m; + gp->atomicstatus = _Grunnable; gp->m = nil; m->curg = nil; - runtime_lock(&runtime_sched); + runtime_lock(&runtime_sched->lock); p = pidleget(); if(p == nil) globrunqput(gp); - else if(runtime_atomicload(&runtime_sched.sysmonwait)) { - runtime_atomicstore(&runtime_sched.sysmonwait, 0); - runtime_notewakeup(&runtime_sched.sysmonnote); + else if(runtime_atomicload(&runtime_sched->sysmonwait)) { + runtime_atomicstore(&runtime_sched->sysmonwait, 0); + runtime_notewakeup(&runtime_sched->sysmonnote); } - runtime_unlock(&runtime_sched); + runtime_unlock(&runtime_sched->lock); if(p) { acquirep(p); - execute(gp); // Never returns. + execute(gp, false); // Never returns. } if(m->lockedg) { // Wait until another thread schedules gp and so m again. stoplockedm(); - execute(gp); // Never returns. + execute(gp, false); // Never returns. } stopm(); schedule(); // Never returns. } -// Called from syscall package before fork. -void syscall_runtime_BeforeFork(void) - __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork"); +void syscall_entersyscall(void) + __asm__(GOSYM_PREFIX "syscall.Entersyscall"); + +void syscall_entersyscall(void) __attribute__ ((no_split_stack)); + void -syscall_runtime_BeforeFork(void) +syscall_entersyscall() { - // Fork can hang if preempted with signals frequently enough (see issue 5517). - // Ensure that we stay on the same M where we disable profiling. - runtime_m()->locks++; - if(runtime_m()->profilehz != 0) - runtime_resetcpuprofiler(0); + runtime_entersyscall(0); } -// Called from syscall package after fork in parent. -void syscall_runtime_AfterFork(void) - __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork"); +void syscall_exitsyscall(void) + __asm__(GOSYM_PREFIX "syscall.Exitsyscall"); + +void syscall_exitsyscall(void) __attribute__ ((no_split_stack)); + void -syscall_runtime_AfterFork(void) +syscall_exitsyscall() { - int32 hz; - - hz = runtime_sched.profilehz; - if(hz != 0) - runtime_resetcpuprofiler(hz); - runtime_m()->locks--; + runtime_exitsyscall(0); } // Allocate a new g, with a stack big enough for stacksize bytes. 
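syscall_entersyscall and syscall_exitsyscall above are plain C functions exported under Go symbol names through GCC asm labels (__asm__ (GOSYM_PREFIX "syscall.Entersyscall")). A small sketch of that linkage trick with an invented symbol, mypkg.Ping; the GOSYM_PREFIX and no_split_stack details of the real declarations are left out:

#include <stdio.h>

/* Give the C definition an explicit assembler-level name.  "mypkg.Ping"
   is a made-up symbol used only for this illustration. */
void mypkg_ping(void) __asm__ ("mypkg.Ping");

void
mypkg_ping(void)
{
	puts("ping");
}

int
main(void)
{
	mypkg_ping();      /* local calls still use the C name */
	return 0;
}

Any object that references the assembler symbol mypkg.Ping links against this definition, which is how a C-implemented entry point can satisfy a name mangled by another compiler.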
G* -runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) +runtime_malg(bool allocatestack, bool signalstack, byte** ret_stack, uintptr* ret_stacksize) { + uintptr stacksize; G *newg; + byte* unused_stack; + uintptr unused_stacksize; +#if USING_SPLIT_STACK + int dont_block_signals = 0; + size_t ss_stacksize; +#endif + if (ret_stack == nil) { + ret_stack = &unused_stack; + } + if (ret_stacksize == nil) { + ret_stacksize = &unused_stacksize; + } newg = allocg(); - if(stacksize >= 0) { -#if USING_SPLIT_STACK - int dont_block_signals = 0; + if(allocatestack) { + stacksize = StackMin; + if(signalstack) { + stacksize = 32 * 1024; // OS X wants >= 8K, GNU/Linux >= 2K +#ifdef SIGSTKSZ + if(stacksize < SIGSTKSZ) + stacksize = SIGSTKSZ; +#endif + } +#if USING_SPLIT_STACK *ret_stack = __splitstack_makecontext(stacksize, - &newg->stack_context[0], - ret_stacksize); - __splitstack_block_signals_context(&newg->stack_context[0], + &newg->stackcontext[0], + &ss_stacksize); + *ret_stacksize = (uintptr)ss_stacksize; + __splitstack_block_signals_context(&newg->stackcontext[0], &dont_block_signals, nil); #else // In 64-bit mode, the maximum Go allocation space is @@ -2257,7 +1372,7 @@ runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) // 32-bit mode, the Go allocation space is all of // memory anyhow. if(sizeof(void*) == 8) { - void *p = runtime_SysAlloc(stacksize, &mstats.other_sys); + void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys); if(p == nil) runtime_throw("runtime: cannot allocate memory for goroutine stack"); *ret_stack = (byte*)p; @@ -2265,41 +1380,14 @@ runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC); runtime_xadd(&runtime_stacks_sys, stacksize); } - *ret_stacksize = stacksize; - newg->gcinitial_sp = *ret_stack; - newg->gcstack_size = (size_t)stacksize; + *ret_stacksize = (uintptr)stacksize; + newg->gcinitialsp = *ret_stack; + newg->gcstacksize = (uintptr)stacksize; #endif } return newg; } -/* For runtime package testing. */ - - -// Create a new g running fn with siz bytes of arguments. -// Put it on the queue of g's waiting to run. -// The compiler turns a go statement into a call to this. -// Cannot split the stack because it assumes that the arguments -// are available sequentially after &fn; they would not be -// copied if a stack split occurred. It's OK for this to call -// functions that split the stack. 
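When runtime_malg above is asked for a signal stack it floors the size at 32K and, where the macro exists, at SIGSTKSZ. A standalone POSIX sketch of the same sizing rule that installs the result with sigaltstack; only the 32K floor and the SIGSTKSZ check come from the patch, the rest is ordinary libc usage and not how the runtime allocates its stacks:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	stack_t ss;
	size_t size;

	size = 32 * 1024;                  /* OS X wants >= 8K, GNU/Linux >= 2K */
#ifdef SIGSTKSZ
	if(size < (size_t)SIGSTKSZ)
		size = (size_t)SIGSTKSZ;   /* never go below the platform minimum */
#endif
	ss.ss_sp = malloc(size);
	ss.ss_size = size;
	ss.ss_flags = 0;
	if(ss.ss_sp == NULL || sigaltstack(&ss, NULL) != 0) {
		perror("sigaltstack");
		return 1;
	}
	printf("installed a %zu-byte signal stack\n", size);
	return 0;
}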
-void runtime_testing_entersyscall(int32) - __asm__ (GOSYM_PREFIX "runtime.entersyscall"); -void -runtime_testing_entersyscall(int32 dummy __attribute__ ((unused))) -{ - runtime_entersyscall(); -} - -void runtime_testing_exitsyscall(int32) - __asm__ (GOSYM_PREFIX "runtime.exitsyscall"); - -void -runtime_testing_exitsyscall(int32 dummy __attribute__ ((unused))) -{ - runtime_exitsyscall(); -} - G* __go_go(void (*fn)(void*), void* arg) { @@ -2310,155 +1398,55 @@ __go_go(void (*fn)(void*), void* arg) //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret); if(fn == nil) { - m->throwing = -1; // do not dump full stacks + g->m->throwing = -1; // do not dump full stacks runtime_throw("go of nil func value"); } - m->locks++; // disable preemption because it can be holding p in a local var + g->m->locks++; // disable preemption because it can be holding p in a local var - p = m->p; + p = (P*)g->m->p; if((newg = gfget(p)) != nil) { #ifdef USING_SPLIT_STACK int dont_block_signals = 0; - sp = __splitstack_resetcontext(&newg->stack_context[0], + sp = __splitstack_resetcontext(&newg->stackcontext[0], &spsize); - __splitstack_block_signals_context(&newg->stack_context[0], + __splitstack_block_signals_context(&newg->stackcontext[0], &dont_block_signals, nil); #else - sp = newg->gcinitial_sp; - spsize = newg->gcstack_size; + sp = newg->gcinitialsp; + spsize = newg->gcstacksize; if(spsize == 0) runtime_throw("bad spsize in __go_go"); - newg->gcnext_sp = sp; + newg->gcnextsp = sp; #endif + newg->traceback = nil; } else { - newg = runtime_malg(StackMin, &sp, &spsize); + uintptr malsize; + + newg = runtime_malg(true, false, &sp, &malsize); + spsize = (size_t)malsize; + newg->atomicstatus = _Gdead; allgadd(newg); } newg->entry = (byte*)fn; newg->param = arg; newg->gopc = (uintptr)__builtin_return_address(0); - newg->status = Grunnable; + newg->atomicstatus = _Grunnable; if(p->goidcache == p->goidcacheend) { - p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch); + p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch); p->goidcacheend = p->goidcache + GoidCacheBatch; } newg->goid = p->goidcache++; - { - // Avoid warnings about variables clobbered by - // longjmp. - byte * volatile vsp = sp; - size_t volatile vspsize = spsize; - G * volatile vnewg = newg; - - getcontext(&vnewg->context); - vnewg->context.uc_stack.ss_sp = vsp; -#ifdef MAKECONTEXT_STACK_TOP - vnewg->context.uc_stack.ss_sp += vspsize; -#endif - vnewg->context.uc_stack.ss_size = vspsize; - makecontext(&vnewg->context, kickoff, 0); - - runqput(p, vnewg); - - if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic - wakep(); - m->locks--; - return vnewg; - } -} - -static void -allgadd(G *gp) -{ - G **new; - uintptr cap; - - runtime_lock(&allglock); - if(runtime_allglen >= allgcap) { - cap = 4096/sizeof(new[0]); - if(cap < 2*allgcap) - cap = 2*allgcap; - new = runtime_malloc(cap*sizeof(new[0])); - if(new == nil) - runtime_throw("runtime: cannot allocate memory"); - if(runtime_allg != nil) { - runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0])); - runtime_free(runtime_allg); - } - runtime_allg = new; - allgcap = cap; - } - runtime_allg[runtime_allglen++] = gp; - runtime_unlock(&allglock); -} - -// Put on gfree list. -// If local list is too long, transfer a batch to the global list. 
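__go_go above refills p->goidcache by bumping the shared goidgen counter once per GoidCacheBatch ids, so creating a goroutine normally assigns an id without touching shared state at all. A toy model of that batching built on a C11 atomic counter; BATCH, IdCache and nextid are invented names and the refill arithmetic is simplified relative to the patch:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

enum { BATCH = 16 };

static _Atomic uint64_t idgen;             /* plays the role of runtime_sched->goidgen */

typedef struct { uint64_t next, end; } IdCache;

static uint64_t
nextid(IdCache *c)
{
	if(c->next == c->end) {            /* cache exhausted: one shared RMW per BATCH ids */
		c->next = atomic_fetch_add(&idgen, BATCH);
		c->end = c->next + BATCH;
	}
	return c->next++;
}

int
main(void)
{
	IdCache c = { 0, 0 };
	int i;

	for(i = 0; i < 40; i++)
		printf("%llu ", (unsigned long long)nextid(&c));
	printf("\n");
	return 0;
}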
-static void -gfput(P *p, G *gp) -{ - gp->schedlink = p->gfree; - p->gfree = gp; - p->gfreecnt++; - if(p->gfreecnt >= 64) { - runtime_lock(&runtime_sched.gflock); - while(p->gfreecnt >= 32) { - p->gfreecnt--; - gp = p->gfree; - p->gfree = gp->schedlink; - gp->schedlink = runtime_sched.gfree; - runtime_sched.gfree = gp; - } - runtime_unlock(&runtime_sched.gflock); - } -} + makeGContext(newg, sp, (uintptr)spsize); -// Get from gfree list. -// If local list is empty, grab a batch from global list. -static G* -gfget(P *p) -{ - G *gp; + runqput(p, newg, true); -retry: - gp = p->gfree; - if(gp == nil && runtime_sched.gfree) { - runtime_lock(&runtime_sched.gflock); - while(p->gfreecnt < 32 && runtime_sched.gfree) { - p->gfreecnt++; - gp = runtime_sched.gfree; - runtime_sched.gfree = gp->schedlink; - gp->schedlink = p->gfree; - p->gfree = gp; - } - runtime_unlock(&runtime_sched.gflock); - goto retry; - } - if(gp) { - p->gfree = gp->schedlink; - p->gfreecnt--; - } - return gp; -} - -// Purge all cached G's from gfree list to the global list. -static void -gfpurge(P *p) -{ - G *gp; - - runtime_lock(&runtime_sched.gflock); - while(p->gfreecnt) { - p->gfreecnt--; - gp = p->gfree; - p->gfree = gp->schedlink; - gp->schedlink = runtime_sched.gfree; - runtime_sched.gfree = gp; - } - runtime_unlock(&runtime_sched.gflock); + if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic + wakep(); + g->m->locks--; + return newg; } void @@ -2475,131 +1463,9 @@ runtime_Gosched(void) runtime_gosched(); } -// Implementation of runtime.GOMAXPROCS. -// delete when scheduler is even stronger -int32 -runtime_gomaxprocsfunc(int32 n) -{ - int32 ret; - - if(n > MaxGomaxprocs) - n = MaxGomaxprocs; - runtime_lock(&runtime_sched); - ret = runtime_gomaxprocs; - if(n <= 0 || n == ret) { - runtime_unlock(&runtime_sched); - return ret; - } - runtime_unlock(&runtime_sched); - - runtime_semacquire(&runtime_worldsema, false); - m->gcing = 1; - runtime_stoptheworld(); - newprocs = n; - m->gcing = 0; - runtime_semrelease(&runtime_worldsema); - runtime_starttheworld(); - - return ret; -} - -// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below -// after they modify m->locked. Do not allow preemption during this call, -// or else the m might be different in this function than in the caller. -static void -lockOSThread(void) -{ - m->lockedg = g; - g->lockedm = m; -} - -void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread"); -void -runtime_LockOSThread(void) -{ - m->locked |= LockExternal; - lockOSThread(); -} - -void -runtime_lockOSThread(void) -{ - m->locked += LockInternal; - lockOSThread(); -} - - -// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below -// after they update m->locked. Do not allow preemption during this call, -// or else the m might be in different in this function than in the caller. 
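lockOSThread above, together with the unlockOSThread path that follows, encodes thread pinning in the single m->locked word: a low bit for an external runtime.LockOSThread and, above it, a counter incremented in steps of LockInternal for nested internal pins. A small sketch of that bookkeeping; the functions and the concrete values 1 and 2 are illustrative, the pattern only requires LockExternal to be a single bit below the LockInternal counting step:

#include <assert.h>
#include <stdint.h>

enum { LockExternal = 1, LockInternal = 2 };   /* illustrative values */

static uint32_t locked;                        /* plays the role of m->locked */

static void lock_external(void)   { locked |= LockExternal; }
static void unlock_external(void) { locked &= ~(uint32_t)LockExternal; }
static void lock_internal(void)   { locked += LockInternal; }
static void unlock_internal(void) { assert(locked >= LockInternal); locked -= LockInternal; }

int
main(void)
{
	lock_external();
	lock_internal();
	unlock_external();
	assert(locked != 0);               /* still pinned by the nested internal lock */
	unlock_internal();
	assert(locked == 0);               /* only now may the thread be unpinned */
	return 0;
}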
-static void -unlockOSThread(void) -{ - if(m->locked != 0) - return; - m->lockedg = nil; - g->lockedm = nil; -} - -void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread"); - -void -runtime_UnlockOSThread(void) -{ - m->locked &= ~LockExternal; - unlockOSThread(); -} - -void -runtime_unlockOSThread(void) -{ - if(m->locked < LockInternal) - runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread"); - m->locked -= LockInternal; - unlockOSThread(); -} - -bool -runtime_lockedOSThread(void) -{ - return g->lockedm != nil && m->lockedg != nil; -} - -int32 -runtime_gcount(void) -{ - G *gp; - int32 n, s; - uintptr i; - - n = 0; - runtime_lock(&allglock); - // TODO(dvyukov): runtime.NumGoroutine() is O(N). - // We do not want to increment/decrement centralized counter in newproc/goexit, - // just to make runtime.NumGoroutine() faster. - // Compromise solution is to introduce per-P counters of active goroutines. - for(i = 0; i < runtime_allglen; i++) { - gp = runtime_allg[i]; - s = gp->status; - if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting) - n++; - } - runtime_unlock(&allglock); - return n; -} - -int32 -runtime_mcount(void) -{ - return runtime_sched.mcount; -} - static struct { - Lock; - void (*fn)(uintptr*, int32); + uint32 lock; int32 hz; - uintptr pcbuf[TracebackMaxFrames]; - Location locbuf[TracebackMaxFrames]; } prof; static void System(void) {} @@ -2609,11 +1475,14 @@ static void GC(void) {} void runtime_sigprof() { - M *mp = m; + M *mp = g->m; int32 n, i; bool traceback; + uintptr pcbuf[TracebackMaxFrames]; + Location locbuf[TracebackMaxFrames]; + Slice stk; - if(prof.fn == nil || prof.hz == 0) + if(prof.hz == 0) return; if(mp == nil) @@ -2627,12 +1496,6 @@ runtime_sigprof() if(mp->mcache == nil) traceback = false; - runtime_lock(&prof); - if(prof.fn == nil) { - runtime_unlock(&prof); - mp->mallocing--; - return; - } n = 0; if(runtime_atomicload(&runtime_in_callers) > 0) { @@ -2644,797 +1507,68 @@ runtime_sigprof() } if(traceback) { - n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf), false); + n = runtime_callers(0, locbuf, nelem(locbuf), false); for(i = 0; i < n; i++) - prof.pcbuf[i] = prof.locbuf[i].pc; + pcbuf[i] = locbuf[i].pc; } if(!traceback || n <= 0) { n = 2; - prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n); + pcbuf[0] = (uintptr)runtime_getcallerpc(&n); if(mp->gcing || mp->helpgc) - prof.pcbuf[1] = (uintptr)GC; + pcbuf[1] = (uintptr)GC; else - prof.pcbuf[1] = (uintptr)System; + pcbuf[1] = (uintptr)System; + } + + if (prof.hz != 0) { + stk.__values = &pcbuf[0]; + stk.__count = n; + stk.__capacity = n; + + // Simple cas-lock to coordinate with setcpuprofilerate. + while (!runtime_cas(&prof.lock, 0, 1)) { + runtime_osyield(); + } + if (prof.hz != 0) { + runtime_cpuprofAdd(stk); + } + runtime_atomicstore(&prof.lock, 0); } - prof.fn(prof.pcbuf, n); - runtime_unlock(&prof); + mp->mallocing--; } // Arrange to call fn with a traceback hz times a second. void -runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) +runtime_setcpuprofilerate_m(int32 hz) { // Force sane arguments. if(hz < 0) hz = 0; - if(hz == 0) - fn = nil; - if(fn == nil) - hz = 0; // Disable preemption, otherwise we can be rescheduled to another thread // that has profiling enabled. - m->locks++; + g->m->locks++; // Stop profiler on this thread so that it is safe to lock prof. // if a profiling signal came in while we had prof locked, // it would deadlock. 
runtime_resetcpuprofiler(0); - runtime_lock(&prof); - prof.fn = fn; + while (!runtime_cas(&prof.lock, 0, 1)) { + runtime_osyield(); + } prof.hz = hz; - runtime_unlock(&prof); - runtime_lock(&runtime_sched); - runtime_sched.profilehz = hz; - runtime_unlock(&runtime_sched); + runtime_atomicstore(&prof.lock, 0); + + runtime_lock(&runtime_sched->lock); + runtime_sched->profilehz = hz; + runtime_unlock(&runtime_sched->lock); if(hz != 0) runtime_resetcpuprofiler(hz); - m->locks--; -} - -// Change number of processors. The world is stopped, sched is locked. -static void -procresize(int32 new) -{ - int32 i, old; - bool empty; - G *gp; - P *p; - - old = runtime_gomaxprocs; - if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs) - runtime_throw("procresize: invalid arg"); - // initialize new P's - for(i = 0; i < new; i++) { - p = runtime_allp[i]; - if(p == nil) { - p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC); - p->id = i; - p->status = Pgcstop; - runtime_atomicstorep(&runtime_allp[i], p); - } - if(p->mcache == nil) { - if(old==0 && i==0) - p->mcache = m->mcache; // bootstrap - else - p->mcache = runtime_allocmcache(); - } - } - - // redistribute runnable G's evenly - // collect all runnable goroutines in global queue preserving FIFO order - // FIFO order is required to ensure fairness even during frequent GCs - // see http://golang.org/issue/7126 - empty = false; - while(!empty) { - empty = true; - for(i = 0; i < old; i++) { - p = runtime_allp[i]; - if(p->runqhead == p->runqtail) - continue; - empty = false; - // pop from tail of local queue - p->runqtail--; - gp = p->runq[p->runqtail%nelem(p->runq)]; - // push onto head of global queue - gp->schedlink = runtime_sched.runqhead; - runtime_sched.runqhead = gp; - if(runtime_sched.runqtail == nil) - runtime_sched.runqtail = gp; - runtime_sched.runqsize++; - } - } - // fill local queues with at most nelem(p->runq)/2 goroutines - // start at 1 because current M already executes some G and will acquire allp[0] below, - // so if we have a spare G we want to put it into allp[1]. - for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) { - gp = runtime_sched.runqhead; - runtime_sched.runqhead = gp->schedlink; - if(runtime_sched.runqhead == nil) - runtime_sched.runqtail = nil; - runtime_sched.runqsize--; - runqput(runtime_allp[i%new], gp); - } - - // free unused P's - for(i = new; i < old; i++) { - p = runtime_allp[i]; - runtime_freemcache(p->mcache); - p->mcache = nil; - gfpurge(p); - p->status = Pdead; - // can't free P itself because it can be referenced by an M in syscall - } - - if(m->p) - m->p->m = nil; - m->p = nil; - m->mcache = nil; - p = runtime_allp[0]; - p->m = nil; - p->status = Pidle; - acquirep(p); - for(i = new-1; i > 0; i--) { - p = runtime_allp[i]; - p->status = Pidle; - pidleput(p); - } - runtime_atomicstore((uint32*)&runtime_gomaxprocs, new); -} - -// Associate p and the current m. -static void -acquirep(P *p) -{ - if(m->p || m->mcache) - runtime_throw("acquirep: already in go"); - if(p->m || p->status != Pidle) { - runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status); - runtime_throw("acquirep: invalid p state"); - } - m->mcache = p->mcache; - m->p = p; - p->m = m; - p->status = Prunning; -} - -// Disassociate p and the current m. 
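The new prof.lock above replaces the old Lock plus prof.fn scheme with a one-word cas-lock: both sigprof and setcpuprofilerate spin on runtime_cas(&prof.lock, 0, 1), yielding between attempts, and release with an atomic store of 0. A standalone sketch of that handshake in C11 atomics, with sched_yield standing in for runtime_osyield and set_rate an invented caller:

#include <sched.h>
#include <stdatomic.h>

static atomic_uint proflock;               /* plays the role of prof.lock: 0 free, 1 held */
static int prof_hz;                        /* protected by proflock */

static void
prof_lock(void)
{
	unsigned expected = 0;

	while(!atomic_compare_exchange_weak(&proflock, &expected, 1u)) {
		expected = 0;              /* a failed CAS rewrites expected */
		sched_yield();             /* plays the role of runtime_osyield */
	}
}

static void
prof_unlock(void)
{
	atomic_store(&proflock, 0u);
}

static void
set_rate(int hz)
{
	prof_lock();
	prof_hz = hz;
	prof_unlock();
}

int
main(void)
{
	set_rate(100);
	return prof_hz == 100 ? 0 : 1;
}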
-static P* -releasep(void) -{ - P *p; - - if(m->p == nil || m->mcache == nil) - runtime_throw("releasep: invalid arg"); - p = m->p; - if(p->m != m || p->mcache != m->mcache || p->status != Prunning) { - runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n", - m, m->p, p->m, m->mcache, p->mcache, p->status); - runtime_throw("releasep: invalid p state"); - } - m->p = nil; - m->mcache = nil; - p->m = nil; - p->status = Pidle; - return p; -} - -static void -incidlelocked(int32 v) -{ - runtime_lock(&runtime_sched); - runtime_sched.nmidlelocked += v; - if(v > 0) - checkdead(); - runtime_unlock(&runtime_sched); -} - -// Check for deadlock situation. -// The check is based on number of running M's, if 0 -> deadlock. -static void -checkdead(void) -{ - G *gp; - int32 run, grunning, s; - uintptr i; - - // For -buildmode=c-shared or -buildmode=c-archive it's OK if - // there are no running goroutines. The calling program is - // assumed to be running. - if(runtime_isarchive) { - return; - } - - // -1 for sysmon - run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra(); - if(run > 0) - return; - // If we are dying because of a signal caught on an already idle thread, - // freezetheworld will cause all running threads to block. - // And runtime will essentially enter into deadlock state, - // except that there is a thread that will call runtime_exit soon. - if(runtime_panicking > 0) - return; - if(run < 0) { - runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n", - runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount); - runtime_throw("checkdead: inconsistent counts"); - } - grunning = 0; - runtime_lock(&allglock); - for(i = 0; i < runtime_allglen; i++) { - gp = runtime_allg[i]; - if(gp->isbackground) - continue; - s = gp->status; - if(s == Gwaiting) - grunning++; - else if(s == Grunnable || s == Grunning || s == Gsyscall) { - runtime_unlock(&allglock); - runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s); - runtime_throw("checkdead: runnable g"); - } - } - runtime_unlock(&allglock); - if(grunning == 0) // possible if main goroutine calls runtime_Goexit() - runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!"); - m->throwing = -1; // do not dump full stacks - runtime_throw("all goroutines are asleep - deadlock!"); -} - -static void -sysmon(void) -{ - uint32 idle, delay; - int64 now, lastpoll, lasttrace; - G *gp; - - lasttrace = 0; - idle = 0; // how many cycles in succession we had not wokeup somebody - delay = 0; - for(;;) { - if(idle == 0) // start with 20us sleep... - delay = 20; - else if(idle > 50) // start doubling the sleep after 1ms... 
- delay *= 2; - if(delay > 10*1000) // up to 10ms - delay = 10*1000; - runtime_usleep(delay); - if(runtime_debug.schedtrace <= 0 && - (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic - runtime_lock(&runtime_sched); - if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { - runtime_atomicstore(&runtime_sched.sysmonwait, 1); - runtime_unlock(&runtime_sched); - runtime_notesleep(&runtime_sched.sysmonnote); - runtime_noteclear(&runtime_sched.sysmonnote); - idle = 0; - delay = 20; - } else - runtime_unlock(&runtime_sched); - } - // poll network if not polled for more than 10ms - lastpoll = runtime_atomicload64(&runtime_sched.lastpoll); - now = runtime_nanotime(); - if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) { - runtime_cas64(&runtime_sched.lastpoll, lastpoll, now); - gp = runtime_netpoll(false); // non-blocking - if(gp) { - // Need to decrement number of idle locked M's - // (pretending that one more is running) before injectglist. - // Otherwise it can lead to the following situation: - // injectglist grabs all P's but before it starts M's to run the P's, - // another M returns from syscall, finishes running its G, - // observes that there is no work to do and no other running M's - // and reports deadlock. - incidlelocked(-1); - injectglist(gp); - incidlelocked(1); - } - } - // retake P's blocked in syscalls - // and preempt long running G's - if(retake(now)) - idle = 0; - else - idle++; - - if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) { - lasttrace = now; - runtime_schedtrace(runtime_debug.scheddetail); - } - } -} - -typedef struct Pdesc Pdesc; -struct Pdesc -{ - uint32 schedtick; - int64 schedwhen; - uint32 syscalltick; - int64 syscallwhen; -}; -static Pdesc pdesc[MaxGomaxprocs]; - -static uint32 -retake(int64 now) -{ - uint32 i, s, n; - int64 t; - P *p; - Pdesc *pd; - - n = 0; - for(i = 0; i < (uint32)runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - if(p==nil) - continue; - pd = &pdesc[i]; - s = p->status; - if(s == Psyscall) { - // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). - t = p->syscalltick; - if(pd->syscalltick != t) { - pd->syscalltick = t; - pd->syscallwhen = now; - continue; - } - // On the one hand we don't want to retake Ps if there is no other work to do, - // but on the other hand we want to retake them eventually - // because they can prevent the sysmon thread from deep sleep. - if(p->runqhead == p->runqtail && - runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 && - pd->syscallwhen + 10*1000*1000 > now) - continue; - // Need to decrement number of idle locked M's - // (pretending that one more is running) before the CAS. - // Otherwise the M from which we retake can exit the syscall, - // increment nmidle and report deadlock. - incidlelocked(-1); - if(runtime_cas(&p->status, s, Pidle)) { - n++; - handoffp(p); - } - incidlelocked(1); - } else if(s == Prunning) { - // Preempt G if it's running for more than 10ms. - t = p->schedtick; - if(pd->schedtick != t) { - pd->schedtick = t; - pd->schedwhen = now; - continue; - } - if(pd->schedwhen + 10*1000*1000 > now) - continue; - // preemptone(p); - } - } - return n; -} - -// Tell all goroutines that they have been preempted and they should stop. -// This function is purely best-effort. 
It can fail to inform a goroutine if a -// processor just started running it. -// No locks need to be held. -// Returns true if preemption request was issued to at least one goroutine. -static bool -preemptall(void) -{ - return false; -} - -void -runtime_schedtrace(bool detailed) -{ - static int64 starttime; - int64 now; - int64 id1, id2, id3; - int32 i, t, h; - uintptr gi; - const char *fmt; - M *mp, *lockedm; - G *gp, *lockedg; - P *p; - - now = runtime_nanotime(); - if(starttime == 0) - starttime = now; - - runtime_lock(&runtime_sched); - runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d", - (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount, - runtime_sched.nmidle, runtime_sched.runqsize); - if(detailed) { - runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n", - runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning, - runtime_sched.stopwait, runtime_sched.sysmonwait); - } - // We must be careful while reading data from P's, M's and G's. - // Even if we hold schedlock, most data can be changed concurrently. - // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. - for(i = 0; i < runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - if(p == nil) - continue; - mp = p->m; - h = runtime_atomicload(&p->runqhead); - t = runtime_atomicload(&p->runqtail); - if(detailed) - runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n", - i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt); - else { - // In non-detailed mode format lengths of per-P run queues as: - // [len1 len2 len3 len4] - fmt = " %d"; - if(runtime_gomaxprocs == 1) - fmt = " [%d]\n"; - else if(i == 0) - fmt = " [%d"; - else if(i == runtime_gomaxprocs-1) - fmt = " %d]\n"; - runtime_printf(fmt, t-h); - } - } - if(!detailed) { - runtime_unlock(&runtime_sched); - return; - } - for(mp = runtime_allm; mp; mp = mp->alllink) { - p = mp->p; - gp = mp->curg; - lockedg = mp->lockedg; - id1 = -1; - if(p) - id1 = p->id; - id2 = -1; - if(gp) - id2 = gp->goid; - id3 = -1; - if(lockedg) - id3 = lockedg->goid; - runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d" - " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n", - mp->id, id1, id2, - mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc, - mp->spinning, m->blocked, id3); - } - runtime_lock(&allglock); - for(gi = 0; gi < runtime_allglen; gi++) { - gp = runtime_allg[gi]; - mp = gp->m; - lockedm = gp->lockedm; - runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n", - gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1, - lockedm ? lockedm->id : -1); - } - runtime_unlock(&allglock); - runtime_unlock(&runtime_sched); -} - -// Put mp on midle list. -// Sched must be locked. -static void -mput(M *mp) -{ - mp->schedlink = runtime_sched.midle; - runtime_sched.midle = mp; - runtime_sched.nmidle++; - checkdead(); -} - -// Try to get an m from midle list. -// Sched must be locked. -static M* -mget(void) -{ - M *mp; - - if((mp = runtime_sched.midle) != nil){ - runtime_sched.midle = mp->schedlink; - runtime_sched.nmidle--; - } - return mp; -} - -// Put gp on the global runnable queue. -// Sched must be locked. 
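retake above (removed here along with the rest of sysmon) and exitsyscallfast earlier race for the same P: whichever side first CASes the status word out of the syscall state owns the P, so a P stuck in a long syscall is either reclaimed by its returning thread or handed off by sysmon, never both. A toy model of that single-winner transition; the enum and both functions are invented and do not reproduce the runtime's P state machine:

#include <stdatomic.h>
#include <stdio.h>

enum { Idle, Running, Syscall };           /* stand-ins for _Pidle/_Prunning/_Psyscall */

static atomic_int pstatus = Syscall;       /* one P, currently parked in a syscall */

/* The thread returning from its syscall tries to keep its old P. */
static int
reacquire(void)
{
	int expect = Syscall;
	return atomic_compare_exchange_strong(&pstatus, &expect, Running);
}

/* The monitor tries to retake a P that has been in a syscall too long. */
static int
retake_p(void)
{
	int expect = Syscall;
	return atomic_compare_exchange_strong(&pstatus, &expect, Idle);
}

int
main(void)
{
	int won1, won2;

	won1 = retake_p();                 /* first CAS moves Syscall -> Idle and wins */
	won2 = reacquire();                /* the second CAS must now fail */
	printf("retake=%d reacquire=%d\n", won1, won2);
	return 0;
}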
-static void -globrunqput(G *gp) -{ - gp->schedlink = nil; - if(runtime_sched.runqtail) - runtime_sched.runqtail->schedlink = gp; - else - runtime_sched.runqhead = gp; - runtime_sched.runqtail = gp; - runtime_sched.runqsize++; -} - -// Put a batch of runnable goroutines on the global runnable queue. -// Sched must be locked. -static void -globrunqputbatch(G *ghead, G *gtail, int32 n) -{ - gtail->schedlink = nil; - if(runtime_sched.runqtail) - runtime_sched.runqtail->schedlink = ghead; - else - runtime_sched.runqhead = ghead; - runtime_sched.runqtail = gtail; - runtime_sched.runqsize += n; -} - -// Try get a batch of G's from the global runnable queue. -// Sched must be locked. -static G* -globrunqget(P *p, int32 max) -{ - G *gp, *gp1; - int32 n; - - if(runtime_sched.runqsize == 0) - return nil; - n = runtime_sched.runqsize/runtime_gomaxprocs+1; - if(n > runtime_sched.runqsize) - n = runtime_sched.runqsize; - if(max > 0 && n > max) - n = max; - if((uint32)n > nelem(p->runq)/2) - n = nelem(p->runq)/2; - runtime_sched.runqsize -= n; - if(runtime_sched.runqsize == 0) - runtime_sched.runqtail = nil; - gp = runtime_sched.runqhead; - runtime_sched.runqhead = gp->schedlink; - n--; - while(n--) { - gp1 = runtime_sched.runqhead; - runtime_sched.runqhead = gp1->schedlink; - runqput(p, gp1); - } - return gp; -} - -// Put p to on pidle list. -// Sched must be locked. -static void -pidleput(P *p) -{ - p->link = runtime_sched.pidle; - runtime_sched.pidle = p; - runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic -} - -// Try get a p from pidle list. -// Sched must be locked. -static P* -pidleget(void) -{ - P *p; - - p = runtime_sched.pidle; - if(p) { - runtime_sched.pidle = p->link; - runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic - } - return p; -} - -// Try to put g on local runnable queue. -// If it's full, put onto global queue. -// Executed only by the owner P. -static void -runqput(P *p, G *gp) -{ - uint32 h, t; - -retry: - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers - t = p->runqtail; - if(t - h < nelem(p->runq)) { - p->runq[t%nelem(p->runq)] = gp; - runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption - return; - } - if(runqputslow(p, gp, h, t)) - return; - // the queue is not full, now the put above must suceed - goto retry; -} - -// Put g and a batch of work from local runnable queue on global queue. -// Executed only by the owner P. -static bool -runqputslow(P *p, G *gp, uint32 h, uint32 t) -{ - G *batch[nelem(p->runq)/2+1]; - uint32 n, i; - - // First, grab a batch from local queue. - n = t-h; - n = n/2; - if(n != nelem(p->runq)/2) - runtime_throw("runqputslow: queue is not full"); - for(i=0; i<n; i++) - batch[i] = p->runq[(h+i)%nelem(p->runq)]; - if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume - return false; - batch[n] = gp; - // Link the goroutines. - for(i=0; i<n; i++) - batch[i]->schedlink = batch[i+1]; - // Now put the batch on global queue. - runtime_lock(&runtime_sched); - globrunqputbatch(batch[0], batch[n], n+1); - runtime_unlock(&runtime_sched); - return true; -} - -// Get g from local runnable queue. -// Executed only by the owner P. 
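runqput and runqputslow above implement the per-P run queue as a fixed ring indexed by free-running head and tail counters: the owner publishes at tail with a store-release, consumers claim items by CASing head forward, and a full ring spills half its contents to the global queue under the scheduler lock. A compact sketch of the ring itself; QSIZE and the type names are invented, and the spill and steal paths are left out:

#include <stdatomic.h>
#include <stdio.h>

enum { QSIZE = 256 };                      /* power of two so the free-running counters wrap cleanly */

typedef struct {
	atomic_uint head;                  /* advanced by any consumer with a CAS */
	atomic_uint tail;                  /* advanced only by the owning producer */
	atomic_int items[QSIZE];
} RunQ;

/* Owner-only put; returns 0 when the ring is full (the real runqput
   would spill half the queue to the global run queue instead). */
static int
runq_put(RunQ *q, int g)
{
	unsigned h, t;

	h = atomic_load_explicit(&q->head, memory_order_acquire);
	t = atomic_load_explicit(&q->tail, memory_order_relaxed);
	if(t - h >= QSIZE)
		return 0;
	q->items[t % QSIZE] = g;
	atomic_store_explicit(&q->tail, t + 1, memory_order_release);
	return 1;
}

/* Pop one item from the head; usable by the owner and by stealers. */
static int
runq_get(RunQ *q, int *g)
{
	unsigned h, t;

	for(;;) {
		h = atomic_load_explicit(&q->head, memory_order_acquire);
		t = atomic_load_explicit(&q->tail, memory_order_acquire);
		if(t == h)
			return 0;          /* empty */
		*g = q->items[h % QSIZE];
		if(atomic_compare_exchange_weak(&q->head, &h, h + 1))
			return 1;          /* the CAS commits the consume */
		/* lost a race with another consumer: re-read and retry */
	}
}

int
main(void)
{
	static RunQ q;
	int g, i;

	for(i = 1; i <= 3; i++)
		runq_put(&q, i);
	while(runq_get(&q, &g))
		printf("%d ", g);
	printf("\n");
	return 0;
}

If the CAS in runq_get fails, another consumer already took that slot; the loop simply re-reads head and tail and tries again, which is the same commit rule the stealing path relies on when it grabs a batch.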
-static G* -runqget(P *p) -{ - G *gp; - uint32 t, h; - - for(;;) { - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers - t = p->runqtail; - if(t == h) - return nil; - gp = p->runq[h%nelem(p->runq)]; - if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume - return gp; - } -} - -// Grabs a batch of goroutines from local runnable queue. -// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines. -// Can be executed by any P. -static uint32 -runqgrab(P *p, G **batch) -{ - uint32 t, h, n, i; - - for(;;) { - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers - t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer - n = t-h; - n = n - n/2; - if(n == 0) - break; - if(n > nelem(p->runq)/2) // read inconsistent h and t - continue; - for(i=0; i<n; i++) - batch[i] = p->runq[(h+i)%nelem(p->runq)]; - if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume - break; - } - return n; -} - -// Steal half of elements from local runnable queue of p2 -// and put onto local runnable queue of p. -// Returns one of the stolen elements (or nil if failed). -static G* -runqsteal(P *p, P *p2) -{ - G *gp; - G *batch[nelem(p->runq)/2]; - uint32 t, h, n, i; - - n = runqgrab(p2, batch); - if(n == 0) - return nil; - n--; - gp = batch[n]; - if(n == 0) - return gp; - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers - t = p->runqtail; - if(t - h + n >= nelem(p->runq)) - runtime_throw("runqsteal: runq overflow"); - for(i=0; i<n; i++, t++) - p->runq[t%nelem(p->runq)] = batch[i]; - runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption - return gp; -} - -void runtime_testSchedLocalQueue(void) - __asm__("runtime.testSchedLocalQueue"); - -void -runtime_testSchedLocalQueue(void) -{ - P p; - G gs[nelem(p.runq)]; - int32 i, j; - - runtime_memclr((byte*)&p, sizeof(p)); - - for(i = 0; i < (int32)nelem(gs); i++) { - if(runqget(&p) != nil) - runtime_throw("runq is not empty initially"); - for(j = 0; j < i; j++) - runqput(&p, &gs[i]); - for(j = 0; j < i; j++) { - if(runqget(&p) != &gs[i]) { - runtime_printf("bad element at iter %d/%d\n", i, j); - runtime_throw("bad element"); - } - } - if(runqget(&p) != nil) - runtime_throw("runq is not empty afterwards"); - } -} - -void runtime_testSchedLocalQueueSteal(void) - __asm__("runtime.testSchedLocalQueueSteal"); - -void -runtime_testSchedLocalQueueSteal(void) -{ - P p1, p2; - G gs[nelem(p1.runq)], *gp; - int32 i, j, s; - - runtime_memclr((byte*)&p1, sizeof(p1)); - runtime_memclr((byte*)&p2, sizeof(p2)); - - for(i = 0; i < (int32)nelem(gs); i++) { - for(j = 0; j < i; j++) { - gs[j].sig = 0; - runqput(&p1, &gs[j]); - } - gp = runqsteal(&p2, &p1); - s = 0; - if(gp) { - s++; - gp->sig++; - } - while((gp = runqget(&p2)) != nil) { - s++; - gp->sig++; - } - while((gp = runqget(&p1)) != nil) - gp->sig++; - for(j = 0; j < i; j++) { - if(gs[j].sig != 1) { - runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i); - runtime_throw("bad element"); - } - } - if(s != i/2 && s != i/2+1) { - runtime_printf("bad steal %d, want %d or %d, iter %d\n", - s, i/2, i/2+1, i); - runtime_throw("bad steal"); - } - } -} - -int32 -runtime_setmaxthreads(int32 in) -{ - int32 out; - - runtime_lock(&runtime_sched); - out = runtime_sched.maxmcount; - runtime_sched.maxmcount = in; - checkmcount(); - runtime_unlock(&runtime_sched); - return out; -} - -void -runtime_proc_scan(struct 
Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj)) -{ - enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0}); - enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0}); + g->m->locks--; } // Return whether we are waiting for a GC. This gc toolchain uses @@ -3442,7 +1576,7 @@ runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj bool runtime_gcwaiting(void) { - return runtime_sched.gcwaiting; + return runtime_sched->gcwaiting; } // os_beforeExit is called from os.Exit(0). @@ -3455,43 +1589,10 @@ os_beforeExit() { } -// Active spinning for sync.Mutex. -//go:linkname sync_runtime_canSpin sync.runtime_canSpin - -enum -{ - ACTIVE_SPIN = 4, - ACTIVE_SPIN_CNT = 30, -}; - -extern _Bool sync_runtime_canSpin(intgo i) - __asm__ (GOSYM_PREFIX "sync.runtime_canSpin"); +intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU"); -_Bool -sync_runtime_canSpin(intgo i) -{ - P *p; - - // sync.Mutex is cooperative, so we are conservative with spinning. - // Spin only few times and only if running on a multicore machine and - // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. - // As opposed to runtime mutex we don't do passive spinning here, - // because there can be work on global runq on on other Ps. - if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) { - return false; - } - p = m->p; - return p != nil && p->runqhead == p->runqtail; -} - -//go:linkname sync_runtime_doSpin sync.runtime_doSpin -//go:nosplit - -extern void sync_runtime_doSpin(void) - __asm__ (GOSYM_PREFIX "sync.runtime_doSpin"); - -void -sync_runtime_doSpin() +intgo +NumCPU() { - runtime_procyield(ACTIVE_SPIN_CNT); + return (intgo)(runtime_ncpu); } |