Diffstat (limited to 'libgo/runtime/proc.c')
-rw-r--r-- | libgo/runtime/proc.c | 1352
1 file changed, 1318 insertions, 34 deletions
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c index e9b7c9083ca..b243de2424e 100644 --- a/libgo/runtime/proc.c +++ b/libgo/runtime/proc.c @@ -2,21 +2,1323 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include <limits.h> +#include <stdlib.h> +#include <pthread.h> +#include <unistd.h> + +#include "config.h" #include "runtime.h" #include "arch.h" -#include "malloc.h" /* so that acid generated from proc.c includes malloc data structures */ +#include "defs.h" +#include "malloc.h" +#include "go-defer.h" + +#ifdef USING_SPLIT_STACK + +/* FIXME: These are not declared anywhere. */ + +extern void __splitstack_getcontext(void *context[10]); + +extern void __splitstack_setcontext(void *context[10]); + +extern void *__splitstack_makecontext(size_t, void *context[10], size_t *); + +extern void * __splitstack_resetcontext(void *context[10], size_t *); + +extern void *__splitstack_find(void *, void *, size_t *, void **, void **, + void **); + +#endif + +#if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK) +# ifdef PTHREAD_STACK_MIN +# define StackMin PTHREAD_STACK_MIN +# else +# define StackMin 8192 +# endif +#else +# define StackMin 2 * 1024 * 1024 +#endif + +static void schedule(G*); +static M *startm(void); typedef struct Sched Sched; -G runtime_g0; M runtime_m0; +G runtime_g0; // idle goroutine for m0 #ifdef __rtems__ #define __thread #endif -__thread G *g; -__thread M *m; +static __thread G *g; +static __thread M *m; + +// We can not always refer to the TLS variables directly. The +// compiler will call tls_get_addr to get the address of the variable, +// and it may hold it in a register across a call to schedule. When +// we get back from the call we may be running in a different thread, +// in which case the register now points to the TLS variable for a +// different thread. We use non-inlinable functions to avoid this +// when necessary. + +G* runtime_g(void) __attribute__ ((noinline, no_split_stack)); + +G* +runtime_g(void) +{ + return g; +} + +M* runtime_m(void) __attribute__ ((noinline, no_split_stack)); + +M* +runtime_m(void) +{ + return m; +} + +int32 runtime_gcwaiting; + +// Go scheduler +// +// The go scheduler's job is to match ready-to-run goroutines (`g's) +// with waiting-for-work schedulers (`m's). If there are ready g's +// and no waiting m's, ready() will start a new m running in a new +// OS thread, so that all ready g's can run simultaneously, up to a limit. +// For now, m's never go away. +// +// By default, Go keeps only one kernel thread (m) running user code +// at a single time; other threads may be blocked in the operating system. +// Setting the environment variable $GOMAXPROCS or calling +// runtime.GOMAXPROCS() will change the number of user threads +// allowed to execute simultaneously. $GOMAXPROCS is thus an +// approximation of the maximum number of cores to use. +// +// Even a program that can run without deadlock in a single process +// might use more m's if given the chance. For example, the prime +// sieve will use as many m's as there are primes (up to runtime_sched.mmax), +// allowing different stages of the pipeline to execute in parallel. +// We could revisit this choice, only kicking off new m's for blocking +// system calls, but that would limit the amount of parallel computation +// that go would try to do. +// +// In general, one could imagine all sorts of refinements to the +// scheduler, but the goal now is just to get something working on +// Linux and OS X. 
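The comment above is why the runtime exposes runtime_g() and runtime_m() as noinline, no_split_stack accessors instead of letting callers read the __thread variables directly: each call re-evaluates the TLS address for whichever thread is running at that moment. A minimal standalone sketch of the accessor pattern, not taken from libgo (tls_value, tls_addr and worker are made-up names), is:

#include <pthread.h>
#include <stdio.h>

static __thread int tls_value;

/* noinline forces the TLS address to be recomputed at every call,
   so the pointer always belongs to the thread that is running now. */
__attribute__ ((noinline)) static int *
tls_addr (void)
{
	return &tls_value;
}

static void *
worker (void *arg __attribute__ ((unused)))
{
	*tls_addr () = 42;
	printf ("worker sees %d\n", *tls_addr ());
	return NULL;
}

int
main (void)
{
	pthread_t tid;

	*tls_addr () = 1;
	if (pthread_create (&tid, NULL, worker, NULL) != 0)
		return 1;
	pthread_join (tid, NULL);
	/* Each thread saw its own copy; main still has 1. */
	printf ("main sees %d\n", *tls_addr ());
	return 0;
}

Built with gcc -pthread, each thread sees its own copy through the accessor; a caller that never caches the returned pointer across a scheduling point cannot end up with another thread's address.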
+ +struct Sched { + Lock; + + G *gfree; // available g's (status == Gdead) + int32 goidgen; + + G *ghead; // g's waiting to run + G *gtail; + int32 gwait; // number of g's waiting to run + int32 gcount; // number of g's that are alive + int32 grunning; // number of g's running on cpu or in syscall + + M *mhead; // m's waiting for work + int32 mwait; // number of m's waiting for work + int32 mcount; // number of m's that have been created + + volatile uint32 atomic; // atomic scheduling word (see below) + + int32 profilehz; // cpu profiling rate + + Note stopped; // one g can set waitstop and wait here for m's to stop +}; + +// The atomic word in sched is an atomic uint32 that +// holds these fields. +// +// [15 bits] mcpu number of m's executing on cpu +// [15 bits] mcpumax max number of m's allowed on cpu +// [1 bit] waitstop some g is waiting on stopped +// [1 bit] gwaiting gwait != 0 +// +// These fields are the information needed by entersyscall +// and exitsyscall to decide whether to coordinate with the +// scheduler. Packing them into a single machine word lets +// them use a fast path with a single atomic read/write and +// no lock/unlock. This greatly reduces contention in +// syscall- or cgo-heavy multithreaded programs. +// +// Except for entersyscall and exitsyscall, the manipulations +// to these fields only happen while holding the schedlock, +// so the routines holding schedlock only need to worry about +// what entersyscall and exitsyscall do, not the other routines +// (which also use the schedlock). +// +// In particular, entersyscall and exitsyscall only read mcpumax, +// waitstop, and gwaiting. They never write them. Thus, writes to those +// fields can be done (holding schedlock) without fear of write conflicts. +// There may still be logic conflicts: for example, the set of waitstop must +// be conditioned on mcpu >= mcpumax or else the wait may be a +// spurious sleep. The Promela model in proc.p verifies these accesses. +enum { + mcpuWidth = 15, + mcpuMask = (1<<mcpuWidth) - 1, + mcpuShift = 0, + mcpumaxShift = mcpuShift + mcpuWidth, + waitstopShift = mcpumaxShift + mcpuWidth, + gwaitingShift = waitstopShift+1, + + // The max value of GOMAXPROCS is constrained + // by the max value we can store in the bit fields + // of the atomic word. Reserve a few high values + // so that we can detect accidental decrement + // beyond zero. + maxgomaxprocs = mcpuMask - 10, +}; + +#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask) +#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask) +#define atomic_waitstop(v) (((v)>>waitstopShift)&1) +#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1) + +Sched runtime_sched; +int32 runtime_gomaxprocs; +bool runtime_singleproc; + +static bool canaddmcpu(void); + +// An m that is waiting for notewakeup(&m->havenextg). This may +// only be accessed while the scheduler lock is held. This is used to +// minimize the number of times we call notewakeup while the scheduler +// lock is held, since the m will normally move quickly to lock the +// scheduler itself, producing lock contention. +static M* mwakeup; + +// Scheduling helpers. Sched must be locked. 
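As a rough standalone illustration of the packed scheduling word described above, the sketch below reuses the same shifts and masks and updates one field with a CAS loop in the style of setmcpumax. It is not libgo code: it substitutes GCC's __sync builtins for runtime_cas and runtime_xadd, and sched_atomic/set_mcpumax are made-up names.

#include <stdint.h>
#include <stdio.h>

enum {
	mcpuWidth = 15,
	mcpuMask = (1<<mcpuWidth) - 1,
	mcpuShift = 0,
	mcpumaxShift = mcpuShift + mcpuWidth,
	waitstopShift = mcpumaxShift + mcpuWidth,
	gwaitingShift = waitstopShift + 1,
};

#define atomic_mcpu(v)     (((v)>>mcpuShift)&mcpuMask)
#define atomic_mcpumax(v)  (((v)>>mcpumaxShift)&mcpuMask)
#define atomic_waitstop(v) (((v)>>waitstopShift)&1)
#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1)

static volatile uint32_t sched_atomic;

/* Rewrite the mcpumax field with a CAS loop so concurrent readers
   always see a consistent word (same idea as setmcpumax). */
static void
set_mcpumax (uint32_t n)
{
	uint32_t v, w;

	for (;;) {
		v = sched_atomic;
		w = v;
		w &= ~((uint32_t)mcpuMask << mcpumaxShift);
		w |= n << mcpumaxShift;
		if (__sync_bool_compare_and_swap (&sched_atomic, v, w))
			break;
	}
}

int
main (void)
{
	set_mcpumax (4);
	/* Bump the mcpu field with a single atomic add, the way
	   exitsyscall does (canaddmcpu uses a CAS so it can also
	   check the mcpumax bound). */
	uint32_t v = __sync_add_and_fetch (&sched_atomic, 1 << mcpuShift);
	printf ("mcpu=%u mcpumax=%u waitstop=%u gwaiting=%u\n",
		atomic_mcpu (v), atomic_mcpumax (v),
		atomic_waitstop (v), atomic_gwaiting (v));
	return 0;
}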
+static void gput(G*); // put/get on ghead/gtail +static G* gget(void); +static void mput(M*); // put/get on mhead +static M* mget(G*); +static void gfput(G*); // put/get on gfree +static G* gfget(void); +static void matchmg(void); // match m's to g's +static void readylocked(G*); // ready, but sched is locked +static void mnextg(M*, G*); +static void mcommoninit(M*); + +void +setmcpumax(uint32 n) +{ + uint32 v, w; + + for(;;) { + v = runtime_sched.atomic; + w = v; + w &= ~(mcpuMask<<mcpumaxShift); + w |= n<<mcpumaxShift; + if(runtime_cas(&runtime_sched.atomic, v, w)) + break; + } +} + +// First function run by a new goroutine. This replaces gogocall. +static void +kickoff(void) +{ + void (*fn)(void*); + + fn = (void (*)(void*))(g->entry); + fn(g->param); + runtime_goexit(); +} + +// Switch context to a different goroutine. This is like longjmp. +static void runtime_gogo(G*) __attribute__ ((noinline)); +static void +runtime_gogo(G* newg) +{ +#ifdef USING_SPLIT_STACK + __splitstack_setcontext(&newg->stack_context[0]); +#endif + g = newg; + newg->fromgogo = true; + setcontext(&newg->context); +} + +// Save context and call fn passing g as a parameter. This is like +// setjmp. Because getcontext always returns 0, unlike setjmp, we use +// g->fromgogo as a code. It will be true if we got here via +// setcontext. g == nil the first time this is called in a new m. +static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline)); +static void +runtime_mcall(void (*pfn)(G*)) +{ +#ifndef USING_SPLIT_STACK + int i; +#endif + + // Ensure that all registers are on the stack for the garbage + // collector. + __builtin_unwind_init(); + + if(g == m->g0) + runtime_throw("runtime: mcall called on m->g0 stack"); + + if(g != nil) { + +#ifdef USING_SPLIT_STACK + __splitstack_getcontext(&g->stack_context[0]); +#else + g->gcnext_sp = &i; +#endif + g->fromgogo = false; + getcontext(&g->context); + } + if (g == nil || !g->fromgogo) { +#ifdef USING_SPLIT_STACK + __splitstack_setcontext(&m->g0->stack_context[0]); +#endif + m->g0->entry = (byte*)pfn; + m->g0->param = g; + g = m->g0; + setcontext(&m->g0->context); + runtime_throw("runtime: mcall function returned"); + } +} + +// The bootstrap sequence is: +// +// call osinit +// call schedinit +// make & queue new G +// call runtime_mstart +// +// The new G does: +// +// call main_init_function +// call initdone +// call main_main +void +runtime_schedinit(void) +{ + int32 n; + const byte *p; + + m = &runtime_m0; + g = &runtime_g0; + m->g0 = g; + m->curg = g; + g->m = m; + + m->nomemprof++; + runtime_mallocinit(); + mcommoninit(m); + + runtime_goargs(); + runtime_goenvs(); + + // For debugging: + // Allocate internal symbol table representation now, + // so that we don't need to call malloc when we crash. + // runtime_findfunc(0); + + runtime_gomaxprocs = 1; + p = runtime_getenv("GOMAXPROCS"); + if(p != nil && (n = runtime_atoi(p)) != 0) { + if(n > maxgomaxprocs) + n = maxgomaxprocs; + runtime_gomaxprocs = n; + } + setmcpumax(runtime_gomaxprocs); + runtime_singleproc = runtime_gomaxprocs == 1; + + canaddmcpu(); // mcpu++ to account for bootstrap m + m->helpgc = 1; // flag to tell schedule() to mcpu-- + runtime_sched.grunning++; + + // Can not enable GC until all roots are registered. + // mstats.enablegc = 1; + m->nomemprof--; +} + +// Lock the scheduler. +static void +schedlock(void) +{ + runtime_lock(&runtime_sched); +} + +// Unlock the scheduler. 
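kickoff, runtime_gogo and runtime_mcall above are built on the POSIX ucontext calls (getcontext, setcontext, makecontext). The self-contained sketch below shows only the underlying mechanism; it is not libgo code, it uses swapcontext and uc_link for brevity where the runtime uses explicit getcontext/setcontext pairs, and a real goroutine stack comes from __splitstack_makecontext or the allocator rather than a static buffer (greeting, co_ctx and co_stack are made-up names).

#include <stdio.h>
#include <ucontext.h>

static ucontext_t main_ctx, co_ctx;
static char co_stack[64 * 1024];

/* Trampoline entered by the new context, playing the role of kickoff. */
static void
greeting (void)
{
	printf ("hello from the coroutine\n");
	/* Returning ends the context; uc_link sends control back to main. */
}

int
main (void)
{
	getcontext (&co_ctx);                 /* initialize the context */
	co_ctx.uc_stack.ss_sp = co_stack;     /* give it its own stack */
	co_ctx.uc_stack.ss_size = sizeof co_stack;
	co_ctx.uc_link = &main_ctx;           /* where to go when it returns */
	makecontext (&co_ctx, greeting, 0);

	/* Save main's context and jump into the coroutine; roughly what
	   runtime_mcall followed by runtime_gogo do in separate steps. */
	swapcontext (&main_ctx, &co_ctx);
	printf ("back in main\n");
	return 0;
}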
+static void +schedunlock(void) +{ + M *m; + + m = mwakeup; + mwakeup = nil; + runtime_unlock(&runtime_sched); + if(m != nil) + runtime_notewakeup(&m->havenextg); +} + +void +runtime_goexit(void) +{ + g->status = Gmoribund; + runtime_gosched(); +} + +void +runtime_goroutineheader(G *g) +{ + const char *status; + + switch(g->status) { + case Gidle: + status = "idle"; + break; + case Grunnable: + status = "runnable"; + break; + case Grunning: + status = "running"; + break; + case Gsyscall: + status = "syscall"; + break; + case Gwaiting: + if(g->waitreason) + status = g->waitreason; + else + status = "waiting"; + break; + case Gmoribund: + status = "moribund"; + break; + default: + status = "???"; + break; + } + runtime_printf("goroutine %d [%s]:\n", g->goid, status); +} + +void +runtime_tracebackothers(G *me) +{ + G *g; + + for(g = runtime_allg; g != nil; g = g->alllink) { + if(g == me || g->status == Gdead) + continue; + runtime_printf("\n"); + runtime_goroutineheader(g); + // runtime_traceback(g->sched.pc, g->sched.sp, 0, g); + } +} + +// Mark this g as m's idle goroutine. +// This functionality might be used in environments where programs +// are limited to a single thread, to simulate a select-driven +// network server. It is not exposed via the standard runtime API. +void +runtime_idlegoroutine(void) +{ + if(g->idlem != nil) + runtime_throw("g is already an idle goroutine"); + g->idlem = m; +} + +static void +mcommoninit(M *m) +{ + // Add to runtime_allm so garbage collector doesn't free m + // when it is just in a register or thread-local storage. + m->alllink = runtime_allm; + // runtime_Cgocalls() iterates over allm w/o schedlock, + // so we need to publish it safely. + runtime_atomicstorep((void**)&runtime_allm, m); + + m->id = runtime_sched.mcount++; + m->fastrand = 0x49f6428aUL + m->id; + + if(m->mcache == nil) + m->mcache = runtime_allocmcache(); +} + +// Try to increment mcpu. Report whether succeeded. +static bool +canaddmcpu(void) +{ + uint32 v; + + for(;;) { + v = runtime_sched.atomic; + if(atomic_mcpu(v) >= atomic_mcpumax(v)) + return 0; + if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift))) + return 1; + } +} + +// Put on `g' queue. Sched must be locked. +static void +gput(G *g) +{ + M *m; + + // If g is wired, hand it off directly. + if((m = g->lockedm) != nil && canaddmcpu()) { + mnextg(m, g); + return; + } + + // If g is the idle goroutine for an m, hand it off. + if(g->idlem != nil) { + if(g->idlem->idleg != nil) { + runtime_printf("m%d idle out of sync: g%d g%d\n", + g->idlem->id, + g->idlem->idleg->goid, g->goid); + runtime_throw("runtime: double idle"); + } + g->idlem->idleg = g; + return; + } + + g->schedlink = nil; + if(runtime_sched.ghead == nil) + runtime_sched.ghead = g; + else + runtime_sched.gtail->schedlink = g; + runtime_sched.gtail = g; + + // increment gwait. + // if it transitions to nonzero, set atomic gwaiting bit. + if(runtime_sched.gwait++ == 0) + runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift); +} + +// Report whether gget would return something. +static bool +haveg(void) +{ + return runtime_sched.ghead != nil || m->idleg != nil; +} + +// Get from `g' queue. Sched must be locked. +static G* +gget(void) +{ + G *g; + + g = runtime_sched.ghead; + if(g){ + runtime_sched.ghead = g->schedlink; + if(runtime_sched.ghead == nil) + runtime_sched.gtail = nil; + // decrement gwait. + // if it transitions to zero, clear atomic gwaiting bit. 
+ if(--runtime_sched.gwait == 0) + runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift); + } else if(m->idleg != nil) { + g = m->idleg; + m->idleg = nil; + } + return g; +} + +// Put on `m' list. Sched must be locked. +static void +mput(M *m) +{ + m->schedlink = runtime_sched.mhead; + runtime_sched.mhead = m; + runtime_sched.mwait++; +} + +// Get an `m' to run `g'. Sched must be locked. +static M* +mget(G *g) +{ + M *m; + + // if g has its own m, use it. + if(g && (m = g->lockedm) != nil) + return m; + + // otherwise use general m pool. + if((m = runtime_sched.mhead) != nil){ + runtime_sched.mhead = m->schedlink; + runtime_sched.mwait--; + } + return m; +} + +// Mark g ready to run. +void +runtime_ready(G *g) +{ + schedlock(); + readylocked(g); + schedunlock(); +} + +// Mark g ready to run. Sched is already locked. +// G might be running already and about to stop. +// The sched lock protects g->status from changing underfoot. +static void +readylocked(G *g) +{ + if(g->m){ + // Running on another machine. + // Ready it when it stops. + g->readyonstop = 1; + return; + } + + // Mark runnable. + if(g->status == Grunnable || g->status == Grunning) { + runtime_printf("goroutine %d has status %d\n", g->goid, g->status); + runtime_throw("bad g->status in ready"); + } + g->status = Grunnable; + + gput(g); + matchmg(); +} + +// Same as readylocked but a different symbol so that +// debuggers can set a breakpoint here and catch all +// new goroutines. +static void +newprocreadylocked(G *g) +{ + readylocked(g); +} + +// Pass g to m for running. +// Caller has already incremented mcpu. +static void +mnextg(M *m, G *g) +{ + runtime_sched.grunning++; + m->nextg = g; + if(m->waitnextg) { + m->waitnextg = 0; + if(mwakeup != nil) + runtime_notewakeup(&mwakeup->havenextg); + mwakeup = m; + } +} + +// Get the next goroutine that m should run. +// Sched must be locked on entry, is unlocked on exit. +// Makes sure that at most $GOMAXPROCS g's are +// running on cpus (not in system calls) at any given time. +static G* +nextgandunlock(void) +{ + G *gp; + uint32 v; + +top: + if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs) + runtime_throw("negative mcpu"); + + // If there is a g waiting as m->nextg, the mcpu++ + // happened before it was passed to mnextg. + if(m->nextg != nil) { + gp = m->nextg; + m->nextg = nil; + schedunlock(); + return gp; + } + + if(m->lockedg != nil) { + // We can only run one g, and it's not available. + // Make sure some other cpu is running to handle + // the ordinary run queue. + if(runtime_sched.gwait != 0) { + matchmg(); + // m->lockedg might have been on the queue. + if(m->nextg != nil) { + gp = m->nextg; + m->nextg = nil; + schedunlock(); + return gp; + } + } + } else { + // Look for work on global queue. + while(haveg() && canaddmcpu()) { + gp = gget(); + if(gp == nil) + runtime_throw("gget inconsistency"); + + if(gp->lockedm) { + mnextg(gp->lockedm, gp); + continue; + } + runtime_sched.grunning++; + schedunlock(); + return gp; + } + + // The while loop ended either because the g queue is empty + // or because we have maxed out our m procs running go + // code (mcpu >= mcpumax). We need to check that + // concurrent actions by entersyscall/exitsyscall cannot + // invalidate the decision to end the loop. + // + // We hold the sched lock, so no one else is manipulating the + // g queue or changing mcpumax. 
Entersyscall can decrement + // mcpu, but if does so when there is something on the g queue, + // the gwait bit will be set, so entersyscall will take the slow path + // and use the sched lock. So it cannot invalidate our decision. + // + // Wait on global m queue. + mput(m); + } + + v = runtime_atomicload(&runtime_sched.atomic); + if(runtime_sched.grunning == 0) + runtime_throw("all goroutines are asleep - deadlock!"); + m->nextg = nil; + m->waitnextg = 1; + runtime_noteclear(&m->havenextg); + + // Stoptheworld is waiting for all but its cpu to go to stop. + // Entersyscall might have decremented mcpu too, but if so + // it will see the waitstop and take the slow path. + // Exitsyscall never increments mcpu beyond mcpumax. + if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { + // set waitstop = 0 (known to be 1) + runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift); + runtime_notewakeup(&runtime_sched.stopped); + } + schedunlock(); + + runtime_notesleep(&m->havenextg); + if(m->helpgc) { + runtime_gchelper(); + m->helpgc = 0; + runtime_lock(&runtime_sched); + goto top; + } + if((gp = m->nextg) == nil) + runtime_throw("bad m->nextg in nextgoroutine"); + m->nextg = nil; + return gp; +} + +int32 +runtime_helpgc(bool *extra) +{ + M *mp; + int32 n, max; + + // Figure out how many CPUs to use. + // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. + max = runtime_gomaxprocs; + if(max > runtime_ncpu) + max = runtime_ncpu > 0 ? runtime_ncpu : 1; + if(max > MaxGcproc) + max = MaxGcproc; + + // We're going to use one CPU no matter what. + // Figure out the max number of additional CPUs. + max--; + + runtime_lock(&runtime_sched); + n = 0; + while(n < max && (mp = mget(nil)) != nil) { + n++; + mp->helpgc = 1; + mp->waitnextg = 0; + runtime_notewakeup(&mp->havenextg); + } + runtime_unlock(&runtime_sched); + if(extra) + *extra = n != max; + return n; +} + +void +runtime_stoptheworld(void) +{ + uint32 v; + + schedlock(); + runtime_gcwaiting = 1; + + setmcpumax(1); + + // while mcpu > 1 + for(;;) { + v = runtime_sched.atomic; + if(atomic_mcpu(v) <= 1) + break; + + // It would be unsafe for multiple threads to be using + // the stopped note at once, but there is only + // ever one thread doing garbage collection. + runtime_noteclear(&runtime_sched.stopped); + if(atomic_waitstop(v)) + runtime_throw("invalid waitstop"); + + // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above + // still being true. + if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift))) + continue; + + schedunlock(); + runtime_notesleep(&runtime_sched.stopped); + schedlock(); + } + runtime_singleproc = runtime_gomaxprocs == 1; + schedunlock(); +} + +void +runtime_starttheworld(bool extra) +{ + M *m; + + schedlock(); + runtime_gcwaiting = 0; + setmcpumax(runtime_gomaxprocs); + matchmg(); + if(extra && canaddmcpu()) { + // Start a new m that will (we hope) be idle + // and so available to help when the next + // garbage collection happens. + // canaddmcpu above did mcpu++ + // (necessary, because m will be doing various + // initialization work so is definitely running), + // but m is not running a specific goroutine, + // so set the helpgc flag as a signal to m's + // first schedule(nil) to mcpu-- and grunning--. + m = startm(); + m->helpgc = 1; + runtime_sched.grunning++; + } + schedunlock(); +} + +// Called to start an M. +void* +runtime_mstart(void* mp) +{ + m = (M*)mp; + g = m->g0; + + g->entry = nil; + g->param = nil; + + // Record top of stack for use by mcall. 
+ + // Once we call schedule we're never coming back, + // so other calls can reuse this stack space. +#ifdef USING_SPLIT_STACK + __splitstack_getcontext(&g->stack_context[0]); +#else + g->gcinitial_sp = &mp; + g->gcstack_size = StackMin; + g->gcnext_sp = &mp; +#endif + getcontext(&g->context); + + if(g->entry != nil) { + // Got here from mcall. + void (*pfn)(G*) = (void (*)(G*))g->entry; + G* gp = (G*)g->param; + pfn(gp); + *(int*)0x21 = 0x21; + } + runtime_minit(); + schedule(nil); + return nil; +} + +typedef struct CgoThreadStart CgoThreadStart; +struct CgoThreadStart +{ + M *m; + G *g; + void (*fn)(void); +}; + +// Kick off new m's as needed (up to mcpumax). +// There are already `other' other cpus that will +// start looking for goroutines shortly. +// Sched is locked. +static void +matchmg(void) +{ + G *gp; + M *mp; + + if(m->mallocing || m->gcing) + return; + + while(haveg() && canaddmcpu()) { + gp = gget(); + if(gp == nil) + runtime_throw("gget inconsistency"); + + // Find the m that will run gp. + if((mp = mget(gp)) == nil) + mp = startm(); + mnextg(mp, gp); + } +} + +static M* +startm(void) +{ + M *m; + pthread_attr_t attr; + pthread_t tid; + + m = runtime_malloc(sizeof(M)); + mcommoninit(m); + m->g0 = runtime_malg(-1, nil, nil); + + if(pthread_attr_init(&attr) != 0) + runtime_throw("pthread_attr_init"); + if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0) + runtime_throw("pthread_attr_setdetachstate"); + +#ifndef PTHREAD_STACK_MIN +#define PTHREAD_STACK_MIN 8192 +#endif + if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) + runtime_throw("pthread_attr_setstacksize"); + + if(pthread_create(&tid, &attr, runtime_mstart, m) != 0) + runtime_throw("pthread_create"); + + return m; +} + +// One round of scheduler: find a goroutine and run it. +// The argument is the goroutine that was running before +// schedule was called, or nil if this is the first call. +// Never returns. +static void +schedule(G *gp) +{ + int32 hz; + uint32 v; + + schedlock(); + if(gp != nil) { + // Just finished running gp. + gp->m = nil; + runtime_sched.grunning--; + + // atomic { mcpu-- } + v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift); + if(atomic_mcpu(v) > maxgomaxprocs) + runtime_throw("negative mcpu in scheduler"); + + switch(gp->status){ + case Grunnable: + case Gdead: + // Shouldn't have been running! + runtime_throw("bad gp->status in sched"); + case Grunning: + gp->status = Grunnable; + gput(gp); + break; + case Gmoribund: + gp->status = Gdead; + if(gp->lockedm) { + gp->lockedm = nil; + m->lockedg = nil; + } + gp->idlem = nil; + gfput(gp); + if(--runtime_sched.gcount == 0) + runtime_exit(0); + break; + } + if(gp->readyonstop){ + gp->readyonstop = 0; + readylocked(gp); + } + } else if(m->helpgc) { + // Bootstrap m or new m started by starttheworld. + // atomic { mcpu-- } + v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift); + if(atomic_mcpu(v) > maxgomaxprocs) + runtime_throw("negative mcpu in scheduler"); + // Compensate for increment in starttheworld(). + runtime_sched.grunning--; + m->helpgc = 0; + } else if(m->nextg != nil) { + // New m started by matchmg. + } else { + runtime_throw("invalid m state in scheduler"); + } + + // Find (or wait for) g to run. Unlocks runtime_sched. + gp = nextgandunlock(); + gp->readyonstop = 0; + gp->status = Grunning; + m->curg = gp; + gp->m = m; + + // Check whether the profiler needs to be turned on or off. 
+ hz = runtime_sched.profilehz; + if(m->profilehz != hz) + runtime_resetcpuprofiler(hz); + + runtime_gogo(gp); +} + +// Enter scheduler. If g->status is Grunning, +// re-queues g and runs everyone else who is waiting +// before running g again. If g->status is Gmoribund, +// kills off g. +void +runtime_gosched(void) +{ + if(m->locks != 0) + runtime_throw("gosched holding locks"); + if(g == m->g0) + runtime_throw("gosched of g0"); + runtime_mcall(schedule); +} + +// The goroutine g is about to enter a system call. +// Record that it's not using the cpu anymore. +// This is called only from the go syscall library and cgocall, +// not from the low-level system calls used by the runtime. +// +// Entersyscall cannot split the stack: the runtime_gosave must +// make g->sched refer to the caller's stack segment, because +// entersyscall is going to return immediately after. +// It's okay to call matchmg and notewakeup even after +// decrementing mcpu, because we haven't released the +// sched lock yet, so the garbage collector cannot be running. + +void runtime_entersyscall(void) __attribute__ ((no_split_stack)); + +void +runtime_entersyscall(void) +{ + uint32 v; + + // Leave SP around for gc and traceback. +#ifdef USING_SPLIT_STACK + g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size, + &g->gcnext_segment, &g->gcnext_sp, + &g->gcinitial_sp); +#else + g->gcnext_sp = (byte *) &v; +#endif + + // Save the registers in the g structure so that any pointers + // held in registers will be seen by the garbage collector. + // We could use getcontext here, but setjmp is more efficient + // because it doesn't need to save the signal mask. + setjmp(g->gcregs); + + g->status = Gsyscall; + + // Fast path. + // The slow path inside the schedlock/schedunlock will get + // through without stopping if it does: + // mcpu-- + // gwait not true + // waitstop && mcpu <= mcpumax not true + // If we can do the same with a single atomic add, + // then we can skip the locks. + v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift); + if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v))) + return; + + schedlock(); + v = runtime_atomicload(&runtime_sched.atomic); + if(atomic_gwaiting(v)) { + matchmg(); + v = runtime_atomicload(&runtime_sched.atomic); + } + if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { + runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift); + runtime_notewakeup(&runtime_sched.stopped); + } + + schedunlock(); +} + +// The goroutine g exited its system call. +// Arrange for it to run on a cpu again. +// This is called only from the go syscall library, not +// from the low-level system calls used by the runtime. +void +runtime_exitsyscall(void) +{ + G *gp; + uint32 v; + + // Fast path. + // If we can do the mcpu++ bookkeeping and + // find that we still have mcpu <= mcpumax, then we can + // start executing Go code immediately, without having to + // schedlock/schedunlock. + gp = g; + v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift)); + if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) { + // There's a cpu for us, so we can run. + gp->status = Grunning; + // Garbage collector isn't running (since we are), + // so okay to clear gcstack. 
+#ifdef USING_SPLIT_STACK + gp->gcstack = nil; +#endif + gp->gcnext_sp = nil; + runtime_memclr(gp->gcregs, sizeof gp->gcregs); + return; + } + + // Tell scheduler to put g back on the run queue: + // mostly equivalent to g->status = Grunning, + // but keeps the garbage collector from thinking + // that g is running right now, which it's not. + gp->readyonstop = 1; + + // All the cpus are taken. + // The scheduler will ready g and put this m to sleep. + // When the scheduler takes g away from m, + // it will undo the runtime_sched.mcpu++ above. + runtime_gosched(); + + // Gosched returned, so we're allowed to run now. + // Delete the gcstack information that we left for + // the garbage collector during the system call. + // Must wait until now because until gosched returns + // we don't know for sure that the garbage collector + // is not running. +#ifdef USING_SPLIT_STACK + gp->gcstack = nil; +#endif + gp->gcnext_sp = nil; + runtime_memclr(gp->gcregs, sizeof gp->gcregs); +} + +G* +runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) +{ + G *newg; + + newg = runtime_malloc(sizeof(G)); + if(stacksize >= 0) { +#if USING_SPLIT_STACK + *ret_stack = __splitstack_makecontext(stacksize, + &newg->stack_context[0], + ret_stacksize); +#else + *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0); + *ret_stacksize = stacksize; + newg->gcinitial_sp = *ret_stack; + newg->gcstack_size = stacksize; +#endif + } + return newg; +} + +G* +__go_go(void (*fn)(void*), void* arg) +{ + byte *sp; + size_t spsize; + G * volatile newg; // volatile to avoid longjmp warning + + schedlock(); + + if((newg = gfget()) != nil){ +#ifdef USING_SPLIT_STACK + sp = __splitstack_resetcontext(&newg->stack_context[0], + &spsize); +#else + sp = newg->gcinitial_sp; + spsize = newg->gcstack_size; + newg->gcnext_sp = sp; +#endif + } else { + newg = runtime_malg(StackMin, &sp, &spsize); + if(runtime_lastg == nil) + runtime_allg = newg; + else + runtime_lastg->alllink = newg; + runtime_lastg = newg; + } + newg->status = Gwaiting; + newg->waitreason = "new goroutine"; + + newg->entry = (byte*)fn; + newg->param = arg; + newg->gopc = (uintptr)__builtin_return_address(0); + + runtime_sched.gcount++; + runtime_sched.goidgen++; + newg->goid = runtime_sched.goidgen; + + if(sp == nil) + runtime_throw("nil g->stack0"); + + getcontext(&newg->context); + newg->context.uc_stack.ss_sp = sp; + newg->context.uc_stack.ss_size = spsize; + makecontext(&newg->context, kickoff, 0); + + newprocreadylocked(newg); + schedunlock(); + + return newg; +//printf(" goid=%d\n", newg->goid); +} + +// Put on gfree list. Sched must be locked. +static void +gfput(G *g) +{ + g->schedlink = runtime_sched.gfree; + runtime_sched.gfree = g; +} + +// Get from gfree list. Sched must be locked. +static G* +gfget(void) +{ + G *g; + + g = runtime_sched.gfree; + if(g) + runtime_sched.gfree = g->schedlink; + return g; +} + +// Run all deferred functions for the current goroutine. 
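gfput and gfget above keep dead goroutines on an intrusive singly linked free list, so __go_go can often reuse a G (and, with split stacks, reset its stack context) instead of allocating a fresh one. A standalone sketch of that reuse pattern, with made-up Node/nfput/nfget names rather than libgo's types, looks like this:

#include <stdio.h>
#include <stdlib.h>

typedef struct Node Node;
struct Node {
	Node *schedlink;   /* link field reused for the free list, as in G */
	int   id;
};

static Node *freelist;

/* Push onto the free list; the caller is assumed to hold whatever
   lock protects it, as the scheduler lock protects gfree. */
static void
nfput (Node *n)
{
	n->schedlink = freelist;
	freelist = n;
}

/* Pop from the free list, or return NULL if it is empty. */
static Node *
nfget (void)
{
	Node *n = freelist;
	if (n != NULL)
		freelist = n->schedlink;
	return n;
}

int
main (void)
{
	Node *n = nfget ();
	if (n == NULL) {               /* nothing to reuse: allocate */
		n = malloc (sizeof *n);
		n->id = 1;
	}
	printf ("using node %d\n", n->id);
	nfput (n);                     /* recycle instead of freeing */
	printf ("reused: %d\n", nfget () == n);
	return 0;
}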
+static void +rundefer(void) +{ + Defer *d; + + while((d = g->defer) != nil) { + void (*pfn)(void*); + + pfn = d->__pfn; + d->__pfn = nil; + if (pfn != nil) + (*pfn)(d->__arg); + g->defer = d->__next; + runtime_free(d); + } +} + +void runtime_Goexit (void) asm ("libgo_runtime.runtime.Goexit"); + +void +runtime_Goexit(void) +{ + rundefer(); + runtime_goexit(); +} + +void runtime_Gosched (void) asm ("libgo_runtime.runtime.Gosched"); + +void +runtime_Gosched(void) +{ + runtime_gosched(); +} + +void runtime_LockOSThread (void) + __asm__ ("libgo_runtime.runtime.LockOSThread"); + +void +runtime_LockOSThread(void) +{ + m->lockedg = g; + g->lockedm = m; +} + +// delete when scheduler is stronger +int32 +runtime_gomaxprocsfunc(int32 n) +{ + int32 ret; + uint32 v; + + schedlock(); + ret = runtime_gomaxprocs; + if(n <= 0) + n = ret; + if(n > maxgomaxprocs) + n = maxgomaxprocs; + runtime_gomaxprocs = n; + if(runtime_gomaxprocs > 1) + runtime_singleproc = false; + if(runtime_gcwaiting != 0) { + if(atomic_mcpumax(runtime_sched.atomic) != 1) + runtime_throw("invalid mcpumax during gc"); + schedunlock(); + return ret; + } + + setmcpumax(n); + + // If there are now fewer allowed procs + // than procs running, stop. + v = runtime_atomicload(&runtime_sched.atomic); + if((int32)atomic_mcpu(v) > n) { + schedunlock(); + runtime_gosched(); + return ret; + } + // handle more procs + matchmg(); + schedunlock(); + return ret; +} + +void runtime_UnlockOSThread (void) + __asm__ ("libgo_runtime.runtime.UnlockOSThread"); + +void +runtime_UnlockOSThread(void) +{ + m->lockedg = nil; + g->lockedm = nil; +} + +bool +runtime_lockedOSThread(void) +{ + return g->lockedm != nil && m->lockedg != nil; +} + +// for testing of wire, unwire +uint32 +runtime_mid() +{ + return m->id; +} + +int32 runtime_Goroutines (void) + __asm__ ("libgo_runtime.runtime.Goroutines"); + +int32 +runtime_Goroutines() +{ + return runtime_sched.gcount; +} + +int32 +runtime_mcount(void) +{ + return runtime_sched.mcount; +} static struct { Lock; @@ -28,22 +1330,22 @@ static struct { void runtime_sigprof(uint8 *pc __attribute__ ((unused)), uint8 *sp __attribute__ ((unused)), - uint8 *lr __attribute__ ((unused))) + uint8 *lr __attribute__ ((unused)), + G *gp __attribute__ ((unused))) { - int32 n; - + // int32 n; + if(prof.fn == nil || prof.hz == 0) return; - + runtime_lock(&prof); if(prof.fn == nil) { runtime_unlock(&prof); return; } - n = 0; - // n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf)); - if(n > 0) - prof.fn(prof.pcbuf, n); + // n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf)); + // if(n > 0) + // prof.fn(prof.pcbuf, n); runtime_unlock(&prof); } @@ -67,28 +1369,10 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) prof.fn = fn; prof.hz = hz; runtime_unlock(&prof); - // runtime_lock(&runtime_sched); - // runtime_sched.profilehz = hz; - // runtime_unlock(&runtime_sched); - + runtime_lock(&runtime_sched); + runtime_sched.profilehz = hz; + runtime_unlock(&runtime_sched); + if(hz != 0) runtime_resetcpuprofiler(hz); } - -/* The entersyscall and exitsyscall functions aren't used for anything - yet. Eventually they will be used to switch to a new OS thread - when making a potentially-blocking library call. */ - -void runtime_entersyscall() __asm__("libgo_syscall.syscall.entersyscall"); - -void -runtime_entersyscall() -{ -} - -void runtime_exitsyscall() __asm__("libgo_syscall.syscall.exitsyscall"); - -void -runtime_exitsyscall() -{ -} |
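rundefer above drains the goroutine's defer chain before runtime_Goexit finishes the goroutine: it takes the head entry, clears __pfn before the call, invokes the saved function, then unlinks and frees the record. A self-contained sketch of the same walk, using a simplified Defer layout rather than the real one from go-defer.h (push_defer, rundefer_sketch and say are made-up names), is:

#include <stdio.h>
#include <stdlib.h>

typedef struct Defer Defer;
struct Defer {
	void  (*pfn)(void *);   /* deferred function, or NULL once run */
	void   *arg;
	Defer  *next;           /* next older defer in the chain */
};

static Defer *defer_head;   /* stand-in for g->defer */

static void
push_defer (void (*fn)(void *), void *arg)
{
	Defer *d = malloc (sizeof *d);
	d->pfn = fn;
	d->arg = arg;
	d->next = defer_head;
	defer_head = d;
}

/* Run every pending defer, newest first, clearing pfn before the
   call just as rundefer does. */
static void
rundefer_sketch (void)
{
	Defer *d;

	while ((d = defer_head) != NULL) {
		void (*pfn)(void *) = d->pfn;
		d->pfn = NULL;
		if (pfn != NULL)
			pfn (d->arg);
		defer_head = d->next;
		free (d);
	}
}

static void
say (void *msg)
{
	printf ("%s\n", (const char *)msg);
}

int
main (void)
{
	push_defer (say, "deferred first, runs last");
	push_defer (say, "deferred second, runs first");
	rundefer_sketch ();
	return 0;
}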