Diffstat (limited to 'libgo/runtime/proc.c')
-rw-r--r--  libgo/runtime/proc.c  2977
1 file changed, 539 insertions(+), 2438 deletions(-)
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index c6ac972bd4..06a9c2ad6b 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdlib.h>
@@ -19,7 +20,6 @@
#include "defs.h"
#include "malloc.h"
#include "go-type.h"
-#include "go-defer.h"
#ifdef USING_SPLIT_STACK
@@ -62,7 +62,6 @@ static void gtraceback(G*);
#endif
static __thread G *g;
-static __thread M *m;
#ifndef SETCONTEXT_CLOBBERS_TLS
@@ -158,6 +157,26 @@ fixcontext(ucontext_t *c)
#endif
+// ucontext_arg returns a properly aligned ucontext_t value. On some
+// systems a ucontext_t value must be aligned to a 16-byte boundary.
+// The g structure that has fields of type ucontext_t is defined in
+// Go, and Go has no simple way to align a field to such a boundary.
+// So we make the field larger in runtime2.go and pick an appropriate
+// offset within the field here.
+static ucontext_t*
+ucontext_arg(void** go_ucontext)
+{
+ uintptr_t p = (uintptr_t)go_ucontext;
+ size_t align = __alignof__(ucontext_t);
+ if(align > 16) {
+ // We only ensured space for up to a 16-byte alignment
+ // in libgo/go/runtime/runtime2.go.
+ runtime_throw("required alignment of ucontext_t too large");
+ }
+ p = (p + align - 1) &~ (uintptr_t)(align - 1);
+ return (ucontext_t*)p;
+}
+
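The round-up in ucontext_arg is the standard power-of-two alignment trick. A standalone illustration (a hypothetical example, not part of the patch): with align = 16, an address such as 0x1001 rounds up to 0x1010.

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: round p up to a power-of-two alignment, as ucontext_arg does. */
    int
    main(void)
    {
        uintptr_t p = 0x1001;
        uintptr_t align = 16;  /* must be a power of two */
        uintptr_t rounded = (p + align - 1) & ~(align - 1);

        /* prints 0x1001 -> 0x1010 */
        printf("%#lx -> %#lx\n", (unsigned long)p, (unsigned long)rounded);
        return 0;
    }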
// We can not always refer to the TLS variables directly. The
// compiler will call tls_get_addr to get the address of the variable,
// and it may hold it in a register across a call to schedule. When
@@ -179,14 +198,15 @@ M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
M*
runtime_m(void)
{
- return m;
+ if(g == nil)
+ return nil;
+ return g->m;
}
-// Set m and g.
+// Set g.
void
-runtime_setmg(M* mp, G* gp)
+runtime_setg(G* gp)
{
- m = mp;
g = gp;
}
@@ -197,6 +217,7 @@ runtime_newosproc(M *mp)
pthread_attr_t attr;
sigset_t clear, old;
pthread_t tid;
+ int tries;
int ret;
if(pthread_attr_init(&attr) != 0)
@@ -215,11 +236,21 @@ runtime_newosproc(M *mp)
sigemptyset(&old);
pthread_sigmask(SIG_BLOCK, &clear, &old);
- ret = pthread_create(&tid, &attr, runtime_mstart, mp);
+
+ for (tries = 0; tries < 20; tries++) {
+ ret = pthread_create(&tid, &attr, runtime_mstart, mp);
+ if (ret != EAGAIN) {
+ break;
+ }
+ runtime_usleep((tries + 1) * 1000); // Milliseconds.
+ }
+
pthread_sigmask(SIG_SETMASK, &old, nil);
- if (ret != 0)
+ if (ret != 0) {
+ runtime_printf("pthread_create failed: %d\n", ret);
runtime_throw("pthread_create");
+ }
}
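The loop above retries pthread_create on transient EAGAIN (e.g. a momentary shortage of threads) with a linear backoff: up to 20 attempts, sleeping (tries + 1) milliseconds between them, so at worst 1 + 2 + ... + 20 = 210 ms pass before the runtime gives up and throws. A minimal standalone sketch of the same pattern (hypothetical helper, not runtime code):

    #include <errno.h>
    #include <pthread.h>
    #include <unistd.h>

    /* Sketch: retry pthread_create on EAGAIN with a linear backoff,
       mirroring the loop in runtime_newosproc. */
    static int
    create_with_retry(pthread_t *tid, const pthread_attr_t *attr,
                      void *(*fn)(void*), void *arg)
    {
        int tries, ret = EAGAIN;

        for(tries = 0; tries < 20; tries++) {
            ret = pthread_create(tid, attr, fn, arg);
            if(ret != EAGAIN)
                break;
            usleep((tries + 1) * 1000);  /* milliseconds */
        }
        return ret;  /* 0 on success, else the last error code */
    }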
// First function run by a new goroutine. This replaces gogocall.
@@ -227,13 +258,17 @@ static void
kickoff(void)
{
void (*fn)(void*);
+ void *param;
if(g->traceback != nil)
gtraceback(g);
fn = (void (*)(void*))(g->entry);
- fn(g->param);
- runtime_goexit();
+ param = g->param;
+ g->entry = nil;
+ g->param = nil;
+ fn(param);
+ runtime_goexit1();
}
// Switch context to a different goroutine. This is like longjmp.
@@ -242,12 +277,12 @@ void
runtime_gogo(G* newg)
{
#ifdef USING_SPLIT_STACK
- __splitstack_setcontext(&newg->stack_context[0]);
+ __splitstack_setcontext(&newg->stackcontext[0]);
#endif
g = newg;
newg->fromgogo = true;
- fixcontext(&newg->context);
- setcontext(&newg->context);
+ fixcontext(ucontext_arg(&newg->context[0]));
+ setcontext(ucontext_arg(&newg->context[0]));
runtime_throw("gogo setcontext returned");
}
@@ -261,42 +296,47 @@ runtime_mcall(void (*pfn)(G*))
{
M *mp;
G *gp;
+#ifndef USING_SPLIT_STACK
+ void *afterregs;
+#endif
// Ensure that all registers are on the stack for the garbage
// collector.
__builtin_unwind_init();
- mp = m;
gp = g;
+ mp = gp->m;
if(gp == mp->g0)
runtime_throw("runtime: mcall called on m->g0 stack");
if(gp != nil) {
#ifdef USING_SPLIT_STACK
- __splitstack_getcontext(&g->stack_context[0]);
+ __splitstack_getcontext(&g->stackcontext[0]);
#else
- gp->gcnext_sp = &pfn;
+ // We have to point to an address on the stack that is
+ // below the saved registers.
+ gp->gcnextsp = &afterregs;
#endif
gp->fromgogo = false;
- getcontext(&gp->context);
+ getcontext(ucontext_arg(&gp->context[0]));
// When we return from getcontext, we may be running
- // in a new thread. That means that m and g may have
- // changed. They are global variables so we will
- // reload them, but the addresses of m and g may be
- // cached in our local stack frame, and those
- // addresses may be wrong. Call functions to reload
- // the values for this thread.
- mp = runtime_m();
+ // in a new thread. That means that g may have
+ // changed. It is a global variable so we will
+ // reload it, but the address of g may be cached in
+ // our local stack frame, and that address may be
+ // wrong. Call the function to reload the value for
+ // this thread.
gp = runtime_g();
+ mp = gp->m;
if(gp->traceback != nil)
gtraceback(gp);
}
if (gp == nil || !gp->fromgogo) {
#ifdef USING_SPLIT_STACK
- __splitstack_setcontext(&mp->g0->stack_context[0]);
+ __splitstack_setcontext(&mp->g0->stackcontext[0]);
#endif
mp->g0->entry = (byte*)pfn;
mp->g0->param = gp;
@@ -306,8 +346,8 @@ runtime_mcall(void (*pfn)(G*))
// the getcontext call just above.
g = mp->g0;
- fixcontext(&mp->g0->context);
- setcontext(&mp->g0->context);
+ fixcontext(ucontext_arg(&mp->g0->context[0]));
+ setcontext(ucontext_arg(&mp->g0->context[0]));
runtime_throw("runtime: mcall function returned");
}
}
@@ -324,110 +364,92 @@ runtime_mcall(void (*pfn)(G*))
//
// Design doc at http://golang.org/s/go11sched.
-typedef struct Sched Sched;
-struct Sched {
- Lock;
-
- uint64 goidgen;
- M* midle; // idle m's waiting for work
- int32 nmidle; // number of idle m's waiting for work
- int32 nmidlelocked; // number of locked m's waiting for work
- int32 mcount; // number of m's that have been created
- int32 maxmcount; // maximum number of m's allowed (or die)
-
- P* pidle; // idle P's
- uint32 npidle;
- uint32 nmspinning;
-
- // Global runnable queue.
- G* runqhead;
- G* runqtail;
- int32 runqsize;
-
- // Global cache of dead G's.
- Lock gflock;
- G* gfree;
-
- uint32 gcwaiting; // gc is waiting to run
- int32 stopwait;
- Note stopnote;
- uint32 sysmonwait;
- Note sysmonnote;
- uint64 lastpoll;
-
- int32 profilehz; // cpu profiling rate
-};
-
enum
{
- // The max value of GOMAXPROCS.
- // There are no fundamental restrictions on the value.
- MaxGomaxprocs = 1<<8,
-
- // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
+ // Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
GoidCacheBatch = 16,
};
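Per-P goroutine-id caching amortizes the shared atomic counter: each P grabs a batch of GoidCacheBatch ids from runtime_sched->goidgen and hands them out locally. A hedged sketch of the refill logic (field names assumed from the Go runtime's P structure, not defined in this file):

    /* Sketch: refill the per-P goid cache in batches so most goroutine
       creations avoid touching the global generator. */
    if(p->goidcache == p->goidcacheend) {
        p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch)
            - GoidCacheBatch + 1;
        p->goidcacheend = p->goidcache + GoidCacheBatch;
    }
    newg->goid = p->goidcache++;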
-Sched runtime_sched;
-int32 runtime_gomaxprocs;
-uint32 runtime_needextram = 1;
+extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
+extern bool* runtime_getCgoHasExtraM()
+ __asm__ (GOSYM_PREFIX "runtime.getCgoHasExtraM");
+extern P** runtime_getAllP()
+ __asm__ (GOSYM_PREFIX "runtime.getAllP");
+extern G* allocg(void)
+ __asm__ (GOSYM_PREFIX "runtime.allocg");
+extern bool needaddgcproc(void)
+ __asm__ (GOSYM_PREFIX "runtime.needaddgcproc");
+extern void startm(P*, bool)
+ __asm__(GOSYM_PREFIX "runtime.startm");
+extern void newm(void(*)(void), P*)
+ __asm__(GOSYM_PREFIX "runtime.newm");
+
+Sched* runtime_sched;
M runtime_m0;
G runtime_g0; // idle goroutine for m0
G* runtime_lastg;
-M* runtime_allm;
P** runtime_allp;
-M* runtime_extram;
int8* runtime_goos;
int32 runtime_ncpu;
bool runtime_precisestack;
-static int32 newprocs;
-
-static Lock allglock; // the following vars are protected by this lock or by stoptheworld
-G** runtime_allg;
-uintptr runtime_allglen;
-static uintptr allgcap;
bool runtime_isarchive;
void* runtime_mstart(void*);
-static void runqput(P*, G*);
-static G* runqget(P*);
-static bool runqputslow(P*, G*, uint32, uint32);
-static G* runqsteal(P*, P*);
-static void mput(M*);
-static M* mget(void);
-static void mcommoninit(M*);
-static void schedule(void);
-static void procresize(int32);
-static void acquirep(P*);
-static P* releasep(void);
-static void newm(void(*)(void), P*);
-static void stopm(void);
-static void startm(P*, bool);
-static void handoffp(P*);
-static void wakep(void);
-static void stoplockedm(void);
-static void startlockedm(G*);
-static void sysmon(void);
-static uint32 retake(int64);
-static void incidlelocked(int32);
-static void checkdead(void);
static void exitsyscall0(G*);
static void park0(G*);
static void goexit0(G*);
-static void gfput(P*, G*);
-static G* gfget(P*);
-static void gfpurge(P*);
-static void globrunqput(G*);
-static void globrunqputbatch(G*, G*, int32);
-static G* globrunqget(P*, int32);
-static P* pidleget(void);
-static void pidleput(P*);
-static void injectglist(G*);
-static bool preemptall(void);
static bool exitsyscallfast(void);
-static void allgadd(G*);
+
+extern void setncpu(int32)
+ __asm__(GOSYM_PREFIX "runtime.setncpu");
+extern void setpagesize(uintptr_t)
+ __asm__(GOSYM_PREFIX "runtime.setpagesize");
+extern void allgadd(G*)
+ __asm__(GOSYM_PREFIX "runtime.allgadd");
+extern void mcommoninit(M*)
+ __asm__(GOSYM_PREFIX "runtime.mcommoninit");
+extern void stopm(void)
+ __asm__(GOSYM_PREFIX "runtime.stopm");
+extern void handoffp(P*)
+ __asm__(GOSYM_PREFIX "runtime.handoffp");
+extern void wakep(void)
+ __asm__(GOSYM_PREFIX "runtime.wakep");
+extern void stoplockedm(void)
+ __asm__(GOSYM_PREFIX "runtime.stoplockedm");
+extern void schedule(void)
+ __asm__(GOSYM_PREFIX "runtime.schedule");
+extern void execute(G*, bool)
+ __asm__(GOSYM_PREFIX "runtime.execute");
+extern void gfput(P*, G*)
+ __asm__(GOSYM_PREFIX "runtime.gfput");
+extern G* gfget(P*)
+ __asm__(GOSYM_PREFIX "runtime.gfget");
+extern void procresize(int32)
+ __asm__(GOSYM_PREFIX "runtime.procresize");
+extern void acquirep(P*)
+ __asm__(GOSYM_PREFIX "runtime.acquirep");
+extern P* releasep(void)
+ __asm__(GOSYM_PREFIX "runtime.releasep");
+extern void incidlelocked(int32)
+ __asm__(GOSYM_PREFIX "runtime.incidlelocked");
+extern void checkdead(void)
+ __asm__(GOSYM_PREFIX "runtime.checkdead");
+extern void sysmon(void)
+ __asm__(GOSYM_PREFIX "runtime.sysmon");
+extern void mput(M*)
+ __asm__(GOSYM_PREFIX "runtime.mput");
+extern M* mget(void)
+ __asm__(GOSYM_PREFIX "runtime.mget");
+extern void globrunqput(G*)
+ __asm__(GOSYM_PREFIX "runtime.globrunqput");
+extern P* pidleget(void)
+ __asm__(GOSYM_PREFIX "runtime.pidleget");
+extern bool runqempty(P*)
+ __asm__(GOSYM_PREFIX "runtime.runqempty");
+extern void runqput(P*, G*, bool)
+ __asm__(GOSYM_PREFIX "runtime.runqput");
bool runtime_isstarted;
@@ -442,11 +464,16 @@ bool runtime_isstarted;
void
runtime_schedinit(void)
{
+ M *m;
int32 n, procs;
String s;
const byte *p;
Eface i;
+ setncpu(runtime_ncpu);
+ setpagesize(getpagesize());
+ runtime_sched = runtime_getsched();
+
m = &runtime_m0;
g = &runtime_g0;
m->g0 = g;
@@ -455,13 +482,14 @@ runtime_schedinit(void)
initcontext();
- runtime_sched.maxmcount = 10000;
+ runtime_sched->maxmcount = 10000;
runtime_precisestack = 0;
// runtime_symtabinit();
runtime_mallocinit();
mcommoninit(m);
-
+ runtime_alginit(); // maps must not be used before this call
+
// Initialize the itable value for newErrorCString,
// so that the next time it gets called, possibly
// in a fault during a garbage collection, it will not
@@ -476,20 +504,20 @@ runtime_schedinit(void)
runtime_goenvs();
runtime_parsedebugvars();
- runtime_sched.lastpoll = runtime_nanotime();
+ runtime_sched->lastpoll = runtime_nanotime();
procs = 1;
s = runtime_getenv("GOMAXPROCS");
p = s.str;
if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
- if(n > MaxGomaxprocs)
- n = MaxGomaxprocs;
+ if(n > _MaxGomaxprocs)
+ n = _MaxGomaxprocs;
procs = n;
}
- runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
+ runtime_allp = runtime_getAllP();
procresize(procs);
// Can not enable GC until all roots are registered.
- // mstats.enablegc = 1;
+ // mstats()->enablegc = 1;
}
extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
@@ -503,54 +531,6 @@ struct field_align
Hchan *p;
};
-// main_init_done is a signal used by cgocallbackg that initialization
-// has been completed. It is made before _cgo_notify_runtime_init_done,
-// so all cgo calls can rely on it existing. When main_init is
-// complete, it is closed, meaning cgocallbackg can reliably receive
-// from it.
-Hchan *runtime_main_init_done;
-
-// The chan bool type, for runtime_main_init_done.
-
-extern const struct __go_type_descriptor bool_type_descriptor
- __asm__ (GOSYM_PREFIX "__go_tdn_bool");
-
-static struct __go_channel_type chan_bool_type_descriptor =
- {
- /* __common */
- {
- /* __code */
- GO_CHAN,
- /* __align */
- __alignof (Hchan *),
- /* __field_align */
- offsetof (struct field_align, p) - 1,
- /* __size */
- sizeof (Hchan *),
- /* __hash */
- 0, /* This value doesn't matter. */
- /* __hashfn */
- &__go_type_hash_error_descriptor,
- /* __equalfn */
- &__go_type_equal_error_descriptor,
- /* __gc */
- NULL, /* This value doesn't matter */
- /* __reflection */
- NULL, /* This value doesn't matter */
- /* __uncommon */
- NULL,
- /* __pointer_to_this */
- NULL
- },
- /* __element_type */
- &bool_type_descriptor,
- /* __dir */
- CHANNEL_BOTH_DIR
- };
-
-extern Hchan *__go_new_channel (ChanType *, uintptr);
-extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
-
static void
initDone(void *arg __attribute__ ((unused))) {
runtime_unlockOSThread();
@@ -583,37 +563,37 @@ runtime_main(void* dummy __attribute__((unused)))
runtime_lockOSThread();
// Defer unlock so that runtime.Goexit during init does the unlock too.
- d.__pfn = initDone;
- d.__next = g->defer;
- d.__arg = (void*)-1;
- d.__panic = g->panic;
- d.__retaddr = nil;
- d.__makefunc_can_recover = 0;
- d.__frame = &frame;
- d.__special = true;
- g->defer = &d;
-
- if(m != &runtime_m0)
+ d.pfn = (uintptr)(void*)initDone;
+ d.link = g->_defer;
+ d.arg = (void*)-1;
+ d._panic = g->_panic;
+ d.retaddr = 0;
+ d.makefunccanrecover = 0;
+ d.frame = &frame;
+ d.special = true;
+ g->_defer = &d;
+
+ if(g->m != &runtime_m0)
runtime_throw("runtime_main not on m0");
__go_go(runtime_MHeap_Scavenger, nil);
- runtime_main_init_done = __go_new_channel(&chan_bool_type_descriptor, 0);
+ makeMainInitDone();
_cgo_notify_runtime_init_done();
main_init();
- closechan(runtime_main_init_done);
+ closeMainInitDone();
- if(g->defer != &d || d.__pfn != initDone)
+ if(g->_defer != &d || (void*)d.pfn != initDone)
runtime_throw("runtime: bad defer entry after init");
- g->defer = d.__next;
+ g->_defer = d.link;
runtime_unlockOSThread();
// For gccgo we have to wait until after main is initialized
// to enable GC, because initializing main registers the GC
// roots.
- mstats.enablegc = 1;
+ mstats()->enablegc = 1;
if(runtime_isarchive) {
// This is not a complete program, but is instead a
@@ -629,7 +609,7 @@ runtime_main(void* dummy __attribute__((unused)))
// another goroutine at the same time as main returns,
// let the other goroutine finish printing the panic trace.
// Once it does, it will exit. See issue 3934.
- if(runtime_panicking)
+ if(runtime_panicking())
runtime_park(nil, nil, "panicwait");
runtime_exit(0);
@@ -637,150 +617,24 @@ runtime_main(void* dummy __attribute__((unused)))
*(int32*)0 = 0;
}
-void
-runtime_goroutineheader(G *gp)
-{
- const char *status;
- int64 waitfor;
-
- switch(gp->status) {
- case Gidle:
- status = "idle";
- break;
- case Grunnable:
- status = "runnable";
- break;
- case Grunning:
- status = "running";
- break;
- case Gsyscall:
- status = "syscall";
- break;
- case Gwaiting:
- if(gp->waitreason)
- status = gp->waitreason;
- else
- status = "waiting";
- break;
- default:
- status = "???";
- break;
- }
-
- // approx time the G is blocked, in minutes
- waitfor = 0;
- if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
- waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
-
- if(waitfor < 1)
- runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
- else
- runtime_printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
-}
-
-void
-runtime_printcreatedby(G *g)
-{
- if(g != nil && g->gopc != 0 && g->goid != 1) {
- String fn;
- String file;
- intgo line;
-
- if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
- runtime_printf("created by %S\n", fn);
- runtime_printf("\t%S:%D\n", file, (int64) line);
- }
- }
-}
-
-struct Traceback
-{
- G* gp;
- Location locbuf[TracebackMaxFrames];
- int32 c;
-};
+void getTraceback(G*, G*) __asm__(GOSYM_PREFIX "runtime.getTraceback");
-void
-runtime_tracebackothers(G * volatile me)
+// getTraceback stores a traceback of gp in the g's traceback field
+// and then returns to me. We expect that gp's traceback is not nil.
+// It works by saving me's current context, and checking gp's traceback field.
+// If gp's traceback field is not nil, it starts running gp.
+// In places where we call getcontext, we check the traceback field.
+// If it is not nil, we collect a traceback, and then return to the
+// goroutine stored in the traceback field, which is me.
+void getTraceback(G* me, G* gp)
{
- G * volatile gp;
- Traceback tb;
- int32 traceback;
- volatile uintptr i;
-
- tb.gp = me;
- traceback = runtime_gotraceback(nil);
-
- // Show the current goroutine first, if we haven't already.
- if((gp = m->curg) != nil && gp != me) {
- runtime_printf("\n");
- runtime_goroutineheader(gp);
- gp->traceback = &tb;
-
-#ifdef USING_SPLIT_STACK
- __splitstack_getcontext(&me->stack_context[0]);
-#endif
- getcontext(&me->context);
-
- if(gp->traceback != nil) {
- runtime_gogo(gp);
- }
-
- runtime_printtrace(tb.locbuf, tb.c, false);
- runtime_printcreatedby(gp);
- }
-
- runtime_lock(&allglock);
- for(i = 0; i < runtime_allglen; i++) {
- gp = runtime_allg[i];
- if(gp == me || gp == m->curg || gp->status == Gdead)
- continue;
- if(gp->issystem && traceback < 2)
- continue;
- runtime_printf("\n");
- runtime_goroutineheader(gp);
-
- // Our only mechanism for doing a stack trace is
- // _Unwind_Backtrace. And that only works for the
- // current thread, not for other random goroutines.
- // So we need to switch context to the goroutine, get
- // the backtrace, and then switch back.
-
- // This means that if g is running or in a syscall, we
- // can't reliably print a stack trace. FIXME.
-
- if(gp->status == Grunning) {
- runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
- runtime_printcreatedby(gp);
- } else if(gp->status == Gsyscall) {
- runtime_printf("\tgoroutine in C code; stack unavailable\n");
- runtime_printcreatedby(gp);
- } else {
- gp->traceback = &tb;
-
#ifdef USING_SPLIT_STACK
- __splitstack_getcontext(&me->stack_context[0]);
+ __splitstack_getcontext(&me->stackcontext[0]);
#endif
- getcontext(&me->context);
-
- if(gp->traceback != nil) {
- runtime_gogo(gp);
- }
+ getcontext(ucontext_arg(&me->context[0]));
- runtime_printtrace(tb.locbuf, tb.c, false);
- runtime_printcreatedby(gp);
- }
- }
- runtime_unlock(&allglock);
-}
-
-static void
-checkmcount(void)
-{
- // sched lock is held
- if(runtime_sched.mcount > runtime_sched.maxmcount) {
- runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
- runtime_throw("thread exhaustion");
+ if (gp->traceback != nil) {
+ runtime_gogo(gp);
}
}
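The context handshake behind getTraceback is the usual ucontext pairing: save the requester's context, switch to the target, and have the target jump straight back. A reduced standalone sketch (hypothetical, independent of the runtime's G fields):

    #include <stdio.h>
    #include <ucontext.h>

    static ucontext_t me, other;
    static int pending = 1;  /* plays the role of the traceback field */

    static void
    collect(void)
    {
        /* ... a real version would collect the backtrace here ... */
        pending = 0;
        setcontext(&me);  /* jump back to the requester */
    }

    int
    main(void)
    {
        static char stack[64*1024];

        getcontext(&other);
        other.uc_stack.ss_sp = stack;
        other.uc_stack.ss_size = sizeof stack;
        makecontext(&other, collect, 0);

        getcontext(&me);  /* execution resumes here a second time */
        if(pending)
            setcontext(&other);
        printf("back after collecting\n");
        return 0;
    }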
@@ -794,286 +648,56 @@ gtraceback(G* gp)
traceback = gp->traceback;
gp->traceback = nil;
+ if(gp->m != nil)
+ runtime_throw("gtraceback: m is not nil");
+ gp->m = traceback->gp->m;
traceback->c = runtime_callers(1, traceback->locbuf,
sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
+ gp->m = nil;
runtime_gogo(traceback->gp);
}
-static void
-mcommoninit(M *mp)
-{
- // If there is no mcache runtime_callers() will crash,
- // and we are most likely in sysmon thread so the stack is senseless anyway.
- if(m->mcache)
- runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
-
- mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
-
- runtime_lock(&runtime_sched);
- mp->id = runtime_sched.mcount++;
- checkmcount();
- runtime_mpreinit(mp);
-
- // Add to runtime_allm so garbage collector doesn't free m
- // when it is just in a register or thread-local storage.
- mp->alllink = runtime_allm;
- // runtime_NumCgoCall() iterates over allm w/o schedlock,
- // so we need to publish it safely.
- runtime_atomicstorep(&runtime_allm, mp);
- runtime_unlock(&runtime_sched);
-}
-
-// Mark gp ready to run.
-void
-runtime_ready(G *gp)
-{
- // Mark runnable.
- m->locks++; // disable preemption because it can be holding p in a local var
- if(gp->status != Gwaiting) {
- runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
- runtime_throw("bad g->status in ready");
- }
- gp->status = Grunnable;
- runqput(m->p, gp);
- if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
- wakep();
- m->locks--;
-}
-
-int32
-runtime_gcprocs(void)
-{
- int32 n;
-
- // Figure out how many CPUs to use during GC.
- // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
- runtime_lock(&runtime_sched);
- n = runtime_gomaxprocs;
- if(n > runtime_ncpu)
- n = runtime_ncpu > 0 ? runtime_ncpu : 1;
- if(n > MaxGcproc)
- n = MaxGcproc;
- if(n > runtime_sched.nmidle+1) // one M is currently running
- n = runtime_sched.nmidle+1;
- runtime_unlock(&runtime_sched);
- return n;
-}
-
-static bool
-needaddgcproc(void)
-{
- int32 n;
-
- runtime_lock(&runtime_sched);
- n = runtime_gomaxprocs;
- if(n > runtime_ncpu)
- n = runtime_ncpu;
- if(n > MaxGcproc)
- n = MaxGcproc;
- n -= runtime_sched.nmidle+1; // one M is currently running
- runtime_unlock(&runtime_sched);
- return n > 0;
-}
-
-void
-runtime_helpgc(int32 nproc)
-{
- M *mp;
- int32 n, pos;
-
- runtime_lock(&runtime_sched);
- pos = 0;
- for(n = 1; n < nproc; n++) { // one M is currently running
- if(runtime_allp[pos]->mcache == m->mcache)
- pos++;
- mp = mget();
- if(mp == nil)
- runtime_throw("runtime_gcprocs inconsistency");
- mp->helpgc = n;
- mp->mcache = runtime_allp[pos]->mcache;
- pos++;
- runtime_notewakeup(&mp->park);
- }
- runtime_unlock(&runtime_sched);
-}
-
-// Similar to stoptheworld but best-effort and can be called several times.
-// There is no reverse operation, used during crashing.
-// This function must not lock any mutexes.
-void
-runtime_freezetheworld(void)
-{
- int32 i;
-
- if(runtime_gomaxprocs == 1)
- return;
- // stopwait and preemption requests can be lost
- // due to races with concurrently executing threads,
- // so try several times
- for(i = 0; i < 5; i++) {
- // this should tell the scheduler to not start any new goroutines
- runtime_sched.stopwait = 0x7fffffff;
- runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
- // this should stop running goroutines
- if(!preemptall())
- break; // no running goroutines
- runtime_usleep(1000);
- }
- // to be sure
- runtime_usleep(1000);
- preemptall();
- runtime_usleep(1000);
-}
-
-void
-runtime_stoptheworld(void)
-{
- int32 i;
- uint32 s;
- P *p;
- bool wait;
-
- runtime_lock(&runtime_sched);
- runtime_sched.stopwait = runtime_gomaxprocs;
- runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
- preemptall();
- // stop current P
- m->p->status = Pgcstop;
- runtime_sched.stopwait--;
- // try to retake all P's in Psyscall status
- for(i = 0; i < runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- s = p->status;
- if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
- runtime_sched.stopwait--;
- }
- // stop idle P's
- while((p = pidleget()) != nil) {
- p->status = Pgcstop;
- runtime_sched.stopwait--;
- }
- wait = runtime_sched.stopwait > 0;
- runtime_unlock(&runtime_sched);
-
- // wait for remaining P's to stop voluntarily
- if(wait) {
- runtime_notesleep(&runtime_sched.stopnote);
- runtime_noteclear(&runtime_sched.stopnote);
- }
- if(runtime_sched.stopwait)
- runtime_throw("stoptheworld: not stopped");
- for(i = 0; i < runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- if(p->status != Pgcstop)
- runtime_throw("stoptheworld: not stopped");
- }
-}
-
-static void
-mhelpgc(void)
-{
- m->helpgc = -1;
-}
-
-void
-runtime_starttheworld(void)
-{
- P *p, *p1;
- M *mp;
- G *gp;
- bool add;
-
- m->locks++; // disable preemption because it can be holding p in a local var
- gp = runtime_netpoll(false); // non-blocking
- injectglist(gp);
- add = needaddgcproc();
- runtime_lock(&runtime_sched);
- if(newprocs) {
- procresize(newprocs);
- newprocs = 0;
- } else
- procresize(runtime_gomaxprocs);
- runtime_sched.gcwaiting = 0;
-
- p1 = nil;
- while((p = pidleget()) != nil) {
- // procresize() puts p's with work at the beginning of the list.
- // Once we reach a p without a run queue, the rest don't have one either.
- if(p->runqhead == p->runqtail) {
- pidleput(p);
- break;
- }
- p->m = mget();
- p->link = p1;
- p1 = p;
- }
- if(runtime_sched.sysmonwait) {
- runtime_sched.sysmonwait = false;
- runtime_notewakeup(&runtime_sched.sysmonnote);
- }
- runtime_unlock(&runtime_sched);
-
- while(p1) {
- p = p1;
- p1 = p1->link;
- if(p->m) {
- mp = p->m;
- p->m = nil;
- if(mp->nextp)
- runtime_throw("starttheworld: inconsistent mp->nextp");
- mp->nextp = p;
- runtime_notewakeup(&mp->park);
- } else {
- // Start M to run P. Do not start another M below.
- newm(nil, p);
- add = false;
- }
- }
-
- if(add) {
- // If GC could have used another helper proc, start one now,
- // in the hope that it will be available next time.
- // It would have been even better to start it before the collection,
- // but doing so requires allocating memory, so it's tricky to
- // coordinate. This lazy approach works out in practice:
- // we don't mind if the first couple gc rounds don't have quite
- // the maximum number of procs.
- newm(mhelpgc, nil);
- }
- m->locks--;
-}
-
// Called to start an M.
void*
runtime_mstart(void* mp)
{
+ M *m;
+ G *gp;
+
m = (M*)mp;
g = m->g0;
+ g->m = m;
+ gp = g;
initcontext();
- g->entry = nil;
- g->param = nil;
+ gp->entry = nil;
+ gp->param = nil;
// Record top of stack for use by mcall.
// Once we call schedule we're never coming back,
// so other calls can reuse this stack space.
#ifdef USING_SPLIT_STACK
- __splitstack_getcontext(&g->stack_context[0]);
+ __splitstack_getcontext(&g->stackcontext[0]);
#else
- g->gcinitial_sp = &mp;
- // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
+ gp->gcinitialsp = &mp;
+ // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
// is the top of the stack, not the bottom.
- g->gcstack_size = 0;
- g->gcnext_sp = &mp;
+ gp->gcstacksize = 0;
+ gp->gcnextsp = &mp;
#endif
- getcontext(&g->context);
+ getcontext(ucontext_arg(&gp->context[0]));
+
+ if(gp->traceback != nil)
+ gtraceback(gp);
- if(g->entry != nil) {
+ if(gp->entry != nil) {
// Got here from mcall.
- void (*pfn)(G*) = (void (*)(G*))g->entry;
- G* gp = (G*)g->param;
- pfn(gp);
+ void (*pfn)(G*) = (void (*)(G*))gp->entry;
+ G* gp1 = (G*)gp->param;
+ gp->entry = nil;
+ gp->param = nil;
+ pfn(gp1);
*(int*)0x21 = 0x21;
}
runtime_minit();
@@ -1088,23 +712,25 @@ runtime_mstart(void* mp)
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
if(m == &runtime_m0) {
- if(runtime_iscgo && !runtime_cgoHasExtraM) {
- runtime_cgoHasExtraM = true;
- runtime_newextram();
- runtime_needextram = 0;
+ if(runtime_iscgo) {
+ bool* cgoHasExtraM = runtime_getCgoHasExtraM();
+ if(!*cgoHasExtraM) {
+ *cgoHasExtraM = true;
+ runtime_newextram();
+ }
}
runtime_initsig(false);
}
if(m->mstartfn)
- m->mstartfn();
+ ((void (*)(void))m->mstartfn)();
if(m->helpgc) {
m->helpgc = 0;
stopm();
} else if(m != &runtime_m0) {
- acquirep(m->nextp);
- m->nextp = nil;
+ acquirep((P*)m->nextp);
+ m->nextp = 0;
}
schedule();
@@ -1124,15 +750,18 @@ struct CgoThreadStart
void (*fn)(void);
};
+M* runtime_allocm(P*, bool, byte**, uintptr*)
+ __asm__(GOSYM_PREFIX "runtime.allocm");
+
// Allocate a new m unassociated with any thread.
// Can use p for allocation context if needed.
M*
-runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
+runtime_allocm(P *p, bool allocatestack, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
{
M *mp;
- m->locks++; // disable GC because it can be called from sysmon
- if(m->p == nil)
+ g->m->locks++; // disable GC because it can be called from sysmon
+ if(g->m->p == 0)
acquirep(p); // temporarily borrow p for mallocs in this function
#if 0
if(mtype == nil) {
@@ -1144,355 +773,106 @@ runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacks
mp = runtime_mal(sizeof *mp);
mcommoninit(mp);
- mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
+ mp->g0 = runtime_malg(allocatestack, false, ret_g0_stack, ret_g0_stacksize);
+ mp->g0->m = mp;
- if(p == m->p)
+ if(p == (P*)g->m->p)
releasep();
- m->locks--;
+ g->m->locks--;
return mp;
}
-static G*
-allocg(void)
-{
- G *gp;
- // static Type *gtype;
-
- // if(gtype == nil) {
- // Eface e;
- // runtime_gc_g_ptr(&e);
- // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
- // }
- // gp = runtime_cnew(gtype);
- gp = runtime_malloc(sizeof(G));
- return gp;
-}
+void setGContext(void) __asm__ (GOSYM_PREFIX "runtime.setGContext");
-static M* lockextra(bool nilokay);
-static void unlockextra(M*);
-
-// needm is called when a cgo callback happens on a
-// thread without an m (a thread not created by Go).
-// In this case, needm is expected to find an m to use
-// and return with m, g initialized correctly.
-// Since m and g are not set now (likely nil, but see below)
-// needm is limited in what routines it can call. In particular
-// it can only call nosplit functions (textflag 7) and cannot
-// do any scheduling that requires an m.
-//
-// In order to avoid needing heavy lifting here, we adopt
-// the following strategy: there is a stack of available m's
-// that can be stolen. Using compare-and-swap
-// to pop from the stack has ABA races, so we simulate
-// a lock by doing an exchange (via casp) to steal the stack
-// head and replace the top pointer with MLOCKED (1).
-// This serves as a simple spin lock that we can use even
-// without an m. The thread that locks the stack in this way
-// unlocks the stack by storing a valid stack head pointer.
-//
-// In order to make sure that there is always an m structure
-// available to be stolen, we maintain the invariant that there
-// is always one more than needed. At the beginning of the
-// program (if cgo is in use) the list is seeded with a single m.
-// If needm finds that it has taken the last m off the list, its job
-// is - once it has installed its own m so that it can do things like
-// allocate memory - to create a spare m and put it on the list.
-//
-// Each of these extra m's also has a g0 and a curg that are
-// pressed into service as the scheduling stack and current
-// goroutine for the duration of the cgo callback.
-//
-// When the callback is done with the m, it calls dropm to
-// put the m back on the list.
-//
-// Unlike the gc toolchain, we start running on curg, since we are
-// just going to return and let the caller continue.
+// setGContext sets up a new goroutine context for the current g.
void
-runtime_needm(void)
+setGContext()
{
- M *mp;
-
- if(runtime_needextram) {
- // Can happen if C/C++ code calls Go from a global ctor.
- // Can not throw, because scheduler is not initialized yet.
- int rv __attribute__((unused));
- rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
- sizeof("fatal error: cgo callback before cgo call\n")-1);
- runtime_exit(1);
- }
+ int val;
+ G *gp;
- // Lock extra list, take head, unlock popped list.
- // nilokay=false is safe here because of the invariant above,
- // that the extra list always contains or will soon contain
- // at least one m.
- mp = lockextra(false);
-
- // Set needextram when we've just emptied the list,
- // so that the eventual call into cgocallbackg will
- // allocate a new m for the extra list. We delay the
- // allocation until then so that it can be done
- // after exitsyscall makes sure it is okay to be
- // running at all (that is, there's no garbage collection
- // running right now).
- mp->needextram = mp->schedlink == nil;
- unlockextra(mp->schedlink);
-
- // Install m and g (= m->curg).
- runtime_setmg(mp, mp->curg);
-
- // Initialize g's context as in mstart.
initcontext();
- g->status = Gsyscall;
- g->entry = nil;
- g->param = nil;
+ gp = g;
+ gp->entry = nil;
+ gp->param = nil;
#ifdef USING_SPLIT_STACK
- __splitstack_getcontext(&g->stack_context[0]);
+ __splitstack_getcontext(&gp->stackcontext[0]);
+ val = 0;
+ __splitstack_block_signals(&val, nil);
#else
- g->gcinitial_sp = &mp;
- g->gcstack = nil;
- g->gcstack_size = 0;
- g->gcnext_sp = &mp;
+ gp->gcinitialsp = &val;
+ gp->gcstack = nil;
+ gp->gcstacksize = 0;
+ gp->gcnextsp = &val;
#endif
- getcontext(&g->context);
+ getcontext(ucontext_arg(&gp->context[0]));
- if(g->entry != nil) {
+ if(gp->entry != nil) {
// Got here from mcall.
- void (*pfn)(G*) = (void (*)(G*))g->entry;
- G* gp = (G*)g->param;
- pfn(gp);
+ void (*pfn)(G*) = (void (*)(G*))gp->entry;
+ G* gp1 = (G*)gp->param;
+ gp->entry = nil;
+ gp->param = nil;
+ pfn(gp1);
*(int*)0x22 = 0x22;
}
-
- // Initialize this thread to use the m.
- runtime_minit();
-
-#ifdef USING_SPLIT_STACK
- {
- int dont_block_signals = 0;
- __splitstack_block_signals(&dont_block_signals, nil);
- }
-#endif
}
-// newextram allocates an m and puts it on the extra list.
-// It is called with a working local m, so that it can do things
-// like call schedlock and allocate.
-void
-runtime_newextram(void)
-{
- M *mp, *mnext;
- G *gp;
- byte *g0_sp, *sp;
- size_t g0_spsize, spsize;
-
- // Create extra goroutine locked to extra m.
- // The goroutine is the context in which the cgo callback will run.
- // The sched.pc will never be returned to, but setting it to
- // runtime.goexit makes clear to the traceback routines where
- // the goroutine stack ends.
- mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
- gp = runtime_malg(StackMin, &sp, &spsize);
- gp->status = Gdead;
- mp->curg = gp;
- mp->locked = LockInternal;
- mp->lockedg = gp;
- gp->lockedm = mp;
- gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
- // put on allg for garbage collector
- allgadd(gp);
-
- // The context for gp will be set up in runtime_needm. But
- // here we need to set up the context for g0.
- getcontext(&mp->g0->context);
- mp->g0->context.uc_stack.ss_sp = g0_sp;
- mp->g0->context.uc_stack.ss_size = g0_spsize;
- makecontext(&mp->g0->context, kickoff, 0);
-
- // Add m to the extra list.
- mnext = lockextra(true);
- mp->schedlink = mnext;
- unlockextra(mp);
-}
+void makeGContext(G*, byte*, uintptr)
+ __asm__(GOSYM_PREFIX "runtime.makeGContext");
-// dropm is called when a cgo callback has called needm but is now
-// done with the callback and returning back into the non-Go thread.
-// It puts the current m back onto the extra list.
-//
-// The main expense here is the call to signalstack to release the
-// m's signal stack, and then the call to needm on the next callback
-// from this thread. It is tempting to try to save the m for next time,
-// which would eliminate both these costs, but there might not be
-// a next time: the current thread (which Go does not control) might exit.
-// If we saved the m for that thread, there would be an m leak each time
-// such a thread exited. Instead, we acquire and release an m on each
-// call. These should typically not be scheduling operations, just a few
-// atomics, so the cost should be small.
-//
-// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
-// variable using pthread_key_create. Unlike the pthread keys we already use
-// on OS X, this dummy key would never be read by Go code. It would exist
-// only so that we could register at thread-exit-time destructor.
-// That destructor would put the m back onto the extra list.
-// This is purely a performance optimization. The current version,
-// in which dropm happens on each cgo call, is still correct too.
-// We may have to keep the current version on systems with cgo
-// but without pthreads, like Windows.
+// makeGContext makes a new context for a g.
void
-runtime_dropm(void)
-{
- M *mp, *mnext;
-
- // Undo whatever initialization minit did during needm.
- runtime_unminit();
+makeGContext(G* gp, byte* sp, uintptr spsize) {
+ ucontext_t *uc;
- // Clear m and g, and return m to the extra list.
- // After the call to setmg we can only call nosplit functions.
- mp = m;
- runtime_setmg(nil, nil);
-
- mp->curg->status = Gdead;
- mp->curg->gcstack = nil;
- mp->curg->gcnext_sp = nil;
-
- mnext = lockextra(true);
- mp->schedlink = mnext;
- unlockextra(mp);
-}
-
-#define MLOCKED ((M*)1)
-
-// lockextra locks the extra list and returns the list head.
-// The caller must unlock the list by storing a new list head
-// to runtime.extram. If nilokay is true, then lockextra will
-// return a nil list head if that's what it finds. If nilokay is false,
-// lockextra will keep waiting until the list head is no longer nil.
-static M*
-lockextra(bool nilokay)
-{
- M *mp;
- void (*yield)(void);
-
- for(;;) {
- mp = runtime_atomicloadp(&runtime_extram);
- if(mp == MLOCKED) {
- yield = runtime_osyield;
- yield();
- continue;
- }
- if(mp == nil && !nilokay) {
- runtime_usleep(1);
- continue;
- }
- if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
- yield = runtime_osyield;
- yield();
- continue;
- }
- break;
- }
- return mp;
-}
-
-static void
-unlockextra(M *mp)
-{
- runtime_atomicstorep(&runtime_extram, mp);
-}
-
-static int32
-countextra()
-{
- M *mp, *mc;
- int32 c;
-
- for(;;) {
- mp = runtime_atomicloadp(&runtime_extram);
- if(mp == MLOCKED) {
- runtime_osyield();
- continue;
- }
- if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
- runtime_osyield();
- continue;
- }
- c = 0;
- for(mc = mp; mc != nil; mc = mc->schedlink)
- c++;
- runtime_atomicstorep(&runtime_extram, mp);
- return c;
- }
+ uc = ucontext_arg(&gp->context[0]);
+ getcontext(uc);
+ uc->uc_stack.ss_sp = sp;
+ uc->uc_stack.ss_size = (size_t)spsize;
+ makecontext(uc, kickoff, 0);
}
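makeGContext is the C half of goroutine creation: Go code allocates the G and its stack, then calls into C so the saved context points at that stack and the first runtime_gogo lands in kickoff. A hypothetical call-site sketch (fn and arg are assumed names; the flow is not shown in this patch):

    /* Sketch: wire a freshly allocated goroutine stack to kickoff.
       kickoff reads entry/param, so set them before the first switch. */
    byte *sp;
    uintptr spsize;
    G *newg;

    newg = runtime_malg(true, false, &sp, &spsize);
    newg->entry = (byte*)fn;
    newg->param = arg;
    makeGContext(newg, sp, spsize);
    /* a later runtime_gogo(newg) starts fn(arg) on the new stack */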
// Create a new m. It will start off with a call to fn, or else the scheduler.
-static void
+void
newm(void(*fn)(void), P *p)
{
M *mp;
- mp = runtime_allocm(p, -1, nil, nil);
- mp->nextp = p;
- mp->mstartfn = fn;
+ mp = runtime_allocm(p, false, nil, nil);
+ mp->nextp = (uintptr)p;
+ mp->mstartfn = (uintptr)(void*)fn;
runtime_newosproc(mp);
}
-// Stops execution of the current m until new work is available.
-// Returns with acquired P.
-static void
-stopm(void)
-{
- if(m->locks)
- runtime_throw("stopm holding locks");
- if(m->p)
- runtime_throw("stopm holding p");
- if(m->spinning) {
- m->spinning = false;
- runtime_xadd(&runtime_sched.nmspinning, -1);
- }
-
-retry:
- runtime_lock(&runtime_sched);
- mput(m);
- runtime_unlock(&runtime_sched);
- runtime_notesleep(&m->park);
- runtime_noteclear(&m->park);
- if(m->helpgc) {
- runtime_gchelper();
- m->helpgc = 0;
- m->mcache = nil;
- goto retry;
- }
- acquirep(m->nextp);
- m->nextp = nil;
-}
-
static void
mspinning(void)
{
- m->spinning = true;
+ g->m->spinning = true;
}
// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P, if no idle P's does nothing.
-static void
+void
startm(P *p, bool spinning)
{
M *mp;
void (*fn)(void);
- runtime_lock(&runtime_sched);
+ runtime_lock(&runtime_sched->lock);
if(p == nil) {
p = pidleget();
if(p == nil) {
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&runtime_sched->lock);
if(spinning)
- runtime_xadd(&runtime_sched.nmspinning, -1);
+ runtime_xadd(&runtime_sched->nmspinning, -1);
return;
}
}
mp = mget();
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&runtime_sched->lock);
if(mp == nil) {
fn = nil;
if(spinning)
@@ -1504,369 +884,39 @@ startm(P *p, bool spinning)
runtime_throw("startm: m is spinning");
if(mp->nextp)
runtime_throw("startm: m has p");
- mp->spinning = spinning;
- mp->nextp = p;
- runtime_notewakeup(&mp->park);
-}
-
-// Hands off P from syscall or locked M.
-static void
-handoffp(P *p)
-{
- // if it has local work, start it straight away
- if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
- startm(p, false);
- return;
- }
- // no local work, check that there are no spinning/idle M's,
- // otherwise our help is not required
- if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
- runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
- startm(p, true);
- return;
- }
- runtime_lock(&runtime_sched);
- if(runtime_sched.gcwaiting) {
- p->status = Pgcstop;
- if(--runtime_sched.stopwait == 0)
- runtime_notewakeup(&runtime_sched.stopnote);
- runtime_unlock(&runtime_sched);
- return;
- }
- if(runtime_sched.runqsize) {
- runtime_unlock(&runtime_sched);
- startm(p, false);
- return;
- }
- // If this is the last running P and nobody is polling network,
- // need to wakeup another M to poll network.
- if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
- runtime_unlock(&runtime_sched);
- startm(p, false);
- return;
- }
- pidleput(p);
- runtime_unlock(&runtime_sched);
-}
-
-// Tries to add one more P to execute G's.
-// Called when a G is made runnable (newproc, ready).
-static void
-wakep(void)
-{
- // be conservative about spinning threads
- if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
- return;
- startm(nil, true);
-}
-
-// Stops execution of the current m that is locked to a g until the g is runnable again.
-// Returns with acquired P.
-static void
-stoplockedm(void)
-{
- P *p;
-
- if(m->lockedg == nil || m->lockedg->lockedm != m)
- runtime_throw("stoplockedm: inconsistent locking");
- if(m->p) {
- // Schedule another M to run this p.
- p = releasep();
- handoffp(p);
+ if(spinning && !runqempty(p)) {
+ runtime_throw("startm: p has runnable gs");
}
- incidlelocked(1);
- // Wait until another thread schedules lockedg again.
- runtime_notesleep(&m->park);
- runtime_noteclear(&m->park);
- if(m->lockedg->status != Grunnable)
- runtime_throw("stoplockedm: not runnable");
- acquirep(m->nextp);
- m->nextp = nil;
-}
-
-// Schedules the locked m to run the locked gp.
-static void
-startlockedm(G *gp)
-{
- M *mp;
- P *p;
-
- mp = gp->lockedm;
- if(mp == m)
- runtime_throw("startlockedm: locked to me");
- if(mp->nextp)
- runtime_throw("startlockedm: m has p");
- // directly handoff current P to the locked m
- incidlelocked(-1);
- p = releasep();
- mp->nextp = p;
+ mp->spinning = spinning;
+ mp->nextp = (uintptr)p;
runtime_notewakeup(&mp->park);
- stopm();
-}
-
-// Stops the current m for stoptheworld.
-// Returns when the world is restarted.
-static void
-gcstopm(void)
-{
- P *p;
-
- if(!runtime_sched.gcwaiting)
- runtime_throw("gcstopm: not waiting for gc");
- if(m->spinning) {
- m->spinning = false;
- runtime_xadd(&runtime_sched.nmspinning, -1);
- }
- p = releasep();
- runtime_lock(&runtime_sched);
- p->status = Pgcstop;
- if(--runtime_sched.stopwait == 0)
- runtime_notewakeup(&runtime_sched.stopnote);
- runtime_unlock(&runtime_sched);
- stopm();
-}
-
-// Schedules gp to run on the current M.
-// Never returns.
-static void
-execute(G *gp)
-{
- int32 hz;
-
- if(gp->status != Grunnable) {
- runtime_printf("execute: bad g status %d\n", gp->status);
- runtime_throw("execute: bad g status");
- }
- gp->status = Grunning;
- gp->waitsince = 0;
- m->p->schedtick++;
- m->curg = gp;
- gp->m = m;
-
- // Check whether the profiler needs to be turned on or off.
- hz = runtime_sched.profilehz;
- if(m->profilehz != hz)
- runtime_resetcpuprofiler(hz);
-
- runtime_gogo(gp);
}
-// Finds a runnable goroutine to execute.
-// Tries to steal from other P's, get g from global queue, poll network.
-static G*
-findrunnable(void)
-{
- G *gp;
- P *p;
- int32 i;
-
-top:
- if(runtime_sched.gcwaiting) {
- gcstopm();
- goto top;
- }
- if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
- runtime_ready(gp);
- // local runq
- gp = runqget(m->p);
- if(gp)
- return gp;
- // global runq
- if(runtime_sched.runqsize) {
- runtime_lock(&runtime_sched);
- gp = globrunqget(m->p, 0);
- runtime_unlock(&runtime_sched);
- if(gp)
- return gp;
- }
- // poll network
- gp = runtime_netpoll(false); // non-blocking
- if(gp) {
- injectglist(gp->schedlink);
- gp->status = Grunnable;
- return gp;
- }
- // If number of spinning M's >= number of busy P's, block.
- // This is necessary to prevent excessive CPU consumption
- // when GOMAXPROCS>>1 but the program parallelism is low.
- if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
- goto stop;
- if(!m->spinning) {
- m->spinning = true;
- runtime_xadd(&runtime_sched.nmspinning, 1);
- }
- // random steal from other P's
- for(i = 0; i < 2*runtime_gomaxprocs; i++) {
- if(runtime_sched.gcwaiting)
- goto top;
- p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
- if(p == m->p)
- gp = runqget(p);
- else
- gp = runqsteal(m->p, p);
- if(gp)
- return gp;
- }
-stop:
- // return P and block
- runtime_lock(&runtime_sched);
- if(runtime_sched.gcwaiting) {
- runtime_unlock(&runtime_sched);
- goto top;
- }
- if(runtime_sched.runqsize) {
- gp = globrunqget(m->p, 0);
- runtime_unlock(&runtime_sched);
- return gp;
- }
- p = releasep();
- pidleput(p);
- runtime_unlock(&runtime_sched);
- if(m->spinning) {
- m->spinning = false;
- runtime_xadd(&runtime_sched.nmspinning, -1);
- }
- // check all runqueues once again
- for(i = 0; i < runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- if(p && p->runqhead != p->runqtail) {
- runtime_lock(&runtime_sched);
- p = pidleget();
- runtime_unlock(&runtime_sched);
- if(p) {
- acquirep(p);
- goto top;
- }
- break;
- }
- }
- // poll network
- if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
- if(m->p)
- runtime_throw("findrunnable: netpoll with p");
- if(m->spinning)
- runtime_throw("findrunnable: netpoll with spinning");
- gp = runtime_netpoll(true); // block until new work is available
- runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
- if(gp) {
- runtime_lock(&runtime_sched);
- p = pidleget();
- runtime_unlock(&runtime_sched);
- if(p) {
- acquirep(p);
- injectglist(gp->schedlink);
- gp->status = Grunnable;
- return gp;
- }
- injectglist(gp);
- }
- }
- stopm();
- goto top;
-}
-
-static void
-resetspinning(void)
-{
- int32 nmspinning;
-
- if(m->spinning) {
- m->spinning = false;
- nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
- if(nmspinning < 0)
- runtime_throw("findrunnable: negative nmspinning");
- } else
- nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
-
- // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
- // so see if we need to wakeup another P here.
- if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
- wakep();
-}
-
-// Injects the list of runnable G's into the scheduler.
-// Can run concurrently with GC.
-static void
-injectglist(G *glist)
+// Puts the current goroutine into a waiting state and calls unlockf.
+// If unlockf returns false, the goroutine is resumed.
+void
+runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
{
- int32 n;
- G *gp;
-
- if(glist == nil)
- return;
- runtime_lock(&runtime_sched);
- for(n = 0; glist; n++) {
- gp = glist;
- glist = gp->schedlink;
- gp->status = Grunnable;
- globrunqput(gp);
- }
- runtime_unlock(&runtime_sched);
-
- for(; n && runtime_sched.npidle; n--)
- startm(nil, false);
+ if(g->atomicstatus != _Grunning)
+ runtime_throw("bad g status");
+ g->m->waitlock = lock;
+ g->m->waitunlockf = unlockf;
+ g->waitreason = runtime_gostringnocopy((const byte*)reason);
+ runtime_mcall(park0);
}
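The unlockf callback runs on g0 after gp has been marked _Gwaiting; it receives the parked G plus the opaque lock pointer, and returning false cancels the park and reschedules gp at once. The parkunlock helper used by runtime_parkunlock below is essentially this shape (hedged sketch):

    /* Sketch: an unlockf callback that releases the lock and keeps
       the goroutine parked until something calls ready() on it. */
    static bool
    parkunlock(G *gp __attribute__ ((unused)), void *lock)
    {
        runtime_unlock((Lock*)lock);
        return true;
    }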
-// One round of scheduler: find a runnable goroutine and execute it.
-// Never returns.
-static void
-schedule(void)
-{
- G *gp;
- uint32 tick;
-
- if(m->locks)
- runtime_throw("schedule: holding locks");
-
-top:
- if(runtime_sched.gcwaiting) {
- gcstopm();
- goto top;
- }
+void gopark(FuncVal *, void *, String, byte, int)
+ __asm__ (GOSYM_PREFIX "runtime.gopark");
- gp = nil;
- // Check the global runnable queue once in a while to ensure fairness.
- // Otherwise two goroutines can completely occupy the local runqueue
- // by constantly respawning each other.
- tick = m->p->schedtick;
- // This is a fancy way to say tick%61==0,
- // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
- if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
- runtime_lock(&runtime_sched);
- gp = globrunqget(m->p, 1);
- runtime_unlock(&runtime_sched);
- if(gp)
- resetspinning();
- }
- if(gp == nil) {
- gp = runqget(m->p);
- if(gp && m->spinning)
- runtime_throw("schedule: spinning with local work");
- }
- if(gp == nil) {
- gp = findrunnable(); // blocks until work is available
- resetspinning();
- }
-
- if(gp->lockedm) {
- // Hands off own p to the locked m,
- // then blocks waiting for a new p.
- startlockedm(gp);
- goto top;
- }
-
- execute(gp);
-}
-
-// Puts the current goroutine into a waiting state and calls unlockf.
-// If unlockf returns false, the goroutine is resumed.
void
-runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
+gopark(FuncVal *unlockf, void *lock, String reason,
+ byte traceEv __attribute__ ((unused)),
+ int traceskip __attribute__ ((unused)))
{
- if(g->status != Grunning)
+ if(g->atomicstatus != _Grunning)
runtime_throw("bad g status");
- m->waitlock = lock;
- m->waitunlockf = unlockf;
+ g->m->waitlock = lock;
+ g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
g->waitreason = reason;
runtime_mcall(park0);
}
@@ -1887,27 +937,44 @@ runtime_parkunlock(Lock *lock, const char *reason)
runtime_park(parkunlock, lock, reason);
}
+void goparkunlock(Lock *, String, byte, int)
+ __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
+
+void
+goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
+ int traceskip __attribute__ ((unused)))
+{
+ if(g->atomicstatus != _Grunning)
+ runtime_throw("bad g status");
+ g->m->waitlock = lock;
+ g->m->waitunlockf = parkunlock;
+ g->waitreason = reason;
+ runtime_mcall(park0);
+}
+
// runtime_park continuation on g0.
static void
park0(G *gp)
{
+ M *m;
bool ok;
- gp->status = Gwaiting;
+ m = g->m;
+ gp->atomicstatus = _Gwaiting;
gp->m = nil;
m->curg = nil;
if(m->waitunlockf) {
- ok = m->waitunlockf(gp, m->waitlock);
+ ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
m->waitunlockf = nil;
m->waitlock = nil;
if(!ok) {
- gp->status = Grunnable;
- execute(gp); // Schedule it back, never returns.
+ gp->atomicstatus = _Grunnable;
+ execute(gp, true); // Schedule it back, never returns.
}
}
if(m->lockedg) {
stoplockedm();
- execute(gp); // Never returns.
+ execute(gp, true); // Never returns.
}
schedule();
}
@@ -1916,7 +983,7 @@ park0(G *gp)
void
runtime_gosched(void)
{
- if(g->status != Grunning)
+ if(g->atomicstatus != _Grunning)
runtime_throw("bad g status");
runtime_mcall(runtime_gosched0);
}
@@ -1925,15 +992,18 @@ runtime_gosched(void)
void
runtime_gosched0(G *gp)
{
- gp->status = Grunnable;
+ M *m;
+
+ m = g->m;
+ gp->atomicstatus = _Grunnable;
gp->m = nil;
m->curg = nil;
- runtime_lock(&runtime_sched);
+ runtime_lock(&runtime_sched->lock);
globrunqput(gp);
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&runtime_sched->lock);
if(m->lockedg) {
stoplockedm();
- execute(gp); // Never returns.
+ execute(gp, true); // Never returns.
}
schedule();
}
@@ -1942,38 +1012,43 @@ runtime_gosched0(G *gp)
// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
// Since it does not return it does not matter. But if it is preempted
// at the split stack check, GC will complain about inconsistent sp.
-void runtime_goexit(void) __attribute__ ((noinline));
+void runtime_goexit1(void) __attribute__ ((noinline));
void
-runtime_goexit(void)
+runtime_goexit1(void)
{
- if(g->status != Grunning)
+ if(g->atomicstatus != _Grunning)
runtime_throw("bad g status");
runtime_mcall(goexit0);
}
-// runtime_goexit continuation on g0.
+// runtime_goexit1 continuation on g0.
static void
goexit0(G *gp)
{
- gp->status = Gdead;
+ M *m;
+
+ m = g->m;
+ gp->atomicstatus = _Gdead;
gp->entry = nil;
gp->m = nil;
gp->lockedm = nil;
gp->paniconfault = 0;
- gp->defer = nil; // should be true already but just in case.
- gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
- gp->writenbuf = 0;
- gp->writebuf = nil;
- gp->waitreason = nil;
+ gp->_defer = nil; // should be true already but just in case.
+ gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
+ gp->writebuf.__values = nil;
+ gp->writebuf.__count = 0;
+ gp->writebuf.__capacity = 0;
+ gp->waitreason = runtime_gostringnocopy(nil);
gp->param = nil;
+ m->curg->m = nil;
m->curg = nil;
m->lockedg = nil;
- if(m->locked & ~LockExternal) {
+ if(m->locked & ~_LockExternal) {
runtime_printf("invalid m->locked = %d\n", m->locked);
runtime_throw("internal lockOSThread error");
}
m->locked = 0;
- gfput(m->p, gp);
+ gfput((P*)m->p, gp);
schedule();
}
@@ -1986,15 +1061,16 @@ goexit0(G *gp)
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
-void runtime_entersyscall(void) __attribute__ ((no_split_stack));
-static void doentersyscall(void) __attribute__ ((no_split_stack, noinline));
+void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
+static void doentersyscall(uintptr, uintptr)
+ __attribute__ ((no_split_stack, noinline));
void
-runtime_entersyscall()
+runtime_entersyscall(int32 dummy __attribute__ ((unused)))
{
// Save the registers in the g structure so that any pointers
// held in registers will be seen by the garbage collector.
- getcontext(&g->gcregs);
+ getcontext(ucontext_arg(&g->gcregs[0]));
// Do the work in a separate function, so that this function
// doesn't save any registers on its own stack. If this
@@ -2005,84 +1081,99 @@ runtime_entersyscall()
// callee-saved registers to access the TLS variable g. We
// don't want to put the ucontext_t on the stack because it is
// large and we can not split the stack here.
- doentersyscall();
+ doentersyscall((uintptr)runtime_getcallerpc(&dummy),
+ (uintptr)runtime_getcallersp(&dummy));
}
static void
-doentersyscall()
+doentersyscall(uintptr pc, uintptr sp)
{
- // Disable preemption because during this function g is in Gsyscall status,
+ // Disable preemption because during this function g is in _Gsyscall status,
// but can have inconsistent g->sched, do not let GC observe it.
- m->locks++;
+ g->m->locks++;
// Leave SP around for GC and traceback.
#ifdef USING_SPLIT_STACK
- g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
- &g->gcnext_segment, &g->gcnext_sp,
- &g->gcinitial_sp);
+ {
+ size_t gcstacksize;
+ g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
+ &g->gcnextsegment, &g->gcnextsp,
+ &g->gcinitialsp);
+ g->gcstacksize = (uintptr)gcstacksize;
+ }
#else
{
void *v;
- g->gcnext_sp = (byte *) &v;
+ g->gcnextsp = (byte *) &v;
}
#endif
- g->status = Gsyscall;
+ g->syscallsp = sp;
+ g->syscallpc = pc;
- if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
- runtime_lock(&runtime_sched);
- if(runtime_atomicload(&runtime_sched.sysmonwait)) {
- runtime_atomicstore(&runtime_sched.sysmonwait, 0);
- runtime_notewakeup(&runtime_sched.sysmonnote);
+ g->atomicstatus = _Gsyscall;
+
+ if(runtime_atomicload(&runtime_sched->sysmonwait)) { // TODO: fast atomic
+ runtime_lock(&runtime_sched->lock);
+ if(runtime_atomicload(&runtime_sched->sysmonwait)) {
+ runtime_atomicstore(&runtime_sched->sysmonwait, 0);
+ runtime_notewakeup(&runtime_sched->sysmonnote);
}
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&runtime_sched->lock);
}
- m->mcache = nil;
- m->p->m = nil;
- runtime_atomicstore(&m->p->status, Psyscall);
- if(runtime_atomicload(&runtime_sched.gcwaiting)) {
- runtime_lock(&runtime_sched);
- if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
- if(--runtime_sched.stopwait == 0)
- runtime_notewakeup(&runtime_sched.stopnote);
+ g->m->mcache = nil;
+ ((P*)(g->m->p))->m = 0;
+ runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
+ if(runtime_atomicload(&runtime_sched->gcwaiting)) {
+ runtime_lock(&runtime_sched->lock);
+ if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
+ if(--runtime_sched->stopwait == 0)
+ runtime_notewakeup(&runtime_sched->stopnote);
}
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&runtime_sched->lock);
}
- m->locks--;
+ g->m->locks--;
}
// The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
void
-runtime_entersyscallblock(void)
+runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
{
P *p;
- m->locks++; // see comment in entersyscall
+ g->m->locks++; // see comment in entersyscall
// Leave SP around for GC and traceback.
#ifdef USING_SPLIT_STACK
- g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
- &g->gcnext_segment, &g->gcnext_sp,
- &g->gcinitial_sp);
+ {
+ size_t gcstacksize;
+ g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
+ &g->gcnextsegment, &g->gcnextsp,
+ &g->gcinitialsp);
+ g->gcstacksize = (uintptr)gcstacksize;
+ }
#else
- g->gcnext_sp = (byte *) &p;
+ g->gcnextsp = (byte *) &p;
#endif
// Save the registers in the g structure so that any pointers
// held in registers will be seen by the garbage collector.
- getcontext(&g->gcregs);
+ getcontext(ucontext_arg(&g->gcregs[0]));
+
+ g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
+ g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
- g->status = Gsyscall;
+ g->atomicstatus = _Gsyscall;
p = releasep();
handoffp(p);
if(g->isbackground) // do not consider blocked scavenger for deadlock detection
incidlelocked(1);
- m->locks--;
+ g->m->locks--;
}
// The goroutine g exited its system call.
@@ -2090,33 +1181,34 @@ runtime_entersyscallblock(void)
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
void
-runtime_exitsyscall(void)
+runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
{
G *gp;
- m->locks++; // see comment in entersyscall
-
gp = g;
+ gp->m->locks++; // see comment in entersyscall
+
if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
incidlelocked(-1);
- g->waitsince = 0;
+ gp->waitsince = 0;
if(exitsyscallfast()) {
// There's a cpu for us, so we can run.
- m->p->syscalltick++;
- gp->status = Grunning;
+ ((P*)gp->m->p)->syscalltick++;
+ gp->atomicstatus = _Grunning;
// Garbage collector isn't running (since we are),
// so okay to clear gcstack and gcsp.
#ifdef USING_SPLIT_STACK
gp->gcstack = nil;
#endif
- gp->gcnext_sp = nil;
- runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
- m->locks--;
+ gp->gcnextsp = nil;
+ runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
+ gp->syscallsp = 0;
+ gp->m->locks--;
return;
}
- m->locks--;
+ gp->m->locks--;
// Call the scheduler.
runtime_mcall(exitsyscall0);
@@ -2130,42 +1222,47 @@ runtime_exitsyscall(void)
#ifdef USING_SPLIT_STACK
gp->gcstack = nil;
#endif
- gp->gcnext_sp = nil;
- runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
+ gp->gcnextsp = nil;
+ runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
+
+ gp->syscallsp = 0;
- // Don't refer to m again, we might be running on a different
- // thread after returning from runtime_mcall.
- runtime_m()->p->syscalltick++;
+ // Note that this gp->m might be different than the earlier
+ // gp->m after returning from runtime_mcall.
+ ((P*)gp->m->p)->syscalltick++;
}
static bool
exitsyscallfast(void)
{
+ G *gp;
P *p;
+ gp = g;
+
// Freezetheworld sets stopwait but does not retake P's.
- if(runtime_sched.stopwait) {
- m->p = nil;
+ if(runtime_sched->stopwait) {
+ gp->m->p = 0;
return false;
}
// Try to re-acquire the last P.
- if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
+ if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
// There's a cpu for us, so we can run.
- m->mcache = m->p->mcache;
- m->p->m = m;
+ gp->m->mcache = ((P*)gp->m->p)->mcache;
+ ((P*)gp->m->p)->m = (uintptr)gp->m;
return true;
}
// Try to get any other idle P.
- m->p = nil;
- if(runtime_sched.pidle) {
- runtime_lock(&runtime_sched);
+ gp->m->p = 0;
+ if(runtime_sched->pidle) {
+ runtime_lock(&runtime_sched->lock);
p = pidleget();
- if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
- runtime_atomicstore(&runtime_sched.sysmonwait, 0);
- runtime_notewakeup(&runtime_sched.sysmonnote);
+ if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
+ runtime_atomicstore(&runtime_sched->sysmonwait, 0);
+ runtime_notewakeup(&runtime_sched->sysmonnote);
}
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&runtime_sched->lock);
if(p) {
acquirep(p);
return true;
@@ -2179,75 +1276,93 @@ exitsyscallfast(void)
static void
exitsyscall0(G *gp)
{
+ M *m;
P *p;
- gp->status = Grunnable;
+ m = g->m;
+ gp->atomicstatus = _Grunnable;
gp->m = nil;
m->curg = nil;
- runtime_lock(&runtime_sched);
+ runtime_lock(&runtime_sched->lock);
p = pidleget();
if(p == nil)
globrunqput(gp);
- else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
- runtime_atomicstore(&runtime_sched.sysmonwait, 0);
- runtime_notewakeup(&runtime_sched.sysmonnote);
+ else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
+ runtime_atomicstore(&runtime_sched->sysmonwait, 0);
+ runtime_notewakeup(&runtime_sched->sysmonnote);
}
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&runtime_sched->lock);
if(p) {
acquirep(p);
- execute(gp); // Never returns.
+ execute(gp, false); // Never returns.
}
if(m->lockedg) {
// Wait until another thread schedules gp and so m again.
stoplockedm();
- execute(gp); // Never returns.
+ execute(gp, false); // Never returns.
}
stopm();
schedule(); // Never returns.
}
-// Called from syscall package before fork.
-void syscall_runtime_BeforeFork(void)
- __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
+void syscall_entersyscall(void)
+ __asm__(GOSYM_PREFIX "syscall.Entersyscall");
+
+void syscall_entersyscall(void) __attribute__ ((no_split_stack));
+
void
-syscall_runtime_BeforeFork(void)
+syscall_entersyscall()
{
- // Fork can hang if preempted with signals frequently enough (see issue 5517).
- // Ensure that we stay on the same M where we disable profiling.
- runtime_m()->locks++;
- if(runtime_m()->profilehz != 0)
- runtime_resetcpuprofiler(0);
+ runtime_entersyscall(0);
}
-// Called from syscall package after fork in parent.
-void syscall_runtime_AfterFork(void)
- __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
+void syscall_exitsyscall(void)
+ __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
+
+void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
+
void
-syscall_runtime_AfterFork(void)
+syscall_exitsyscall()
{
- int32 hz;
-
- hz = runtime_sched.profilehz;
- if(hz != 0)
- runtime_resetcpuprofiler(hz);
- runtime_m()->locks--;
+ runtime_exitsyscall(0);
}
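
syscall_entersyscall and syscall_exitsyscall are ordinary C functions made visible to Go code purely through their assembler names. A standalone sketch of that linkage pattern, with a hypothetical symbol and the prefix assumed empty (as on ELF targets; Mach-O would use "_"):

#include <stdio.h>

#define GOSYM_PREFIX ""	/* assumption: empty prefix, as on ELF targets */

int mypkg_Answer(void) __asm__ (GOSYM_PREFIX "mypkg.Answer");

int
mypkg_Answer(void)
{
	return 42;	/* the Go side would call this as mypkg.Answer() */
}

int
main(void)
{
	printf("%d\n", mypkg_Answer());
	return 0;
}
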
// Allocate a new g, with a stack big enough for stacksize bytes.
G*
-runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
+runtime_malg(bool allocatestack, bool signalstack, byte** ret_stack, uintptr* ret_stacksize)
{
+ uintptr stacksize;
G *newg;
+ byte* unused_stack;
+ uintptr unused_stacksize;
+#if USING_SPLIT_STACK
+ int dont_block_signals = 0;
+ size_t ss_stacksize;
+#endif
+ if (ret_stack == nil) {
+ ret_stack = &unused_stack;
+ }
+ if (ret_stacksize == nil) {
+ ret_stacksize = &unused_stacksize;
+ }
newg = allocg();
- if(stacksize >= 0) {
-#if USING_SPLIT_STACK
- int dont_block_signals = 0;
+ if(allocatestack) {
+ stacksize = StackMin;
+ if(signalstack) {
+ stacksize = 32 * 1024; // OS X wants >= 8K, GNU/Linux >= 2K
+#ifdef SIGSTKSZ
+ if(stacksize < SIGSTKSZ)
+ stacksize = SIGSTKSZ;
+#endif
+ }
+#if USING_SPLIT_STACK
*ret_stack = __splitstack_makecontext(stacksize,
- &newg->stack_context[0],
- ret_stacksize);
- __splitstack_block_signals_context(&newg->stack_context[0],
+ &newg->stackcontext[0],
+ &ss_stacksize);
+ *ret_stacksize = (uintptr)ss_stacksize;
+ __splitstack_block_signals_context(&newg->stackcontext[0],
&dont_block_signals, nil);
#else
// In 64-bit mode, the maximum Go allocation space is
@@ -2257,7 +1372,7 @@ runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
// 32-bit mode, the Go allocation space is all of
// memory anyhow.
if(sizeof(void*) == 8) {
- void *p = runtime_SysAlloc(stacksize, &mstats.other_sys);
+ void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
if(p == nil)
runtime_throw("runtime: cannot allocate memory for goroutine stack");
*ret_stack = (byte*)p;
@@ -2265,41 +1380,14 @@ runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
*ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
runtime_xadd(&runtime_stacks_sys, stacksize);
}
- *ret_stacksize = stacksize;
- newg->gcinitial_sp = *ret_stack;
- newg->gcstack_size = (size_t)stacksize;
+ *ret_stacksize = (uintptr)stacksize;
+ newg->gcinitialsp = *ret_stack;
+ newg->gcstacksize = (uintptr)stacksize;
#endif
}
return newg;
}
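
For the signal-stack branch above, the sizing rule is worth isolating: start from a 32K floor and raise it when the platform's SIGSTKSZ asks for more. A standalone sketch of just that computation (not runtime code):

#include <signal.h>
#include <stddef.h>
#include <stdio.h>

static size_t
signal_stack_size(void)
{
	size_t stacksize;

	stacksize = 32 * 1024;	// OS X wants >= 8K, GNU/Linux >= 2K
#ifdef SIGSTKSZ
	// SIGSTKSZ may be a sysconf-backed value on recent glibc,
	// so compare at run time rather than folding it into a constant.
	if(stacksize < (size_t)SIGSTKSZ)
		stacksize = (size_t)SIGSTKSZ;
#endif
	return stacksize;
}

int
main(void)
{
	printf("signal stack: %zu bytes\n", signal_stack_size());
	return 0;
}
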
-/* For runtime package testing. */
-
-
-// Create a new g running fn with siz bytes of arguments.
-// Put it on the queue of g's waiting to run.
-// The compiler turns a go statement into a call to this.
-// Cannot split the stack because it assumes that the arguments
-// are available sequentially after &fn; they would not be
-// copied if a stack split occurred. It's OK for this to call
-// functions that split the stack.
-void runtime_testing_entersyscall(int32)
- __asm__ (GOSYM_PREFIX "runtime.entersyscall");
-void
-runtime_testing_entersyscall(int32 dummy __attribute__ ((unused)))
-{
- runtime_entersyscall();
-}
-
-void runtime_testing_exitsyscall(int32)
- __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
-
-void
-runtime_testing_exitsyscall(int32 dummy __attribute__ ((unused)))
-{
- runtime_exitsyscall();
-}
-
G*
__go_go(void (*fn)(void*), void* arg)
{
@@ -2310,155 +1398,55 @@ __go_go(void (*fn)(void*), void* arg)
//runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
if(fn == nil) {
- m->throwing = -1; // do not dump full stacks
+ g->m->throwing = -1; // do not dump full stacks
runtime_throw("go of nil func value");
}
- m->locks++; // disable preemption because it can be holding p in a local var
+ g->m->locks++; // disable preemption because it can be holding p in a local var
- p = m->p;
+ p = (P*)g->m->p;
if((newg = gfget(p)) != nil) {
#ifdef USING_SPLIT_STACK
int dont_block_signals = 0;
- sp = __splitstack_resetcontext(&newg->stack_context[0],
+ sp = __splitstack_resetcontext(&newg->stackcontext[0],
&spsize);
- __splitstack_block_signals_context(&newg->stack_context[0],
+ __splitstack_block_signals_context(&newg->stackcontext[0],
&dont_block_signals, nil);
#else
- sp = newg->gcinitial_sp;
- spsize = newg->gcstack_size;
+ sp = newg->gcinitialsp;
+ spsize = newg->gcstacksize;
if(spsize == 0)
runtime_throw("bad spsize in __go_go");
- newg->gcnext_sp = sp;
+ newg->gcnextsp = sp;
#endif
+ newg->traceback = nil;
} else {
- newg = runtime_malg(StackMin, &sp, &spsize);
+ uintptr malsize;
+
+ newg = runtime_malg(true, false, &sp, &malsize);
+ spsize = (size_t)malsize;
+ newg->atomicstatus = _Gdead;
allgadd(newg);
}
newg->entry = (byte*)fn;
newg->param = arg;
newg->gopc = (uintptr)__builtin_return_address(0);
- newg->status = Grunnable;
+ newg->atomicstatus = _Grunnable;
if(p->goidcache == p->goidcacheend) {
- p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
+ p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
p->goidcacheend = p->goidcache + GoidCacheBatch;
}
newg->goid = p->goidcache++;
- {
- // Avoid warnings about variables clobbered by
- // longjmp.
- byte * volatile vsp = sp;
- size_t volatile vspsize = spsize;
- G * volatile vnewg = newg;
-
- getcontext(&vnewg->context);
- vnewg->context.uc_stack.ss_sp = vsp;
-#ifdef MAKECONTEXT_STACK_TOP
- vnewg->context.uc_stack.ss_sp += vspsize;
-#endif
- vnewg->context.uc_stack.ss_size = vspsize;
- makecontext(&vnewg->context, kickoff, 0);
-
- runqput(p, vnewg);
-
- if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
- wakep();
- m->locks--;
- return vnewg;
- }
-}
-
-static void
-allgadd(G *gp)
-{
- G **new;
- uintptr cap;
-
- runtime_lock(&allglock);
- if(runtime_allglen >= allgcap) {
- cap = 4096/sizeof(new[0]);
- if(cap < 2*allgcap)
- cap = 2*allgcap;
- new = runtime_malloc(cap*sizeof(new[0]));
- if(new == nil)
- runtime_throw("runtime: cannot allocate memory");
- if(runtime_allg != nil) {
- runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
- runtime_free(runtime_allg);
- }
- runtime_allg = new;
- allgcap = cap;
- }
- runtime_allg[runtime_allglen++] = gp;
- runtime_unlock(&allglock);
-}
-
-// Put on gfree list.
-// If local list is too long, transfer a batch to the global list.
-static void
-gfput(P *p, G *gp)
-{
- gp->schedlink = p->gfree;
- p->gfree = gp;
- p->gfreecnt++;
- if(p->gfreecnt >= 64) {
- runtime_lock(&runtime_sched.gflock);
- while(p->gfreecnt >= 32) {
- p->gfreecnt--;
- gp = p->gfree;
- p->gfree = gp->schedlink;
- gp->schedlink = runtime_sched.gfree;
- runtime_sched.gfree = gp;
- }
- runtime_unlock(&runtime_sched.gflock);
- }
-}
+ makeGContext(newg, sp, (uintptr)spsize);
-// Get from gfree list.
-// If local list is empty, grab a batch from global list.
-static G*
-gfget(P *p)
-{
- G *gp;
+ runqput(p, newg, true);
-retry:
- gp = p->gfree;
- if(gp == nil && runtime_sched.gfree) {
- runtime_lock(&runtime_sched.gflock);
- while(p->gfreecnt < 32 && runtime_sched.gfree) {
- p->gfreecnt++;
- gp = runtime_sched.gfree;
- runtime_sched.gfree = gp->schedlink;
- gp->schedlink = p->gfree;
- p->gfree = gp;
- }
- runtime_unlock(&runtime_sched.gflock);
- goto retry;
- }
- if(gp) {
- p->gfree = gp->schedlink;
- p->gfreecnt--;
- }
- return gp;
-}
-
-// Purge all cached G's from gfree list to the global list.
-static void
-gfpurge(P *p)
-{
- G *gp;
-
- runtime_lock(&runtime_sched.gflock);
- while(p->gfreecnt) {
- p->gfreecnt--;
- gp = p->gfree;
- p->gfree = gp->schedlink;
- gp->schedlink = runtime_sched.gfree;
- runtime_sched.gfree = gp;
- }
- runtime_unlock(&runtime_sched.gflock);
+ if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
+ wakep();
+ g->m->locks--;
+ return newg;
}
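
The goidcache refill in __go_go is a classic batched-counter pattern: one atomic add on the shared generator reserves GoidCacheBatch ids, which the owning P then hands out with ordinary stores. A standalone single-P sketch under those assumptions (C11 atomics; note runtime_xadd64 returns the *new* value, so the reserved batch is [new, new+GoidCacheBatch)):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

enum { GoidCacheBatch = 16 };

static _Atomic uint64_t goidgen;		/* shared generator */
static uint64_t goidcache, goidcacheend;	/* per-P, owner access only */

static uint64_t
next_goid(void)
{
	if(goidcache == goidcacheend) {
		/* C11 fetch_add returns the old value; runtime_xadd64
		   returns the new one, hence the extra addition. */
		goidcache = atomic_fetch_add(&goidgen, GoidCacheBatch)
			+ GoidCacheBatch;
		goidcacheend = goidcache + GoidCacheBatch;
	}
	return goidcache++;
}

int
main(void)
{
	printf("%llu %llu %llu\n",
	       (unsigned long long)next_goid(),
	       (unsigned long long)next_goid(),
	       (unsigned long long)next_goid());
	return 0;
}
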
void
@@ -2475,131 +1463,9 @@ runtime_Gosched(void)
runtime_gosched();
}
-// Implementation of runtime.GOMAXPROCS.
-// delete when scheduler is even stronger
-int32
-runtime_gomaxprocsfunc(int32 n)
-{
- int32 ret;
-
- if(n > MaxGomaxprocs)
- n = MaxGomaxprocs;
- runtime_lock(&runtime_sched);
- ret = runtime_gomaxprocs;
- if(n <= 0 || n == ret) {
- runtime_unlock(&runtime_sched);
- return ret;
- }
- runtime_unlock(&runtime_sched);
-
- runtime_semacquire(&runtime_worldsema, false);
- m->gcing = 1;
- runtime_stoptheworld();
- newprocs = n;
- m->gcing = 0;
- runtime_semrelease(&runtime_worldsema);
- runtime_starttheworld();
-
- return ret;
-}
-
-// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
-// after they modify m->locked. Do not allow preemption during this call,
-// or else the m might be different in this function than in the caller.
-static void
-lockOSThread(void)
-{
- m->lockedg = g;
- g->lockedm = m;
-}
-
-void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
-void
-runtime_LockOSThread(void)
-{
- m->locked |= LockExternal;
- lockOSThread();
-}
-
-void
-runtime_lockOSThread(void)
-{
- m->locked += LockInternal;
- lockOSThread();
-}
-
-
-// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
-// after they update m->locked. Do not allow preemption during this call,
-// or else the m might be different in this function than in the caller.
-static void
-unlockOSThread(void)
-{
- if(m->locked != 0)
- return;
- m->lockedg = nil;
- g->lockedm = nil;
-}
-
-void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
-
-void
-runtime_UnlockOSThread(void)
-{
- m->locked &= ~LockExternal;
- unlockOSThread();
-}
-
-void
-runtime_unlockOSThread(void)
-{
- if(m->locked < LockInternal)
- runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
- m->locked -= LockInternal;
- unlockOSThread();
-}
-
-bool
-runtime_lockedOSThread(void)
-{
- return g->lockedm != nil && m->lockedg != nil;
-}
-
-int32
-runtime_gcount(void)
-{
- G *gp;
- int32 n, s;
- uintptr i;
-
- n = 0;
- runtime_lock(&allglock);
- // TODO(dvyukov): runtime.NumGoroutine() is O(N).
- // We do not want to increment/decrement centralized counter in newproc/goexit,
- // just to make runtime.NumGoroutine() faster.
- // Compromise solution is to introduce per-P counters of active goroutines.
- for(i = 0; i < runtime_allglen; i++) {
- gp = runtime_allg[i];
- s = gp->status;
- if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
- n++;
- }
- runtime_unlock(&allglock);
- return n;
-}
-
-int32
-runtime_mcount(void)
-{
- return runtime_sched.mcount;
-}
-
static struct {
- Lock;
- void (*fn)(uintptr*, int32);
+ uint32 lock;
int32 hz;
- uintptr pcbuf[TracebackMaxFrames];
- Location locbuf[TracebackMaxFrames];
} prof;
static void System(void) {}
@@ -2609,11 +1475,14 @@ static void GC(void) {}
void
runtime_sigprof()
{
- M *mp = m;
+ M *mp = g->m;
int32 n, i;
bool traceback;
+ uintptr pcbuf[TracebackMaxFrames];
+ Location locbuf[TracebackMaxFrames];
+ Slice stk;
- if(prof.fn == nil || prof.hz == 0)
+ if(prof.hz == 0)
return;
if(mp == nil)
@@ -2627,12 +1496,6 @@ runtime_sigprof()
if(mp->mcache == nil)
traceback = false;
- runtime_lock(&prof);
- if(prof.fn == nil) {
- runtime_unlock(&prof);
- mp->mallocing--;
- return;
- }
n = 0;
if(runtime_atomicload(&runtime_in_callers) > 0) {
@@ -2644,797 +1507,68 @@ runtime_sigprof()
}
if(traceback) {
- n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf), false);
+ n = runtime_callers(0, locbuf, nelem(locbuf), false);
for(i = 0; i < n; i++)
- prof.pcbuf[i] = prof.locbuf[i].pc;
+ pcbuf[i] = locbuf[i].pc;
}
if(!traceback || n <= 0) {
n = 2;
- prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
+ pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
if(mp->gcing || mp->helpgc)
- prof.pcbuf[1] = (uintptr)GC;
+ pcbuf[1] = (uintptr)GC;
else
- prof.pcbuf[1] = (uintptr)System;
+ pcbuf[1] = (uintptr)System;
+ }
+
+ if (prof.hz != 0) {
+ stk.__values = &pcbuf[0];
+ stk.__count = n;
+ stk.__capacity = n;
+
+ // Simple cas-lock to coordinate with setcpuprofilerate.
+ while (!runtime_cas(&prof.lock, 0, 1)) {
+ runtime_osyield();
+ }
+ if (prof.hz != 0) {
+ runtime_cpuprofAdd(stk);
+ }
+ runtime_atomicstore(&prof.lock, 0);
}
- prof.fn(prof.pcbuf, n);
- runtime_unlock(&prof);
+
mp->mallocing--;
}
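
The prof.lock pattern above is about the smallest useful spinlock: one word toggled with CAS, with an OS yield instead of blocking, because a signal handler must never sleep on a mutex it might itself have interrupted. A standalone sketch (hypothetical names; sched_yield stands in for runtime_osyield):

#include <sched.h>
#include <stdatomic.h>

static _Atomic unsigned int proflock;

static void
prof_lock(void)
{
	unsigned int expect;

	for(;;) {
		expect = 0;
		/* weak CAS may fail spuriously; we just go around again */
		if(atomic_compare_exchange_weak(&proflock, &expect, 1))
			return;
		sched_yield();
	}
}

static void
prof_unlock(void)
{
	atomic_store(&proflock, 0);
}

int
main(void)
{
	prof_lock();
	/* critical section: safe to read and update the profiler state */
	prof_unlock();
	return 0;
}
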
// Arrange to call fn with a traceback hz times a second.
void
-runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
+runtime_setcpuprofilerate_m(int32 hz)
{
// Force sane arguments.
if(hz < 0)
hz = 0;
- if(hz == 0)
- fn = nil;
- if(fn == nil)
- hz = 0;
// Disable preemption, otherwise we can be rescheduled to another thread
// that has profiling enabled.
- m->locks++;
+ g->m->locks++;
// Stop profiler on this thread so that it is safe to lock prof.
// if a profiling signal came in while we had prof locked,
// it would deadlock.
runtime_resetcpuprofiler(0);
- runtime_lock(&prof);
- prof.fn = fn;
+ while (!runtime_cas(&prof.lock, 0, 1)) {
+ runtime_osyield();
+ }
prof.hz = hz;
- runtime_unlock(&prof);
- runtime_lock(&runtime_sched);
- runtime_sched.profilehz = hz;
- runtime_unlock(&runtime_sched);
+ runtime_atomicstore(&prof.lock, 0);
+
+ runtime_lock(&runtime_sched->lock);
+ runtime_sched->profilehz = hz;
+ runtime_unlock(&runtime_sched->lock);
if(hz != 0)
runtime_resetcpuprofiler(hz);
- m->locks--;
-}
-
-// Change number of processors. The world is stopped, sched is locked.
-static void
-procresize(int32 new)
-{
- int32 i, old;
- bool empty;
- G *gp;
- P *p;
-
- old = runtime_gomaxprocs;
-	if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
- runtime_throw("procresize: invalid arg");
- // initialize new P's
- for(i = 0; i < new; i++) {
- p = runtime_allp[i];
- if(p == nil) {
- p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
- p->id = i;
- p->status = Pgcstop;
- runtime_atomicstorep(&runtime_allp[i], p);
- }
- if(p->mcache == nil) {
- if(old==0 && i==0)
- p->mcache = m->mcache; // bootstrap
- else
- p->mcache = runtime_allocmcache();
- }
- }
-
- // redistribute runnable G's evenly
- // collect all runnable goroutines in global queue preserving FIFO order
- // FIFO order is required to ensure fairness even during frequent GCs
- // see http://golang.org/issue/7126
- empty = false;
- while(!empty) {
- empty = true;
- for(i = 0; i < old; i++) {
- p = runtime_allp[i];
- if(p->runqhead == p->runqtail)
- continue;
- empty = false;
- // pop from tail of local queue
- p->runqtail--;
- gp = p->runq[p->runqtail%nelem(p->runq)];
- // push onto head of global queue
- gp->schedlink = runtime_sched.runqhead;
- runtime_sched.runqhead = gp;
- if(runtime_sched.runqtail == nil)
- runtime_sched.runqtail = gp;
- runtime_sched.runqsize++;
- }
- }
- // fill local queues with at most nelem(p->runq)/2 goroutines
- // start at 1 because current M already executes some G and will acquire allp[0] below,
- // so if we have a spare G we want to put it into allp[1].
- for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
- gp = runtime_sched.runqhead;
- runtime_sched.runqhead = gp->schedlink;
- if(runtime_sched.runqhead == nil)
- runtime_sched.runqtail = nil;
- runtime_sched.runqsize--;
- runqput(runtime_allp[i%new], gp);
- }
-
- // free unused P's
- for(i = new; i < old; i++) {
- p = runtime_allp[i];
- runtime_freemcache(p->mcache);
- p->mcache = nil;
- gfpurge(p);
- p->status = Pdead;
- // can't free P itself because it can be referenced by an M in syscall
- }
-
- if(m->p)
- m->p->m = nil;
- m->p = nil;
- m->mcache = nil;
- p = runtime_allp[0];
- p->m = nil;
- p->status = Pidle;
- acquirep(p);
- for(i = new-1; i > 0; i--) {
- p = runtime_allp[i];
- p->status = Pidle;
- pidleput(p);
- }
- runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
-}
-
-// Associate p and the current m.
-static void
-acquirep(P *p)
-{
- if(m->p || m->mcache)
- runtime_throw("acquirep: already in go");
- if(p->m || p->status != Pidle) {
- runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
- runtime_throw("acquirep: invalid p state");
- }
- m->mcache = p->mcache;
- m->p = p;
- p->m = m;
- p->status = Prunning;
-}
-
-// Disassociate p and the current m.
-static P*
-releasep(void)
-{
- P *p;
-
- if(m->p == nil || m->mcache == nil)
- runtime_throw("releasep: invalid arg");
- p = m->p;
- if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
- runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
- m, m->p, p->m, m->mcache, p->mcache, p->status);
- runtime_throw("releasep: invalid p state");
- }
- m->p = nil;
- m->mcache = nil;
- p->m = nil;
- p->status = Pidle;
- return p;
-}
-
-static void
-incidlelocked(int32 v)
-{
- runtime_lock(&runtime_sched);
- runtime_sched.nmidlelocked += v;
- if(v > 0)
- checkdead();
- runtime_unlock(&runtime_sched);
-}
-
-// Check for deadlock situation.
-// The check is based on number of running M's, if 0 -> deadlock.
-static void
-checkdead(void)
-{
- G *gp;
- int32 run, grunning, s;
- uintptr i;
-
- // For -buildmode=c-shared or -buildmode=c-archive it's OK if
- // there are no running goroutines. The calling program is
- // assumed to be running.
- if(runtime_isarchive) {
- return;
- }
-
- // -1 for sysmon
- run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
- if(run > 0)
- return;
- // If we are dying because of a signal caught on an already idle thread,
- // freezetheworld will cause all running threads to block.
- // And runtime will essentially enter into deadlock state,
- // except that there is a thread that will call runtime_exit soon.
- if(runtime_panicking > 0)
- return;
- if(run < 0) {
- runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
- runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
- runtime_throw("checkdead: inconsistent counts");
- }
- grunning = 0;
- runtime_lock(&allglock);
- for(i = 0; i < runtime_allglen; i++) {
- gp = runtime_allg[i];
- if(gp->isbackground)
- continue;
- s = gp->status;
- if(s == Gwaiting)
- grunning++;
- else if(s == Grunnable || s == Grunning || s == Gsyscall) {
- runtime_unlock(&allglock);
- runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
- runtime_throw("checkdead: runnable g");
- }
- }
- runtime_unlock(&allglock);
- if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
- runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
- m->throwing = -1; // do not dump full stacks
- runtime_throw("all goroutines are asleep - deadlock!");
-}
-
-static void
-sysmon(void)
-{
- uint32 idle, delay;
- int64 now, lastpoll, lasttrace;
- G *gp;
-
- lasttrace = 0;
-	idle = 0; // how many cycles in succession we have not woken anybody up
- delay = 0;
- for(;;) {
- if(idle == 0) // start with 20us sleep...
- delay = 20;
- else if(idle > 50) // start doubling the sleep after 1ms...
- delay *= 2;
- if(delay > 10*1000) // up to 10ms
- delay = 10*1000;
- runtime_usleep(delay);
- if(runtime_debug.schedtrace <= 0 &&
- (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
- runtime_lock(&runtime_sched);
- if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
- runtime_atomicstore(&runtime_sched.sysmonwait, 1);
- runtime_unlock(&runtime_sched);
- runtime_notesleep(&runtime_sched.sysmonnote);
- runtime_noteclear(&runtime_sched.sysmonnote);
- idle = 0;
- delay = 20;
- } else
- runtime_unlock(&runtime_sched);
- }
- // poll network if not polled for more than 10ms
- lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
- now = runtime_nanotime();
- if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
- runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
- gp = runtime_netpoll(false); // non-blocking
- if(gp) {
- // Need to decrement number of idle locked M's
- // (pretending that one more is running) before injectglist.
- // Otherwise it can lead to the following situation:
- // injectglist grabs all P's but before it starts M's to run the P's,
- // another M returns from syscall, finishes running its G,
- // observes that there is no work to do and no other running M's
- // and reports deadlock.
- incidlelocked(-1);
- injectglist(gp);
- incidlelocked(1);
- }
- }
- // retake P's blocked in syscalls
- // and preempt long running G's
- if(retake(now))
- idle = 0;
- else
- idle++;
-
- if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
- lasttrace = now;
- runtime_schedtrace(runtime_debug.scheddetail);
- }
- }
-}
-
-typedef struct Pdesc Pdesc;
-struct Pdesc
-{
- uint32 schedtick;
- int64 schedwhen;
- uint32 syscalltick;
- int64 syscallwhen;
-};
-static Pdesc pdesc[MaxGomaxprocs];
-
-static uint32
-retake(int64 now)
-{
- uint32 i, s, n;
- int64 t;
- P *p;
- Pdesc *pd;
-
- n = 0;
- for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- if(p==nil)
- continue;
- pd = &pdesc[i];
- s = p->status;
- if(s == Psyscall) {
- // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
- t = p->syscalltick;
- if(pd->syscalltick != t) {
- pd->syscalltick = t;
- pd->syscallwhen = now;
- continue;
- }
- // On the one hand we don't want to retake Ps if there is no other work to do,
- // but on the other hand we want to retake them eventually
- // because they can prevent the sysmon thread from deep sleep.
- if(p->runqhead == p->runqtail &&
- runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
- pd->syscallwhen + 10*1000*1000 > now)
- continue;
- // Need to decrement number of idle locked M's
- // (pretending that one more is running) before the CAS.
- // Otherwise the M from which we retake can exit the syscall,
- // increment nmidle and report deadlock.
- incidlelocked(-1);
- if(runtime_cas(&p->status, s, Pidle)) {
- n++;
- handoffp(p);
- }
- incidlelocked(1);
- } else if(s == Prunning) {
- // Preempt G if it's running for more than 10ms.
- t = p->schedtick;
- if(pd->schedtick != t) {
- pd->schedtick = t;
- pd->schedwhen = now;
- continue;
- }
- if(pd->schedwhen + 10*1000*1000 > now)
- continue;
- // preemptone(p);
- }
- }
- return n;
-}
-
-// Tell all goroutines that they have been preempted and they should stop.
-// This function is purely best-effort. It can fail to inform a goroutine if a
-// processor just started running it.
-// No locks need to be held.
-// Returns true if preemption request was issued to at least one goroutine.
-static bool
-preemptall(void)
-{
- return false;
-}
-
-void
-runtime_schedtrace(bool detailed)
-{
- static int64 starttime;
- int64 now;
- int64 id1, id2, id3;
- int32 i, t, h;
- uintptr gi;
- const char *fmt;
- M *mp, *lockedm;
- G *gp, *lockedg;
- P *p;
-
- now = runtime_nanotime();
- if(starttime == 0)
- starttime = now;
-
- runtime_lock(&runtime_sched);
- runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
- (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
- runtime_sched.nmidle, runtime_sched.runqsize);
- if(detailed) {
- runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
- runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
- runtime_sched.stopwait, runtime_sched.sysmonwait);
- }
- // We must be careful while reading data from P's, M's and G's.
- // Even if we hold schedlock, most data can be changed concurrently.
- // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
- for(i = 0; i < runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- if(p == nil)
- continue;
- mp = p->m;
- h = runtime_atomicload(&p->runqhead);
- t = runtime_atomicload(&p->runqtail);
- if(detailed)
- runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
- i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
- else {
- // In non-detailed mode format lengths of per-P run queues as:
- // [len1 len2 len3 len4]
- fmt = " %d";
- if(runtime_gomaxprocs == 1)
- fmt = " [%d]\n";
- else if(i == 0)
- fmt = " [%d";
- else if(i == runtime_gomaxprocs-1)
- fmt = " %d]\n";
- runtime_printf(fmt, t-h);
- }
- }
- if(!detailed) {
- runtime_unlock(&runtime_sched);
- return;
- }
- for(mp = runtime_allm; mp; mp = mp->alllink) {
- p = mp->p;
- gp = mp->curg;
- lockedg = mp->lockedg;
- id1 = -1;
- if(p)
- id1 = p->id;
- id2 = -1;
- if(gp)
- id2 = gp->goid;
- id3 = -1;
- if(lockedg)
- id3 = lockedg->goid;
- runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
- " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
- mp->id, id1, id2,
- mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
- mp->spinning, m->blocked, id3);
- }
- runtime_lock(&allglock);
- for(gi = 0; gi < runtime_allglen; gi++) {
- gp = runtime_allg[gi];
- mp = gp->m;
- lockedm = gp->lockedm;
- runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
- gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
- lockedm ? lockedm->id : -1);
- }
- runtime_unlock(&allglock);
- runtime_unlock(&runtime_sched);
-}
-
-// Put mp on midle list.
-// Sched must be locked.
-static void
-mput(M *mp)
-{
- mp->schedlink = runtime_sched.midle;
- runtime_sched.midle = mp;
- runtime_sched.nmidle++;
- checkdead();
-}
-
-// Try to get an m from midle list.
-// Sched must be locked.
-static M*
-mget(void)
-{
- M *mp;
-
- if((mp = runtime_sched.midle) != nil){
- runtime_sched.midle = mp->schedlink;
- runtime_sched.nmidle--;
- }
- return mp;
-}
-
-// Put gp on the global runnable queue.
-// Sched must be locked.
-static void
-globrunqput(G *gp)
-{
- gp->schedlink = nil;
- if(runtime_sched.runqtail)
- runtime_sched.runqtail->schedlink = gp;
- else
- runtime_sched.runqhead = gp;
- runtime_sched.runqtail = gp;
- runtime_sched.runqsize++;
-}
-
-// Put a batch of runnable goroutines on the global runnable queue.
-// Sched must be locked.
-static void
-globrunqputbatch(G *ghead, G *gtail, int32 n)
-{
- gtail->schedlink = nil;
- if(runtime_sched.runqtail)
- runtime_sched.runqtail->schedlink = ghead;
- else
- runtime_sched.runqhead = ghead;
- runtime_sched.runqtail = gtail;
- runtime_sched.runqsize += n;
-}
-
-// Try get a batch of G's from the global runnable queue.
-// Sched must be locked.
-static G*
-globrunqget(P *p, int32 max)
-{
- G *gp, *gp1;
- int32 n;
-
- if(runtime_sched.runqsize == 0)
- return nil;
- n = runtime_sched.runqsize/runtime_gomaxprocs+1;
- if(n > runtime_sched.runqsize)
- n = runtime_sched.runqsize;
- if(max > 0 && n > max)
- n = max;
- if((uint32)n > nelem(p->runq)/2)
- n = nelem(p->runq)/2;
- runtime_sched.runqsize -= n;
- if(runtime_sched.runqsize == 0)
- runtime_sched.runqtail = nil;
- gp = runtime_sched.runqhead;
- runtime_sched.runqhead = gp->schedlink;
- n--;
- while(n--) {
- gp1 = runtime_sched.runqhead;
- runtime_sched.runqhead = gp1->schedlink;
- runqput(p, gp1);
- }
- return gp;
-}
-
-// Put p on the pidle list.
-// Sched must be locked.
-static void
-pidleput(P *p)
-{
- p->link = runtime_sched.pidle;
- runtime_sched.pidle = p;
- runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
-}
-
-// Try get a p from pidle list.
-// Sched must be locked.
-static P*
-pidleget(void)
-{
- P *p;
-
- p = runtime_sched.pidle;
- if(p) {
- runtime_sched.pidle = p->link;
- runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
- }
- return p;
-}
-
-// Try to put g on local runnable queue.
-// If it's full, put onto global queue.
-// Executed only by the owner P.
-static void
-runqput(P *p, G *gp)
-{
- uint32 h, t;
-
-retry:
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
- t = p->runqtail;
- if(t - h < nelem(p->runq)) {
- p->runq[t%nelem(p->runq)] = gp;
- runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
- return;
- }
- if(runqputslow(p, gp, h, t))
- return;
-	// the queue is not full, now the put above must succeed
- goto retry;
-}
-
-// Put g and a batch of work from local runnable queue on global queue.
-// Executed only by the owner P.
-static bool
-runqputslow(P *p, G *gp, uint32 h, uint32 t)
-{
- G *batch[nelem(p->runq)/2+1];
- uint32 n, i;
-
- // First, grab a batch from local queue.
- n = t-h;
- n = n/2;
- if(n != nelem(p->runq)/2)
- runtime_throw("runqputslow: queue is not full");
- for(i=0; i<n; i++)
- batch[i] = p->runq[(h+i)%nelem(p->runq)];
- if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
- return false;
- batch[n] = gp;
- // Link the goroutines.
- for(i=0; i<n; i++)
- batch[i]->schedlink = batch[i+1];
- // Now put the batch on global queue.
- runtime_lock(&runtime_sched);
- globrunqputbatch(batch[0], batch[n], n+1);
- runtime_unlock(&runtime_sched);
- return true;
-}
-
-// Get g from local runnable queue.
-// Executed only by the owner P.
-static G*
-runqget(P *p)
-{
- G *gp;
- uint32 t, h;
-
- for(;;) {
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
- t = p->runqtail;
- if(t == h)
- return nil;
- gp = p->runq[h%nelem(p->runq)];
- if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
- return gp;
- }
-}
-
-// Grabs a batch of goroutines from local runnable queue.
-// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
-// Can be executed by any P.
-static uint32
-runqgrab(P *p, G **batch)
-{
- uint32 t, h, n, i;
-
- for(;;) {
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
- t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
- n = t-h;
- n = n - n/2;
- if(n == 0)
- break;
- if(n > nelem(p->runq)/2) // read inconsistent h and t
- continue;
- for(i=0; i<n; i++)
- batch[i] = p->runq[(h+i)%nelem(p->runq)];
- if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
- break;
- }
- return n;
-}
-
-// Steal half of elements from local runnable queue of p2
-// and put onto local runnable queue of p.
-// Returns one of the stolen elements (or nil if failed).
-static G*
-runqsteal(P *p, P *p2)
-{
- G *gp;
- G *batch[nelem(p->runq)/2];
- uint32 t, h, n, i;
-
- n = runqgrab(p2, batch);
- if(n == 0)
- return nil;
- n--;
- gp = batch[n];
- if(n == 0)
- return gp;
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
- t = p->runqtail;
- if(t - h + n >= nelem(p->runq))
- runtime_throw("runqsteal: runq overflow");
- for(i=0; i<n; i++, t++)
- p->runq[t%nelem(p->runq)] = batch[i];
- runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
- return gp;
-}
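
Although runqput/runqget/runqgrab move out of this file in this commit, the ring they implement deserves a compact restatement: consumers advance head with a CAS (which also releases the consumed slots), while the owner alone advances tail with a store-release. A standalone single-producer sketch under those assumptions (C11 atomics, illustrative size; not the runtime's code):

#include <stdatomic.h>
#include <stdint.h>

#define RUNQ 256

typedef struct Runq Runq;
struct Runq
{
	_Atomic uint32_t head;	/* advanced by any consumer via CAS */
	_Atomic uint32_t tail;	/* advanced only by the owner */
	void *q[RUNQ];
};

/* Owner-only put; returns 0 when full (real code spills to the global queue). */
static int
runq_put(Runq *r, void *g)
{
	uint32_t h, t;

	h = atomic_load_explicit(&r->head, memory_order_acquire);
	t = atomic_load_explicit(&r->tail, memory_order_relaxed);
	if(t - h >= RUNQ)
		return 0;
	r->q[t % RUNQ] = g;
	/* store-release makes the slot visible before the new tail */
	atomic_store_explicit(&r->tail, t + 1, memory_order_release);
	return 1;
}

/* Any-thread get; the CAS on head commits the consume. */
static void *
runq_get(Runq *r)
{
	uint32_t h, t;
	void *g;

	for(;;) {
		h = atomic_load_explicit(&r->head, memory_order_acquire);
		t = atomic_load_explicit(&r->tail, memory_order_acquire);
		if(t == h)
			return 0;
		g = r->q[h % RUNQ];
		if(atomic_compare_exchange_weak(&r->head, &h, h + 1))
			return g;
	}
}

int
main(void)
{
	static Runq r;
	int g1, g2;

	runq_put(&r, &g1);
	runq_put(&r, &g2);
	return (runq_get(&r) == &g1 && runq_get(&r) == &g2) ? 0 : 1;
}
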
-
-void runtime_testSchedLocalQueue(void)
- __asm__("runtime.testSchedLocalQueue");
-
-void
-runtime_testSchedLocalQueue(void)
-{
- P p;
- G gs[nelem(p.runq)];
- int32 i, j;
-
- runtime_memclr((byte*)&p, sizeof(p));
-
- for(i = 0; i < (int32)nelem(gs); i++) {
- if(runqget(&p) != nil)
- runtime_throw("runq is not empty initially");
- for(j = 0; j < i; j++)
- runqput(&p, &gs[i]);
- for(j = 0; j < i; j++) {
- if(runqget(&p) != &gs[i]) {
- runtime_printf("bad element at iter %d/%d\n", i, j);
- runtime_throw("bad element");
- }
- }
- if(runqget(&p) != nil)
- runtime_throw("runq is not empty afterwards");
- }
-}
-
-void runtime_testSchedLocalQueueSteal(void)
- __asm__("runtime.testSchedLocalQueueSteal");
-
-void
-runtime_testSchedLocalQueueSteal(void)
-{
- P p1, p2;
- G gs[nelem(p1.runq)], *gp;
- int32 i, j, s;
-
- runtime_memclr((byte*)&p1, sizeof(p1));
- runtime_memclr((byte*)&p2, sizeof(p2));
-
- for(i = 0; i < (int32)nelem(gs); i++) {
- for(j = 0; j < i; j++) {
- gs[j].sig = 0;
- runqput(&p1, &gs[j]);
- }
- gp = runqsteal(&p2, &p1);
- s = 0;
- if(gp) {
- s++;
- gp->sig++;
- }
- while((gp = runqget(&p2)) != nil) {
- s++;
- gp->sig++;
- }
- while((gp = runqget(&p1)) != nil)
- gp->sig++;
- for(j = 0; j < i; j++) {
- if(gs[j].sig != 1) {
- runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
- runtime_throw("bad element");
- }
- }
- if(s != i/2 && s != i/2+1) {
- runtime_printf("bad steal %d, want %d or %d, iter %d\n",
- s, i/2, i/2+1, i);
- runtime_throw("bad steal");
- }
- }
-}
-
-int32
-runtime_setmaxthreads(int32 in)
-{
- int32 out;
-
- runtime_lock(&runtime_sched);
- out = runtime_sched.maxmcount;
- runtime_sched.maxmcount = in;
- checkmcount();
- runtime_unlock(&runtime_sched);
- return out;
-}
-
-void
-runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
-{
- enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
- enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
+ g->m->locks--;
}
// Return whether we are waiting for a GC. This gc toolchain uses
@@ -3442,7 +1576,7 @@ runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj
bool
runtime_gcwaiting(void)
{
- return runtime_sched.gcwaiting;
+ return runtime_sched->gcwaiting;
}
// os_beforeExit is called from os.Exit(0).
@@ -3455,43 +1589,10 @@ os_beforeExit()
{
}
-// Active spinning for sync.Mutex.
-//go:linkname sync_runtime_canSpin sync.runtime_canSpin
-
-enum
-{
- ACTIVE_SPIN = 4,
- ACTIVE_SPIN_CNT = 30,
-};
-
-extern _Bool sync_runtime_canSpin(intgo i)
- __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
+intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");
-_Bool
-sync_runtime_canSpin(intgo i)
-{
- P *p;
-
- // sync.Mutex is cooperative, so we are conservative with spinning.
- // Spin only few times and only if running on a multicore machine and
- // GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
- // As opposed to runtime mutex we don't do passive spinning here,
-	// because there can be work on the global runq or on other Ps.
- if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) {
- return false;
- }
- p = m->p;
- return p != nil && p->runqhead == p->runqtail;
-}
-
-//go:linkname sync_runtime_doSpin sync.runtime_doSpin
-//go:nosplit
-
-extern void sync_runtime_doSpin(void)
- __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
-
-void
-sync_runtime_doSpin()
+intgo
+NumCPU()
{
- runtime_procyield(ACTIVE_SPIN_CNT);
+ return (intgo)(runtime_ncpu);
}