Diffstat (limited to 'libgo/go/runtime')
110 files changed, 4302 insertions, 1979 deletions
diff --git a/libgo/go/runtime/atomic_pointer.go b/libgo/go/runtime/atomic_pointer.go index 03d8d6a48d5..49b0f2b84fd 100644 --- a/libgo/go/runtime/atomic_pointer.go +++ b/libgo/go/runtime/atomic_pointer.go @@ -13,8 +13,6 @@ import ( // because while ptr does not escape, new does. // If new is marked as not escaping, the compiler will make incorrect // escape analysis decisions about the pointer value being stored. -// Instead, these are wrappers around the actual atomics (casp1 and so on) -// that use noescape to convey which arguments do not escape. // atomicwb performs a write barrier before an atomic pointer write. // The caller should guard the call with "if writeBarrier.enabled". @@ -37,17 +35,6 @@ func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) { atomic.StorepNoWB(noescape(ptr), new) } -//go:nosplit -func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { - // The write barrier is only necessary if the CAS succeeds, - // but since it needs to happen before the write becomes - // public, we have to do it conservatively all the time. - if writeBarrier.enabled { - atomicwb(ptr, new) - } - return atomic.Casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), noescape(old), new) -} - // Like above, but implement in terms of sync/atomic's uintptr operations. // We cannot just call the runtime routines, because the race detector expects // to be able to intercept the sync/atomic forms but not the runtime forms. diff --git a/libgo/go/runtime/cgo_gccgo.go b/libgo/go/runtime/cgo_gccgo.go index e689b0e2616..aff8130a27c 100644 --- a/libgo/go/runtime/cgo_gccgo.go +++ b/libgo/go/runtime/cgo_gccgo.go @@ -84,8 +84,8 @@ func CgocallBack() { lockOSThread() - exitsyscall() gp.m.incgo = false + exitsyscall() if gp.m.ncgo == 0 { // The C call to Go came from a thread created by C. @@ -108,37 +108,41 @@ func CgocallBack() { func CgocallBackDone() { unlockOSThread() - // If we are the top level Go function called from C/C++, then - // we need to release the m. But don't release it if we are - // panicing; since this is the top level, we are going to - // crash the program, and we need the g and m to print the - // panic values. - // - // Dropping the m is going to clear g. This function is being - // called as a deferred function, so we will return to - // deferreturn which will want to clear the _defer field. - // As soon as we call dropm another thread may call needm and - // start using g, so we must not tamper with the _defer field - // after dropm. So clear _defer now. + // We are going to stop running in Go mode and return to C mode. + // We were almost certainly called by defer; if so, clean up + // the defer struct now, before we leave Go mode. But don't + // leave Go mode if we are panicing or called from Goexit, + // since in those cases we will continue executing deferred functions. gp := getg() mp := gp.m drop := false - if mp.dropextram && mp.ncgo == 0 && gp._panic == nil { + if gp.deferring && gp._panic == nil && !gp.goexiting { d := gp._defer - if d == nil || d.link != nil { - throw("unexpected g._defer in CgocallBackDone") + if d == nil { + throw("no defer struct when deferring") } - gp._defer = nil + gp._defer = d.link freedefer(d) - drop = true + + // If we are the top level Go function called from C, + // then we need to release the m. + if mp.dropextram && mp.ncgo == 0 { + drop = true + } } - gp.m.incgo = true - entersyscall() + // Don't go back to C mode if we are panicing. Just let the + // panic walk up through the Go stack. 
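The atomic_pointer.go hunk above keeps atomicstorep's long-standing discipline: when the write barrier is enabled, run it before the pointer store, because the value becomes visible to other goroutines the instant the atomic store lands (the deleted casp comment makes the same point for the CAS case). Below is a minimal standalone sketch of that ordering; writeBarrierEnabled and writeBarrier are stand-in names for the runtime's writeBarrier.enabled flag and atomicwb helper, and sync/atomic.StorePointer stands in for atomic.StorepNoWB.

package main

import (
	"sync/atomic"
	"unsafe"
)

// Stand-in for the runtime's writeBarrier.enabled flag; illustrative only.
var writeBarrierEnabled bool

// writeBarrier plays the role of the runtime's atomicwb: it would record the
// old and new pointer values for the garbage collector.
func writeBarrier(slot *unsafe.Pointer, newVal unsafe.Pointer) {}

// storePointer publishes p into *slot. The barrier runs first, and
// unconditionally on this path, because a reader may observe the new value
// as soon as the atomic store completes.
func storePointer(slot *unsafe.Pointer, p unsafe.Pointer) {
	if writeBarrierEnabled {
		writeBarrier(slot, p)
	}
	atomic.StorePointer(slot, p)
}

func main() {
	var slot unsafe.Pointer
	x := 42
	storePointer(&slot, unsafe.Pointer(&x))
	_ = atomic.LoadPointer(&slot)
}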
+ if gp._panic == nil && !gp.goexiting { + gp.m.incgo = true + entersyscall() + } if drop { mp.dropextram = false dropm() + } else if gp.deferring && gp._panic == nil && !gp.goexiting { + gp.ranCgocallBackDone = true } } diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index 24bf749e5a7..57b42ff2eae 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -254,7 +254,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { return } -// cgoIsGoPointer returns whether the pointer is a Go pointer--a +// cgoIsGoPointer reports whether the pointer is a Go pointer--a // pointer to Go memory. We only care about Go memory that might // contain pointers. //go:nosplit @@ -283,7 +283,7 @@ func cgoIsGoPointer(p unsafe.Pointer) bool { return false } -// cgoInRange returns whether p is between start and end. +// cgoInRange reports whether p is between start and end. //go:nosplit //go:nowritebarrierrec func cgoInRange(p unsafe.Pointer, start, end uintptr) bool { diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index d896fb7d79d..c9e40473771 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -43,6 +43,13 @@ func cgoCheckWriteBarrier(dst *uintptr, src uintptr) { return } + // It's OK if writing to memory allocated by persistentalloc. + // Do this check last because it is more expensive and rarely true. + // If it is false the expense doesn't matter since we are crashing. + if inPersistentAlloc(uintptr(unsafe.Pointer(dst))) { + return + } + systemstack(func() { println("write of Go pointer", hex(src), "to non-Go memory", hex(uintptr(unsafe.Pointer(dst)))) throw(cgoWriteBarrierFail) @@ -127,7 +134,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { } s := spanOfUnchecked(uintptr(src)) - if s.state == _MSpanManual { + if s.state == mSpanManual { // There are no heap bits for value stored on the stack. // For a channel receive src might be on the stack of some // other goroutine, so we can't unwind the stack even if diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index cb369effb8c..6dfe2f3fc3e 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -19,6 +19,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/math" "unsafe" ) @@ -88,7 +89,8 @@ func makechan(t *chantype, size int) *hchan { throw("makechan: bad alignment") } - if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > maxAlloc-hchanSize { + mem, overflow := math.MulUintptr(elem.size, uintptr(size)) + if overflow || mem > maxAlloc-hchanSize || size < 0 { panic(plainError("makechan: size out of range")) } @@ -98,7 +100,7 @@ func makechan(t *chantype, size int) *hchan { // TODO(dvyukov,rlh): Rethink when collector can move allocated objects. var c *hchan switch { - case size == 0 || elem.size == 0: + case mem == 0: // Queue or element size is zero. c = (*hchan)(mallocgc(hchanSize, nil, true)) // Race detector uses this location for synchronization. @@ -106,12 +108,12 @@ func makechan(t *chantype, size int) *hchan { case elem.kind&kindNoPointers != 0: // Elements do not contain pointers. // Allocate hchan and buf in one call. - c = (*hchan)(mallocgc(hchanSize+uintptr(size)*elem.size, nil, true)) + c = (*hchan)(mallocgc(hchanSize+mem, nil, true)) c.buf = add(unsafe.Pointer(c), hchanSize) default: // Elements contain pointers. 
c = new(hchan) - c.buf = mallocgc(uintptr(size)*elem.size, elem, true) + c.buf = mallocgc(mem, elem, true) } c.elemsize = uint16(elem.size) @@ -247,6 +249,11 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { gp.param = nil c.sendq.enqueue(mysg) goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3) + // Ensure the value being sent is kept alive until the + // receiver copies it out. The sudog has a pointer to the + // stack object, but sudogs aren't considered as roots of the + // stack tracer. + KeepAlive(ep) // someone woke us up. if mysg != gp.waiting { @@ -358,7 +365,7 @@ func closechan(c *hchan) { c.closed = 1 - var glist *g + var glist gList // release all readers for { @@ -378,8 +385,7 @@ func closechan(c *hchan) { if raceenabled { raceacquireg(gp, c.raceaddr()) } - gp.schedlink.set(glist) - glist = gp + glist.push(gp) } // release all writers (they will panic) @@ -397,15 +403,13 @@ func closechan(c *hchan) { if raceenabled { raceacquireg(gp, c.raceaddr()) } - gp.schedlink.set(glist) - glist = gp + glist.push(gp) } unlock(&c.lock) // Ready all Gs now that we've dropped the channel lock. - for glist != nil { - gp := glist - glist = glist.schedlink.ptr() + for !glist.empty() { + gp := glist.pop() gp.schedlink = 0 goready(gp, 3) } diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go index 770f85e96d4..f437b9a7e0f 100644 --- a/libgo/go/runtime/crash_cgo_test.go +++ b/libgo/go/runtime/crash_cgo_test.go @@ -269,7 +269,7 @@ func TestCgoTracebackContext(t *testing.T) { } } -func testCgoPprof(t *testing.T, buildArg, runArg string) { +func testCgoPprof(t *testing.T, buildArg, runArg, top, bottom string) { t.Parallel() if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) @@ -296,7 +296,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) { defer os.Remove(fn) for try := 0; try < 2; try++ { - cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1")) + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-traces")) // Check that pprof works both with and without explicit executable on command line. if try == 0 { cmd.Args = append(cmd.Args, exe, fn) @@ -316,30 +316,38 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) { cmd.Env = append(cmd.Env, "PPROF_TMPDIR="+os.TempDir()) } - top, err := cmd.CombinedOutput() - t.Logf("%s:\n%s", cmd.Args, top) + out, err := cmd.CombinedOutput() + t.Logf("%s:\n%s", cmd.Args, out) if err != nil { t.Error(err) - } else if !bytes.Contains(top, []byte("cpuHog")) { - t.Error("missing cpuHog in pprof output") + continue + } + + trace := findTrace(string(out), top) + if len(trace) == 0 { + t.Errorf("%s traceback missing.", top) + continue + } + if trace[len(trace)-1] != bottom { + t.Errorf("invalid traceback origin: got=%v; want=[%s ... 
%s]", trace, top, bottom) } } } func TestCgoPprof(t *testing.T) { - testCgoPprof(t, "", "CgoPprof") + testCgoPprof(t, "", "CgoPprof", "cpuHog", "runtime.main") } func TestCgoPprofPIE(t *testing.T) { - testCgoPprof(t, "-buildmode=pie", "CgoPprof") + testCgoPprof(t, "-buildmode=pie", "CgoPprof", "cpuHog", "runtime.main") } func TestCgoPprofThread(t *testing.T) { - testCgoPprof(t, "", "CgoPprofThread") + testCgoPprof(t, "", "CgoPprofThread", "cpuHogThread", "cpuHogThread2") } func TestCgoPprofThreadNoTraceback(t *testing.T) { - testCgoPprof(t, "", "CgoPprofThreadNoTraceback") + testCgoPprof(t, "", "CgoPprofThreadNoTraceback", "cpuHogThread", "runtime._ExternalCode") } func TestRaceProf(t *testing.T) { @@ -521,3 +529,35 @@ func TestBigStackCallbackCgo(t *testing.T) { t.Errorf("expected %q got %v", want, got) } } + +func nextTrace(lines []string) ([]string, []string) { + var trace []string + for n, line := range lines { + if strings.HasPrefix(line, "---") { + return trace, lines[n+1:] + } + fields := strings.Fields(strings.TrimSpace(line)) + if len(fields) == 0 { + continue + } + // Last field contains the function name. + trace = append(trace, fields[len(fields)-1]) + } + return nil, nil +} + +func findTrace(text, top string) []string { + lines := strings.Split(text, "\n") + _, lines = nextTrace(lines) // Skip the header. + for len(lines) > 0 { + var t []string + t, lines = nextTrace(lines) + if len(t) == 0 { + continue + } + if t[0] == top { + return t + } + } + return nil +} diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go index 91a5c161097..6627bdc6726 100644 --- a/libgo/go/runtime/crash_test.go +++ b/libgo/go/runtime/crash_test.go @@ -657,6 +657,9 @@ func TestTimePprof(t *testing.T) { if runtime.Compiler == "gccgo" { t.Skip("gccgo may not have the pprof tool") } + if runtime.GOOS == "aix" { + t.Skip("pprof not yet available on AIX (see golang.org/issue/28555)") + } fn := runTestProg(t, "testprog", "TimeProf") fn = strings.TrimSpace(fn) defer os.Remove(fn) diff --git a/libgo/go/runtime/debug/mod.go b/libgo/go/runtime/debug/mod.go new file mode 100644 index 00000000000..f2948c65cbd --- /dev/null +++ b/libgo/go/runtime/debug/mod.go @@ -0,0 +1,112 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package debug + +import ( + "strings" + _ "unsafe" // for go:linkname +) + +// set using cmd/go/internal/modload.ModInfoProg +var modinfo string + +// setmodinfo is visible to code generated by cmd/go/internal/modload.ModInfoProg. +//go:linkname setmodinfo runtime..z2fdebug.setmodinfo +func setmodinfo(s string) { + modinfo = s +} + +// ReadBuildInfo returns the build information embedded +// in the running binary. The information is available only +// in binaries built with module support. +func ReadBuildInfo() (info *BuildInfo, ok bool) { + return readBuildInfo(modinfo) +} + +// BuildInfo represents the build information read from +// the running binary. +type BuildInfo struct { + Path string // The main package path + Main Module // The main module information + Deps []*Module // Module dependencies +} + +// Module represents a module. 
+type Module struct { + Path string // module path + Version string // module version + Sum string // checksum + Replace *Module // replaced by this module +} + +func readBuildInfo(data string) (*BuildInfo, bool) { + if len(data) < 32 { + return nil, false + } + data = data[16 : len(data)-16] + + const ( + pathLine = "path\t" + modLine = "mod\t" + depLine = "dep\t" + repLine = "=>\t" + ) + + info := &BuildInfo{} + + var line string + // Reverse of cmd/go/internal/modload.PackageBuildInfo + for len(data) > 0 { + i := strings.IndexByte(data, '\n') + if i < 0 { + break + } + line, data = data[:i], data[i+1:] + switch { + case strings.HasPrefix(line, pathLine): + elem := line[len(pathLine):] + info.Path = elem + case strings.HasPrefix(line, modLine): + elem := strings.Split(line[len(modLine):], "\t") + if len(elem) != 3 { + return nil, false + } + info.Main = Module{ + Path: elem[0], + Version: elem[1], + Sum: elem[2], + } + case strings.HasPrefix(line, depLine): + elem := strings.Split(line[len(depLine):], "\t") + if len(elem) != 2 && len(elem) != 3 { + return nil, false + } + sum := "" + if len(elem) == 3 { + sum = elem[2] + } + info.Deps = append(info.Deps, &Module{ + Path: elem[0], + Version: elem[1], + Sum: sum, + }) + case strings.HasPrefix(line, repLine): + elem := strings.Split(line[len(repLine):], "\t") + if len(elem) != 3 { + return nil, false + } + last := len(info.Deps) - 1 + if last < 0 { + return nil, false + } + info.Deps[last].Replace = &Module{ + Path: elem[0], + Version: elem[1], + Sum: elem[2], + } + } + } + return info, true +} diff --git a/libgo/go/runtime/debug_test.go b/libgo/go/runtime/debug_test.go index 38c764fadb3..12d93de3047 100644 --- a/libgo/go/runtime/debug_test.go +++ b/libgo/go/runtime/debug_test.go @@ -18,6 +18,8 @@ package runtime_test import ( "fmt" + "io/ioutil" + "regexp" "runtime" "runtime/debug" "sync/atomic" @@ -26,12 +28,23 @@ import ( ) func startDebugCallWorker(t *testing.T) (g *runtime.G, after func()) { + // This can deadlock if run under a debugger because it + // depends on catching SIGTRAP, which is usually swallowed by + // a debugger. + skipUnderDebugger(t) + // This can deadlock if there aren't enough threads or if a GC - // tries to interrupt an atomic loop (see issue #10958). - ogomaxprocs := runtime.GOMAXPROCS(2) + // tries to interrupt an atomic loop (see issue #10958). We + // use 8 Ps so there's room for the debug call worker, + // something that's trying to preempt the call worker, and the + // goroutine that's trying to stop the call worker. + ogomaxprocs := runtime.GOMAXPROCS(8) ogcpercent := debug.SetGCPercent(-1) - ready := make(chan *runtime.G) + // ready is a buffered channel so debugCallWorker won't block + // on sending to it. This makes it less likely we'll catch + // debugCallWorker while it's in the runtime. + ready := make(chan *runtime.G, 1) var stop uint32 done := make(chan error) go debugCallWorker(ready, &stop, done) @@ -61,6 +74,10 @@ func debugCallWorker(ready chan<- *runtime.G, stop *uint32, done chan<- error) { close(done) } +// Don't inline this function, since we want to test adjusting +// pointers in the arguments. +// +//go:noinline func debugCallWorker2(stop *uint32, x *int) { for atomic.LoadUint32(stop) == 0 { // Strongly encourage x to live in a register so we @@ -74,6 +91,28 @@ func debugCallTKill(tid int) error { return syscall.Tgkill(syscall.Getpid(), tid, syscall.SIGTRAP) } +// skipUnderDebugger skips the current test when running under a +// debugger (specifically if this process has a tracer). 
This is +// Linux-specific. +func skipUnderDebugger(t *testing.T) { + pid := syscall.Getpid() + status, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/status", pid)) + if err != nil { + t.Logf("couldn't get proc tracer: %s", err) + return + } + re := regexp.MustCompile(`TracerPid:\s+([0-9]+)`) + sub := re.FindSubmatch(status) + if sub == nil { + t.Logf("couldn't find proc tracer PID") + return + } + if string(sub[1]) == "0" { + return + } + t.Skip("test will deadlock under a debugger") +} + func TestDebugCall(t *testing.T) { g, after := startDebugCallWorker(t) defer after() @@ -161,9 +200,11 @@ func debugCallUnsafePointWorker(gpp **runtime.G, ready, stop *uint32) { } func TestDebugCallUnsafePoint(t *testing.T) { + skipUnderDebugger(t) + // This can deadlock if there aren't enough threads or if a GC // tries to interrupt an atomic loop (see issue #10958). - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) defer debug.SetGCPercent(debug.SetGCPercent(-1)) // Test that the runtime refuses call injection at unsafe points. @@ -182,8 +223,10 @@ func TestDebugCallUnsafePoint(t *testing.T) { } func TestDebugCallPanic(t *testing.T) { + skipUnderDebugger(t) + // This can deadlock if there aren't enough threads. - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) ready := make(chan *runtime.G) var stop uint32 diff --git a/libgo/go/runtime/env_posix.go b/libgo/go/runtime/env_posix.go index 399e88f420d..7e44f14edfc 100644 --- a/libgo/go/runtime/env_posix.go +++ b/libgo/go/runtime/env_posix.go @@ -12,9 +12,32 @@ func gogetenv(key string) string { throw("getenv before env init") } for _, s := range env { - if len(s) > len(key) && s[len(key)] == '=' && s[:len(key)] == key { + if len(s) > len(key) && s[len(key)] == '=' && envKeyEqual(s[:len(key)], key) { return s[len(key)+1:] } } return "" } + +// envKeyEqual reports whether a == b, with ASCII-only case insensitivity +// on Windows. The two strings must have the same length. +func envKeyEqual(a, b string) bool { + if GOOS == "windows" { // case insensitive + for i := 0; i < len(a); i++ { + ca, cb := a[i], b[i] + if ca == cb || lowerASCII(ca) == lowerASCII(cb) { + continue + } + return false + } + return true + } + return a == b +} + +func lowerASCII(c byte) byte { + if 'A' <= c && c <= 'Z' { + return c + ('a' - 'A') + } + return c +} diff --git a/libgo/go/runtime/export_debug_test.go b/libgo/go/runtime/export_debug_test.go index 2d2d5356edc..608d7567808 100644 --- a/libgo/go/runtime/export_debug_test.go +++ b/libgo/go/runtime/export_debug_test.go @@ -51,19 +51,31 @@ func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error) (in h.gp = gp h.fv, h.argp, h.argSize = fv, argp, argSize h.handleF = h.handle // Avoid allocating closure during signal - noteclear(&h.done) defer func() { testSigtrap = nil }() - testSigtrap = h.inject - if err := tkill(tid); err != nil { - return nil, err - } - // Wait for completion. - notetsleepg(&h.done, -1) - if len(h.err) != 0 { - return nil, h.err + for i := 0; ; i++ { + testSigtrap = h.inject + noteclear(&h.done) + h.err = "" + + if err := tkill(tid); err != nil { + return nil, err + } + // Wait for completion. + notetsleepg(&h.done, -1) + if h.err != "" { + switch h.err { + case "retry _Grunnable", "executing on Go runtime stack": + // These are transient states. Try to get out of them. 
+ if i < 100 { + Gosched() + continue + } + } + return nil, h.err + } + return h.panic, nil } - return h.panic, nil } type debugCallHandler struct { @@ -100,12 +112,18 @@ func (h *debugCallHandler) inject(info *siginfo, ctxt *sigctxt, gp2 *g) bool { h.savedRegs.fpstate = nil // Set PC to debugCallV1. ctxt.set_rip(uint64(funcPC(debugCallV1))) + // Call injected. Switch to the debugCall protocol. + testSigtrap = h.handleF + case _Grunnable: + // Ask InjectDebugCall to pause for a bit and then try + // again to interrupt this goroutine. + h.err = plainError("retry _Grunnable") + notewakeup(&h.done) default: h.err = plainError("goroutine in unexpected state at call inject") - return true + notewakeup(&h.done) } - // Switch to the debugCall protocol and resume execution. - testSigtrap = h.handleF + // Resume execution. return true } @@ -116,7 +134,7 @@ func (h *debugCallHandler) handle(info *siginfo, ctxt *sigctxt, gp2 *g) bool { return false } f := findfunc(uintptr(ctxt.rip())) - if !(hasprefix(funcname(f), "runtime.debugCall") || hasprefix(funcname(f), "debugCall")) { + if !(hasPrefix(funcname(f), "runtime.debugCall") || hasPrefix(funcname(f), "debugCall")) { println("trap in unknown function", funcname(f)) return false } @@ -150,6 +168,7 @@ func (h *debugCallHandler) handle(info *siginfo, ctxt *sigctxt, gp2 *g) bool { sp := ctxt.rsp() reason := *(*string)(unsafe.Pointer(uintptr(sp))) h.err = plainError(reason) + // Don't wake h.done. We need to transition to status 16 first. case 16: // Restore all registers except RIP and RSP. rip, rsp := ctxt.rip(), ctxt.rsp() @@ -163,6 +182,7 @@ func (h *debugCallHandler) handle(info *siginfo, ctxt *sigctxt, gp2 *g) bool { notewakeup(&h.done) default: h.err = plainError("unexpected debugCallV1 status") + notewakeup(&h.done) } // Resume execution. return true diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index 7f4811c5a0c..d919e0486b2 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -292,6 +292,7 @@ func ReadMemStatsSlow() (base, slow MemStats) { slow.TotalAlloc = 0 slow.Mallocs = 0 slow.Frees = 0 + slow.HeapReleased = 0 var bySize [_NumSizeClasses]struct { Mallocs, Frees uint64 } @@ -331,6 +332,10 @@ func ReadMemStatsSlow() (base, slow MemStats) { slow.BySize[i].Frees = bySize[i].Frees } + for i := mheap_.scav.start(); i.valid(); i = i.next() { + slow.HeapReleased += uint64(i.span().released()) + } + getg().m.mallocing-- }) @@ -454,3 +459,39 @@ func stackOverflow(x *byte) { var buf [256]byte stackOverflow(&buf[0]) } + +func MapTombstoneCheck(m map[int]int) { + // Make sure emptyOne and emptyRest are distributed correctly. + // We should have a series of filled and emptyOne cells, followed by + // a series of emptyRest cells. 
+ h := *(**hmap)(unsafe.Pointer(&m)) + i := interface{}(m) + t := *(**maptype)(unsafe.Pointer(&i)) + + for x := 0; x < 1<<h.B; x++ { + b0 := (*bmap)(add(h.buckets, uintptr(x)*uintptr(t.bucketsize))) + n := 0 + for b := b0; b != nil; b = b.overflow(t) { + for i := 0; i < bucketCnt; i++ { + if b.tophash[i] != emptyRest { + n++ + } + } + } + k := 0 + for b := b0; b != nil; b = b.overflow(t) { + for i := 0; i < bucketCnt; i++ { + if k < n && b.tophash[i] == emptyRest { + panic("early emptyRest") + } + if k >= n && b.tophash[i] != emptyRest { + panic("late non-emptyRest") + } + if k == n-1 && b.tophash[i] == emptyOne { + panic("last non-emptyRest entry is emptyOne") + } + k++ + } + } + } +} diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index c9d10f16801..d07a5ed024f 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -50,19 +50,13 @@ It is a comma-separated list of name=val pairs setting these named variables: gcshrinkstackoff: setting gcshrinkstackoff=1 disables moving goroutines onto smaller stacks. In this mode, a goroutine's stack can only grow. - gcrescanstacks: setting gcrescanstacks=1 enables stack - re-scanning during the STW mark termination phase. This is - helpful for debugging if objects are being prematurely - garbage collected. - gcstoptheworld: setting gcstoptheworld=1 disables concurrent garbage collection, making every garbage collection a stop-the-world event. Setting gcstoptheworld=2 also disables concurrent sweeping after the garbage collection finishes. gctrace: setting gctrace=1 causes the garbage collector to emit a single line to standard error at each collection, summarizing the amount of memory collected and the - length of the pause. Setting gctrace=2 emits the same summary but also - repeats each collection. The format of this line is subject to change. + length of the pause. The format of this line is subject to change. Currently, it is: gc # @#s #%: #+#+# ms clock, #+#/#/#+# ms cpu, #->#-># MB, # MB goal, # P where the fields are as follows: @@ -95,6 +89,11 @@ It is a comma-separated list of name=val pairs setting these named variables: released: # MB released to the system consumed: # MB allocated from the system + madvdontneed: setting madvdontneed=1 will use MADV_DONTNEED + instead of MADV_FREE on Linux when returning memory to the + kernel. This is less efficient, but causes RSS numbers to drop + more quickly. + memprofilerate: setting memprofilerate=X will update the value of runtime.MemProfileRate. When set to 0 memory profiling is disabled. Refer to the description of MemProfileRate for the default value. @@ -206,6 +205,7 @@ func Version() string { // GOOS is the running program's operating system target: // one of darwin, freebsd, linux, and so on. +// To view possible combinations of GOOS and GOARCH, run "go tool dist list". const GOOS string = sys.GOOS // GOARCH is the running program's architecture target: diff --git a/libgo/go/runtime/fastlog2table.go b/libgo/go/runtime/fastlog2table.go index c36d5835f64..6ba4a7d3f24 100644 --- a/libgo/go/runtime/fastlog2table.go +++ b/libgo/go/runtime/fastlog2table.go @@ -1,4 +1,4 @@ -// AUTO-GENERATED by mkfastlog2table.go +// Code generated by mkfastlog2table.go; DO NOT EDIT. // Run go generate from src/runtime to update. // See mkfastlog2table.go for comments. 
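MapTombstoneCheck in the export_test.go hunk above asserts the tombstone layout the new map code maintains: within a bucket chain, filled and emptyOne cells come first and emptyRest cells form an unbroken tail, so lookups can stop at the first emptyRest. A simplified, self-contained sketch of that ordering check over a flat tophash slice (emptyRest and emptyOne match the constants in the map.go hunk later in this diff; checkTombstones itself is illustrative, not a runtime function):

package main

const (
	emptyRest = 0 // this cell is empty and no later cell in the chain is in use
	emptyOne  = 1 // this cell is empty, but later cells may still be in use
)

// checkTombstones reports whether the emptyRest cells form an unbroken tail,
// mirroring the main ordering check in runtime.MapTombstoneCheck.
func checkTombstones(tophash []uint8) bool {
	sawRest := false
	for _, t := range tophash {
		if t == emptyRest {
			sawRest = true
			continue
		}
		if sawRest {
			return false // a filled or emptyOne cell after an emptyRest cell
		}
	}
	return true
}

func main() {
	// 7 and 9 stand in for tophash values of filled cells (>= minTopHash).
	println(checkTombstones([]uint8{7, emptyOne, 9, emptyRest, emptyRest})) // true
	println(checkTombstones([]uint8{7, emptyRest, emptyOne}))               // false
}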
diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index 180919bf3c4..384b75f2de3 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -22,6 +22,12 @@ func TestGcSys(t *testing.T) { if os.Getenv("GOGC") == "off" { t.Skip("skipping test; GOGC=off in environment") } + if runtime.GOOS == "windows" { + t.Skip("skipping test; GOOS=windows http://golang.org/issue/27156") + } + if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" { + t.Skip("skipping test; GOOS=linux GOARCH=arm64 https://github.com/golang/go/issues/27636") + } got := runTestProg(t, "testprog", "GCSys") want := "OK\n" if got != want { @@ -571,8 +577,8 @@ func BenchmarkWriteBarrier(b *testing.B) { n := &node{mkTree(level - 1), mkTree(level - 1)} if level == 10 { // Seed GC with enough early pointers so it - // doesn't accidentally switch to mark 2 when - // it only has the top of the tree. + // doesn't start termination barriers when it + // only has the top of the tree. wbRoots = append(wbRoots, n) } return n diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go index ca012bbcd45..89144d520e3 100644 --- a/libgo/go/runtime/gcinfo_test.go +++ b/libgo/go/runtime/gcinfo_test.go @@ -21,14 +21,46 @@ const ( func TestGCInfo(t *testing.T) { t.Skip("skipping on gccgo for now") - verifyGCInfo(t, "stack Ptr", new(Ptr), infoPtr) - verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr) - verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar) - verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct()) - verifyGCInfo(t, "stack string", new(string), infoString) - verifyGCInfo(t, "stack slice", new([]string), infoSlice) - verifyGCInfo(t, "stack eface", new(interface{}), infoEface) - verifyGCInfo(t, "stack iface", new(Iface), infoIface) + { + var x Ptr + verifyGCInfo(t, "stack Ptr", &x, infoPtr) + runtime.KeepAlive(x) + } + { + var x ScalarPtr + verifyGCInfo(t, "stack ScalarPtr", &x, infoScalarPtr) + runtime.KeepAlive(x) + } + { + var x PtrScalar + verifyGCInfo(t, "stack PtrScalar", &x, infoPtrScalar) + runtime.KeepAlive(x) + } + { + var x BigStruct + verifyGCInfo(t, "stack BigStruct", &x, infoBigStruct()) + runtime.KeepAlive(x) + } + { + var x string + verifyGCInfo(t, "stack string", &x, infoString) + runtime.KeepAlive(x) + } + { + var x []string + verifyGCInfo(t, "stack slice", &x, infoSlice) + runtime.KeepAlive(x) + } + { + var x interface{} + verifyGCInfo(t, "stack eface", &x, infoEface) + runtime.KeepAlive(x) + } + { + var x Iface + verifyGCInfo(t, "stack iface", &x, infoIface) + runtime.KeepAlive(x) + } for i := 0; i < 10; i++ { verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr))) diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go index 070edb6e9b6..d57be4c8f73 100644 --- a/libgo/go/runtime/hash_test.go +++ b/libgo/go/runtime/hash_test.go @@ -177,13 +177,13 @@ func twoNonZero(h *HashSet, n int) { b := make([]byte, n) // all zero - h.addB(b[:]) + h.addB(b) // one non-zero byte for i := 0; i < n; i++ { for x := 1; x < 256; x++ { b[i] = byte(x) - h.addB(b[:]) + h.addB(b) b[i] = 0 } } @@ -195,7 +195,7 @@ func twoNonZero(h *HashSet, n int) { for j := i + 1; j < n; j++ { for y := 1; y < 256; y++ { b[j] = byte(y) - h.addB(b[:]) + h.addB(b) b[j] = 0 } } diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go index 5ebebf69474..3aa9e8a23f2 100644 --- a/libgo/go/runtime/heapdump.go +++ b/libgo/go/runtime/heapdump.go @@ -250,7 +250,7 @@ func dumpgoroutine(gp *g) { dumpint(uint64(gp.goid)) 
dumpint(uint64(gp.gopc)) dumpint(uint64(readgstatus(gp))) - dumpbool(isSystemGoroutine(gp)) + dumpbool(isSystemGoroutine(gp, false)) dumpbool(false) // isbackground dumpint(uint64(gp.waitsince)) dumpstr(gp.waitreason.String()) @@ -313,7 +313,7 @@ func finq_callback(fn *funcval, obj unsafe.Pointer, ft *functype, ot *ptrtype) { func dumproots() { // MSpan.types for _, s := range mheap_.allspans { - if s.state == _MSpanInUse { + if s.state == mSpanInUse { // Finalizers for sp := s.specials; sp != nil; sp = sp.next { if sp.kind != _KindSpecialFinalizer { @@ -336,7 +336,7 @@ var freemark [_PageSize / 8]bool func dumpobjs() { for _, s := range mheap_.allspans { - if s.state != _MSpanInUse { + if s.state != mSpanInUse { continue } p := s.base() @@ -485,7 +485,7 @@ func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *location, size, allocs, func dumpmemprof() { iterate_memprof(dumpmemprof_callback) for _, s := range mheap_.allspans { - if s.state != _MSpanInUse { + if s.state != mSpanInUse { continue } for sp := s.specials; sp != nil; sp = sp.next { @@ -506,7 +506,7 @@ var dumphdr = []byte("go1.7 heap dump\n") func mdump() { // make sure we're done sweeping for _, s := range mheap_.allspans { - if s.state == _MSpanInUse { + if s.state == mSpanInUse { s.ensureSwept() } } @@ -529,7 +529,7 @@ func writeheapdump_m(fd uintptr) { _g_.waitreason = waitReasonDumpingHeap // Update stats so we can dump them. - // As a side effect, flushes all the MCaches so the MSpan.freelist + // As a side effect, flushes all the mcaches so the mspan.freelist // lists contain all the free objects. updatememstats() diff --git a/libgo/go/runtime/internal/atomic/atomic.c b/libgo/go/runtime/internal/atomic/atomic.c index b87fae96bd3..17c83a28c1c 100644 --- a/libgo/go/runtime/internal/atomic/atomic.c +++ b/libgo/go/runtime/internal/atomic/atomic.c @@ -13,7 +13,7 @@ uint32_t Load (uint32_t *ptr) uint32_t Load (uint32_t *ptr) { - return __atomic_load_n (ptr, __ATOMIC_ACQUIRE); + return __atomic_load_n (ptr, __ATOMIC_SEQ_CST); } void *Loadp (void *ptr) @@ -23,7 +23,7 @@ void *Loadp (void *ptr) void * Loadp (void *ptr) { - return __atomic_load_n ((void **) ptr, __ATOMIC_ACQUIRE); + return __atomic_load_n ((void **) ptr, __ATOMIC_SEQ_CST); } uint64_t Load64 (uint64_t *ptr) @@ -35,6 +35,16 @@ Load64 (uint64_t *ptr) { if (((uintptr_t) ptr & 7) != 0) panicmem (); + return __atomic_load_n (ptr, __ATOMIC_SEQ_CST); +} + +uint32_t LoadAcq (uint32_t *ptr) + __asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.LoadAcq") + __attribute__ ((no_split_stack)); + +uint32_t +LoadAcq (uint32_t *ptr) +{ return __atomic_load_n (ptr, __ATOMIC_ACQUIRE); } @@ -45,7 +55,7 @@ uintptr_t Loaduintptr (uintptr_t *ptr) uintptr_t Loaduintptr (uintptr_t *ptr) { - return __atomic_load_n (ptr, __ATOMIC_ACQUIRE); + return __atomic_load_n (ptr, __ATOMIC_SEQ_CST); } uintgo Loaduint (uintgo *ptr) @@ -55,7 +65,7 @@ uintgo Loaduint (uintgo *ptr) uintgo Loaduint (uintgo *ptr) { - return __atomic_load_n (ptr, __ATOMIC_ACQUIRE); + return __atomic_load_n (ptr, __ATOMIC_SEQ_CST); } int64_t Loadint64 (int64_t *ptr) @@ -67,7 +77,7 @@ Loadint64 (int64_t *ptr) { if (((uintptr_t) ptr & 7) != 0) panicmem (); - return __atomic_load_n (ptr, __ATOMIC_ACQUIRE); + return __atomic_load_n (ptr, __ATOMIC_SEQ_CST); } uint32_t Xadd (uint32_t *ptr, int32_t delta) @@ -188,6 +198,16 @@ Cas64 (uint64_t *ptr, uint64_t old, uint64_t new) return __atomic_compare_exchange_n (ptr, &old, new, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); } +_Bool CasRel (uint32_t *ptr, uint32_t old, uint32_t new) 
+ __asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.CasRel") + __attribute__ ((no_split_stack)); + +_Bool +CasRel (uint32_t *ptr, uint32_t old, uint32_t new) +{ + return __atomic_compare_exchange_n (ptr, &old, new, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); +} + _Bool Casp1 (void **ptr, void *old, void *new) __asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.Casp1") __attribute__ ((no_split_stack)); @@ -230,6 +250,16 @@ Store64 (uint64_t *ptr, uint64_t val) __atomic_store_n (ptr, val, __ATOMIC_SEQ_CST); } +void StoreRel (uint32_t *ptr, uint32_t val) + __asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.StoreRel") + __attribute__ ((no_split_stack)); + +void +StoreRel (uint32_t *ptr, uint32_t val) +{ + __atomic_store_n (ptr, val, __ATOMIC_RELEASE); +} + void Storeuintptr (uintptr_t *ptr, uintptr_t val) __asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.Storeuintptr") __attribute__ ((no_split_stack)); diff --git a/libgo/go/runtime/internal/atomic/gccgo.go b/libgo/go/runtime/internal/atomic/gccgo.go index 696736465fd..e5edbfb17f1 100644 --- a/libgo/go/runtime/internal/atomic/gccgo.go +++ b/libgo/go/runtime/internal/atomic/gccgo.go @@ -18,6 +18,9 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer func Load64(ptr *uint64) uint64 //go:noescape +func LoadAcq(ptr *uint32) uint32 + +//go:noescape func Xadd(ptr *uint32, delta int32) uint32 //go:noescape @@ -47,11 +50,17 @@ func Or8(ptr *uint8, val uint8) func Cas64(ptr *uint64, old, new uint64) bool //go:noescape +func CasRel(ptr *uint32, old, new uint32) bool + +//go:noescape func Store(ptr *uint32, val uint32) //go:noescape func Store64(ptr *uint64, val uint64) +//go:noescape +func StoreRel(ptr *uint32, val uint32) + // StorepNoWB performs *ptr = val atomically and without a write // barrier. // diff --git a/libgo/go/runtime/internal/math/math.go b/libgo/go/runtime/internal/math/math.go new file mode 100644 index 00000000000..5385f5dd868 --- /dev/null +++ b/libgo/go/runtime/internal/math/math.go @@ -0,0 +1,19 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +import "runtime/internal/sys" + +const MaxUintptr = ^uintptr(0) + +// MulUintptr returns a * b and whether the multiplication overflowed. +// On supported platforms this is an intrinsic lowered by the compiler. +func MulUintptr(a, b uintptr) (uintptr, bool) { + if a|b < 1<<(4*sys.PtrSize) || a == 0 { + return a * b, false + } + overflow := b > MaxUintptr/a + return a * b, overflow +} diff --git a/libgo/go/runtime/internal/math/math_test.go b/libgo/go/runtime/internal/math/math_test.go new file mode 100644 index 00000000000..303eb63405a --- /dev/null +++ b/libgo/go/runtime/internal/math/math_test.go @@ -0,0 +1,79 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math_test + +import ( + . 
"runtime/internal/math" + "testing" +) + +const ( + UintptrSize = 32 << (^uintptr(0) >> 63) +) + +type mulUintptrTest struct { + a uintptr + b uintptr + overflow bool +} + +var mulUintptrTests = []mulUintptrTest{ + {0, 0, false}, + {1000, 1000, false}, + {MaxUintptr, 0, false}, + {MaxUintptr, 1, false}, + {MaxUintptr / 2, 2, false}, + {MaxUintptr / 2, 3, true}, + {MaxUintptr, 10, true}, + {MaxUintptr, 100, true}, + {MaxUintptr / 100, 100, false}, + {MaxUintptr / 1000, 1001, true}, + {1<<(UintptrSize/2) - 1, 1<<(UintptrSize/2) - 1, false}, + {1 << (UintptrSize / 2), 1 << (UintptrSize / 2), true}, + {MaxUintptr >> 32, MaxUintptr >> 32, false}, + {MaxUintptr, MaxUintptr, true}, +} + +func TestMulUintptr(t *testing.T) { + for _, test := range mulUintptrTests { + a, b := test.a, test.b + for i := 0; i < 2; i++ { + mul, overflow := MulUintptr(a, b) + if mul != a*b || overflow != test.overflow { + t.Errorf("MulUintptr(%v, %v) = %v, %v want %v, %v", + a, b, mul, overflow, a*b, test.overflow) + } + a, b = b, a + } + } +} + +var SinkUintptr uintptr +var SinkBool bool + +var x, y uintptr + +func BenchmarkMulUintptr(b *testing.B) { + x, y = 1, 2 + b.Run("small", func(b *testing.B) { + for i := 0; i < b.N; i++ { + var overflow bool + SinkUintptr, overflow = MulUintptr(x, y) + if overflow { + SinkUintptr = 0 + } + } + }) + x, y = MaxUintptr, MaxUintptr-1 + b.Run("large", func(b *testing.B) { + for i := 0; i < b.N; i++ { + var overflow bool + SinkUintptr, overflow = MulUintptr(x, y) + if overflow { + SinkUintptr = 0 + } + } + }) +} diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go index 401f83d9d5b..de40a00324a 100644 --- a/libgo/go/runtime/lfstack_64bit.go +++ b/libgo/go/runtime/lfstack_64bit.go @@ -41,8 +41,8 @@ const ( ia64CntBits = 64 - ia64AddrBits + 3 // On AIX, 64-bit addresses are split into 36-bit segment number and 28-bit - // offset in segment. Segment numbers in the range 0x070000000-0x07FFFFFFF - // and 0x0A0000000-0x0AFFFFFFF(LSA) are available for mmap. + // offset in segment. Segment numbers in the range 0x0A0000000-0x0AFFFFFFF(LSA) + // are available for mmap. // We assume all lfnode addresses are from memory allocated with mmap. // We use one bit to distinguish between the two ranges. 
aixAddrBits = 57 @@ -77,11 +77,7 @@ func lfstackUnpack(val uint64) *lfnode { return (*lfnode)(unsafe.Pointer(uintptr(((val & (1<<(64-3) - 1)) >> ia64CntBits << 3) | val&^(1<<(64-3)-1)))) } if GOARCH == "ppc64" && GOOS == "aix" { - if val&(1<<63) != 0 { - return (*lfnode)(unsafe.Pointer(uintptr((val >> aixCntBits << 3) | 0x7<<56))) - } else { - return (*lfnode)(unsafe.Pointer(uintptr((val >> aixCntBits << 3) | 0xa<<56))) - } + return (*lfnode)(unsafe.Pointer(uintptr((val >> aixCntBits << 3) | 0xa<<56))) } return (*lfnode)(unsafe.Pointer(uintptr(val >> cntBits << 3))) } diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go index f7ca1f036e5..9cede2d41c3 100644 --- a/libgo/go/runtime/lock_futex.go +++ b/libgo/go/runtime/lock_futex.go @@ -242,7 +242,7 @@ func notetsleepg(n *note, ns int64) bool { return ok } -func pauseSchedulerUntilCallback() bool { +func beforeIdle() bool { return false } diff --git a/libgo/go/runtime/lock_js.go b/libgo/go/runtime/lock_js.go index df321e51963..f58c915b630 100644 --- a/libgo/go/runtime/lock_js.go +++ b/libgo/go/runtime/lock_js.go @@ -92,7 +92,7 @@ func notetsleepg(n *note, ns int64) bool { delay = 1<<31 - 1 // cap to max int32 } - id := scheduleCallback(delay) + id := scheduleTimeoutEvent(delay) mp := acquirem() notes[n] = gp notesWithTimeout[n] = noteWithTimeout{gp: gp, deadline: deadline} @@ -100,7 +100,7 @@ func notetsleepg(n *note, ns int64) bool { gopark(nil, nil, waitReasonSleep, traceEvNone, 1) - clearScheduledCallback(id) // note might have woken early, clear timeout + clearTimeoutEvent(id) // note might have woken early, clear timeout mp = acquirem() delete(notes, n) delete(notesWithTimeout, n) @@ -127,46 +127,68 @@ func notetsleepg(n *note, ns int64) bool { func checkTimeouts() { now := nanotime() for n, nt := range notesWithTimeout { - if n.key == note_cleared && now > nt.deadline { + if n.key == note_cleared && now >= nt.deadline { n.key = note_timeout goready(nt.gp, 1) } } } -var waitingForCallback *g +var returnedEventHandler *g -// sleepUntilCallback puts the current goroutine to sleep until a callback is triggered. -// It is currently only used by the callback routine of the syscall/js package. -//go:linkname sleepUntilCallback syscall/js.sleepUntilCallback -func sleepUntilCallback() { - waitingForCallback = getg() +func init() { + // At the toplevel we need an extra goroutine that handles asynchronous events. + initg := getg() + go func() { + returnedEventHandler = getg() + goready(initg, 1) + + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + returnedEventHandler = nil + + pause(getcallersp() - 16) + }() gopark(nil, nil, waitReasonZero, traceEvNone, 1) - waitingForCallback = nil } -// pauseSchedulerUntilCallback gets called from the scheduler and pauses the execution -// of Go's WebAssembly code until a callback is triggered. Then it checks for note timeouts -// and resumes goroutines that are waiting for a callback. -func pauseSchedulerUntilCallback() bool { - if waitingForCallback == nil && len(notesWithTimeout) == 0 { - return false +// beforeIdle gets called by the scheduler if no goroutine is awake. +// We resume the event handler (if available) which will pause the execution. +func beforeIdle() bool { + if returnedEventHandler != nil { + goready(returnedEventHandler, 1) + return true } + return false +} + +// pause sets SP to newsp and pauses the execution of Go's WebAssembly code until an event is triggered. 
+func pause(newsp uintptr) + +// scheduleTimeoutEvent tells the WebAssembly environment to trigger an event after ms milliseconds. +// It returns a timer id that can be used with clearTimeoutEvent. +func scheduleTimeoutEvent(ms int64) int32 + +// clearTimeoutEvent clears a timeout event scheduled by scheduleTimeoutEvent. +func clearTimeoutEvent(id int32) + +func handleEvent() { + prevReturnedEventHandler := returnedEventHandler + returnedEventHandler = nil - pause() checkTimeouts() - if waitingForCallback != nil { - goready(waitingForCallback, 1) - } - return true -} + eventHandler() + + returnedEventHandler = getg() + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + + returnedEventHandler = prevReturnedEventHandler -// pause pauses the execution of Go's WebAssembly code until a callback is triggered. -func pause() + pause(getcallersp() - 16) +} -// scheduleCallback tells the WebAssembly environment to trigger a callback after ms milliseconds. -// It returns a timer id that can be used with clearScheduledCallback. -func scheduleCallback(ms int64) int32 +var eventHandler func() -// clearScheduledCallback clears a callback scheduled by scheduleCallback. -func clearScheduledCallback(id int32) +//go:linkname setEventHandler syscall/js.setEventHandler +func setEventHandler(fn func()) { + eventHandler = fn +} diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index 237513ca0cb..c748ca70ec3 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -295,7 +295,7 @@ func notetsleepg(n *note, ns int64) bool { return ok } -func pauseSchedulerUntilCallback() bool { +func beforeIdle() bool { return false } diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index ac4759ffbf1..36417fb54f4 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -106,6 +106,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/math" "runtime/internal/sys" "unsafe" ) @@ -135,8 +136,6 @@ const ( // have the most objects per span. maxObjsPerSpan = pageSize / 8 - mSpanInUse = _MSpanInUse - concurrentSweep = _ConcurrentSweep _PageSize = 1 << _PageShift @@ -149,8 +148,7 @@ const ( _TinySize = 16 _TinySizeClass = int8(2) - _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc - _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc // Per-P, per order stack segment cache size. _StackCacheSize = 32 * 1024 @@ -173,7 +171,7 @@ const ( // amd64, addresses are sign-extended beyond heapAddrBits. On // other arches, they are zero-extended. // - // On 64-bit platforms, we limit this to 48 bits based on a + // On most 64-bit platforms, we limit this to 48 bits based on a // combination of hardware and OS limitations. // // amd64 hardware limits addresses to 48 bits, sign-extended @@ -191,10 +189,9 @@ const ( // bits, in the range [0, 1<<48). // // ppc64, mips64, and s390x support arbitrary 64 bit addresses - // in hardware. However, since Go only supports Linux on - // these, we lean on OS limits. Based on Linux's processor.h, - // the user address space is limited as follows on 64-bit - // architectures: + // in hardware. On Linux, Go leans on stricter OS limits. 
Based + // on Linux's processor.h, the user address space is limited as + // follows on 64-bit architectures: // // Architecture Name Maximum Value (exclusive) // --------------------------------------------------------------------- @@ -211,13 +208,17 @@ const ( // exceed Go's 48 bit limit, it's extremely unlikely in // practice. // + // On aix/ppc64, the limits is increased to 1<<60 to accept addresses + // returned by mmap syscall. These are in range: + // 0x0a00000000000000 - 0x0afffffffffffff + // // On 32-bit platforms, we accept the full 32-bit address // space because doing so is cheap. // mips32 only has access to the low 2GB of virtual memory, so // we further limit it to 31 bits. // // WebAssembly currently has a limit of 4GB linear memory. - heapAddrBits = (_64bit*(1-sys.GoarchWasm))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosAix))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 60*sys.GoosAix // maxAlloc is the maximum size of an allocation. On 64-bit, // it's theoretically possible to allocate 1<<heapAddrBits bytes. On @@ -229,16 +230,17 @@ const ( // The number of bits in a heap address, the size of heap // arenas, and the L1 and L2 arena map sizes are related by // - // (1 << addrBits) = arenaBytes * L1entries * L2entries + // (1 << addr bits) = arena size * L1 entries * L2 entries // // Currently, we balance these as follows: // - // Platform Addr bits Arena size L1 entries L2 size - // -------------- --------- ---------- ---------- ------- - // */64-bit 48 64MB 1 32MB - // windows/64-bit 48 4MB 64 8MB - // */32-bit 32 4MB 1 4KB - // */mips(le) 31 4MB 1 2KB + // Platform Addr bits Arena size L1 entries L2 entries + // -------------- --------- ---------- ---------- ----------- + // */64-bit 48 64MB 1 4M (32MB) + // aix/64-bit 60 256MB 4096 4M (32MB) + // windows/64-bit 48 4MB 64 1M (8MB) + // */32-bit 32 4MB 1 1024 (4KB) + // */mips(le) 31 4MB 1 512 (2KB) // heapArenaBytes is the size of a heap arena. The heap // consists of mappings of size heapArenaBytes, aligned to @@ -257,7 +259,7 @@ const ( // logHeapArenaBytes is log_2 of heapArenaBytes. For clarity, // prefer using heapArenaBytes where possible (we need the // constant to compute some other constants). - logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoosAix)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (8+20)*sys.GoosAix // heapArenaBitmapBytes is the size of each heap arena's bitmap. heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2) @@ -277,7 +279,10 @@ const ( // We use the L1 map on 64-bit Windows because the arena size // is small, but the address space is still 48 bits, and // there's a high cost to having a large L2. - arenaL1Bits = 6 * (_64bit * sys.GoosWindows) + // + // We use the L1 map on aix/ppc64 to keep the same L2 value + // as on Linux. + arenaL1Bits = 6*(_64bit*sys.GoosWindows) + 12*sys.GoosAix // arenaL2Bits is the number of bits of the arena number // covered by the second level arena index. @@ -339,27 +344,27 @@ var physPageSize uintptr // may use larger alignment, so the caller must be careful to realign the // memory obtained by sysAlloc. 
// -// SysUnused notifies the operating system that the contents +// sysUnused notifies the operating system that the contents // of the memory region are no longer needed and can be reused // for other purposes. -// SysUsed notifies the operating system that the contents +// sysUsed notifies the operating system that the contents // of the memory region are needed again. // -// SysFree returns it unconditionally; this is only used if +// sysFree returns it unconditionally; this is only used if // an out-of-memory error has been detected midway through -// an allocation. It is okay if SysFree is a no-op. +// an allocation. It is okay if sysFree is a no-op. // -// SysReserve reserves address space without allocating memory. +// sysReserve reserves address space without allocating memory. // If the pointer passed to it is non-nil, the caller wants the -// reservation there, but SysReserve can still choose another +// reservation there, but sysReserve can still choose another // location if that one is unavailable. -// NOTE: SysReserve returns OS-aligned memory, but the heap allocator +// NOTE: sysReserve returns OS-aligned memory, but the heap allocator // may use larger alignment, so the caller must be careful to realign the // memory obtained by sysAlloc. // -// SysMap maps previously reserved address space for use. +// sysMap maps previously reserved address space for use. // -// SysFault marks a (already sysAlloc'd) region to fault +// sysFault marks a (already sysAlloc'd) region to fault // if accessed. Used only for debugging the runtime. func mallocinit() { @@ -432,8 +437,8 @@ func mallocinit() { // allocation at 0x40 << 32 because when using 4k pages with 3-level // translation buffers, the user address space is limited to 39 bits // On darwin/arm64, the address space is even smaller. - // On AIX, mmap adresses range starts at 0x0700000000000000 for 64-bit - // processes. The new address space allocator starts at 0x0A00000000000000. + // On AIX, mmaps starts at 0x0A00000000000000 for 64-bit. + // processes. for i := 0x7f; i >= 0; i-- { var p uintptr switch { @@ -443,10 +448,11 @@ func mallocinit() { p = uintptr(i)<<40 | uintptrMask&(0x0040<<32) case GOOS == "aix": if i == 0 { - p = uintptrMask&(1<<42) | uintptrMask&(0xa0<<52) - } else { - p = uintptr(i)<<42 | uintptrMask&(0x70<<52) + // We don't use addresses directly after 0x0A00000000000000 + // to avoid collisions with others mmaps done by non-go programs. + continue } + p = uintptr(i)<<40 | uintptrMask&(0xa0<<52) case raceenabled: // The TSAN runtime requires the heap // to be in the range [0x00c000000000, @@ -480,7 +486,7 @@ func mallocinit() { // 3. We try to stake out a reasonably large initial // heap reservation. - const arenaMetaSize = unsafe.Sizeof([1 << arenaBits]heapArena{}) + const arenaMetaSize = (1 << arenaBits) * unsafe.Sizeof(heapArena{}) meta := uintptr(sysReserve(nil, arenaMetaSize)) if meta != 0 { mheap_.heapArenaAlloc.init(meta, arenaMetaSize) @@ -663,6 +669,27 @@ mapped: } } + // Add the arena to the arenas list. 
+ if len(h.allArenas) == cap(h.allArenas) { + size := 2 * uintptr(cap(h.allArenas)) * sys.PtrSize + if size == 0 { + size = physPageSize + } + newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gc_sys)) + if newArray == nil { + throw("out of memory allocating allArenas") + } + oldSlice := h.allArenas + *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / sys.PtrSize)} + copy(h.allArenas, oldSlice) + // Do not free the old backing array because + // there may be concurrent readers. Since we + // double the array each time, this can lead + // to at most 2x waste. + } + h.allArenas = h.allArenas[:len(h.allArenas)+1] + h.allArenas[len(h.allArenas)-1] = ri + // Store atomically just in case an object from the // new heap arena becomes visible before the heap lock // is released (which shouldn't happen, but there's @@ -755,6 +782,9 @@ func nextFreeFast(s *mspan) gclinkptr { // weight allocation. If it is a heavy weight allocation the caller must // determine whether a new GC cycle needs to be started or if the GC is active // whether this goroutine needs to assist the GC. +// +// Must run in a non-preemptible context since otherwise the owner of +// c could change. func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bool) { s = c.alloc[spc] shouldhelpgc = false @@ -765,9 +795,7 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bo println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems) throw("s.allocCount != s.nelems && freeIndex == s.nelems") } - systemstack(func() { - c.refill(spc) - }) + c.refill(spc) shouldhelpgc = true s = c.alloc[spc] @@ -1018,7 +1046,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { if shouldhelpgc { if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { - gcStart(gcBackgroundMode, t) + gcStart(t) } } @@ -1076,10 +1104,11 @@ func newarray(typ *_type, n int) unsafe.Pointer { if n == 1 { return mallocgc(typ.size, typ, true) } - if n < 0 || uintptr(n) > maxSliceCap(typ.size) { + mem, overflow := math.MulUintptr(typ.size, uintptr(n)) + if overflow || mem > maxAlloc || n < 0 { panic(plainError("runtime: allocation size out of range")) } - return mallocgc(typ.size*uintptr(n), typ, true) + return mallocgc(mem, typ, true) } //go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray @@ -1164,6 +1193,15 @@ var globalAlloc struct { persistentAlloc } +// persistentChunkSize is the number of bytes we allocate when we grow +// a persistentAlloc. +const persistentChunkSize = 256 << 10 + +// persistentChunks is a list of all the persistent chunks we have +// allocated. The list is maintained through the first word in the +// persistent chunk. This is updated atomically. +var persistentChunks *notInHeap + // Wrapper around sysAlloc that can allocate small chunks. // There is no associated free operation. // Intended for things like function/type/debug-related persistent data. 
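The persistentalloc changes above thread every chunk onto a single list by writing the previous list head into the first word of each newly allocated chunk, which is what lets inPersistentAlloc (used by the cgocheck.go hunk near the top of this diff) decide whether a pointer came from persistentalloc. A simplified sketch of the same idea, using ordinary heap allocation and an explicit prev field instead of the block's raw first word; the runtime also publishes the new head with an atomic compare-and-swap, which is only noted in a comment here:

package main

import "unsafe"

const chunkSize = 256 << 10 // mirrors persistentChunkSize

// chunk stands in for one persistentalloc block. The runtime has no struct:
// it reuses the first word of the raw sysAlloc'd block as the prev link.
type chunk struct {
	prev *chunk
	buf  [chunkSize]byte
}

var chunks *chunk // head of the list of every chunk ever allocated

func newChunk() *chunk {
	c := &chunk{prev: chunks}
	chunks = c // the runtime publishes the new head with an atomic CAS
	return c
}

// inChunks reports whether p points into any chunk, like inPersistentAlloc.
// Here the chunks are ordinary heap objects, so the addresses are only
// meaningful while they stay live; the runtime's chunks are off-heap.
func inChunks(p uintptr) bool {
	for c := chunks; c != nil; c = c.prev {
		base := uintptr(unsafe.Pointer(&c.buf[0]))
		if p >= base && p < base+chunkSize {
			return true
		}
	}
	return false
}

func main() {
	c := newChunk()
	p := uintptr(unsafe.Pointer(&c.buf[100]))
	println(inChunks(p), inChunks(0)) // true false
}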
@@ -1184,7 +1222,6 @@ func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { //go:systemstack func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { const ( - chunk = 256 << 10 maxBlock = 64 << 10 // VM reservation granularity is 64K on windows ) @@ -1215,15 +1252,24 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { persistent = &globalAlloc.persistentAlloc } persistent.off = round(persistent.off, align) - if persistent.off+size > chunk || persistent.base == nil { - persistent.base = (*notInHeap)(sysAlloc(chunk, &memstats.other_sys)) + if persistent.off+size > persistentChunkSize || persistent.base == nil { + persistent.base = (*notInHeap)(sysAlloc(persistentChunkSize, &memstats.other_sys)) if persistent.base == nil { if persistent == &globalAlloc.persistentAlloc { unlock(&globalAlloc.mutex) } throw("runtime: cannot allocate memory") } - persistent.off = 0 + + // Add the new chunk to the persistentChunks list. + for { + chunks := uintptr(unsafe.Pointer(persistentChunks)) + *(*uintptr)(unsafe.Pointer(persistent.base)) = chunks + if atomic.Casuintptr((*uintptr)(unsafe.Pointer(&persistentChunks)), chunks, uintptr(unsafe.Pointer(persistent.base))) { + break + } + } + persistent.off = sys.PtrSize } p := persistent.base.add(persistent.off) persistent.off += size @@ -1239,6 +1285,21 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { return p } +// inPersistentAlloc reports whether p points to memory allocated by +// persistentalloc. This must be nosplit because it is called by the +// cgo checker code, which is called by the write barrier code. +//go:nosplit +func inPersistentAlloc(p uintptr) bool { + chunk := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&persistentChunks))) + for chunk != 0 { + if p >= chunk && p < chunk+persistentChunkSize { + return true + } + chunk = *(*uintptr)(unsafe.Pointer(chunk)) + } + return false +} + // linearAlloc is a simple linear allocator that pre-reserves a region // of memory and then maps that region as needed. The caller is // responsible for locking. diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go index 30a7d844c63..c9282bae55e 100644 --- a/libgo/go/runtime/malloc_test.go +++ b/libgo/go/runtime/malloc_test.go @@ -170,6 +170,14 @@ func TestTinyAlloc(t *testing.T) { } } +func TestPhysicalMemoryUtilization(t *testing.T) { + got := runTestProg(t, "testprog", "GCPhys") + want := "OK\n" + if got != want { + t.Fatalf("expected %q, but got %q", want, got) + } +} + type acLink struct { x [1 << 20]byte } @@ -177,6 +185,14 @@ type acLink struct { var arenaCollisionSink []*acLink func TestArenaCollision(t *testing.T) { + if GOOS == "darwin" && race.Enabled { + // Skip this test on Darwin in race mode because Darwin 10.10 has + // issues following arena hints and runs out of them in race mode, so + // MAP_FIXED is used to ensure we keep the heap in the memory region the + // race detector expects. + // TODO(mknyszek): Delete this when Darwin 10.10 is no longer supported. 
+ t.Skip("disabled on Darwin with race mode since MAP_FIXED is used") + } testenv.MustHaveExec(t) // Test that mheap.sysAlloc handles collisions with other diff --git a/libgo/go/runtime/map.go b/libgo/go/runtime/map.go index 52462c7e117..5dd5283e1ec 100644 --- a/libgo/go/runtime/map.go +++ b/libgo/go/runtime/map.go @@ -55,6 +55,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/math" "runtime/internal/sys" "unsafe" ) @@ -103,11 +104,12 @@ const ( // Each bucket (including its overflow buckets, if any) will have either all or none of its // entries in the evacuated* states (except during the evacuate() method, which only happens // during map writes and thus no one else can observe the map during that time). - empty = 0 // cell is empty - evacuatedEmpty = 1 // cell is empty, bucket is evacuated. + emptyRest = 0 // this cell is empty, and there are no more non-empty cells at higher indexes or overflows. + emptyOne = 1 // this cell is empty evacuatedX = 2 // key/value is valid. Entry has been evacuated to first half of larger table. evacuatedY = 3 // same as above, but evacuated to second half of larger table. - minTopHash = 4 // minimum tophash for a normal filled cell. + evacuatedEmpty = 4 // cell is empty, bucket is evacuated. + minTopHash = 5 // minimum tophash for a normal filled cell. // flags iterator = 1 // there may be an iterator using buckets @@ -119,6 +121,11 @@ const ( noCheck = 1<<(8*sys.PtrSize) - 1 ) +// isEmpty reports whether the given tophash array entry represents an empty bucket entry. +func isEmpty(x uint8) bool { + return x <= emptyOne +} + // A header for a Go map. type hmap struct { // Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go. @@ -211,7 +218,7 @@ func tophash(hash uintptr) uint8 { func evacuated(b *bmap) bool { h := b.tophash[0] - return h > empty && h < minTopHash + return h > emptyOne && h < minTopHash } func (b *bmap) overflow(t *maptype) *bmap { @@ -311,7 +318,8 @@ func makemap_small() *hmap { // If h != nil, the map can be created directly in h. // If h.buckets != nil, bucket pointed to can be used as the first bucket. func makemap(t *maptype, hint int, h *hmap) *hmap { - if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) { + mem, overflow := math.MulUintptr(uintptr(hint), t.bucket.size) + if overflow || mem > maxAlloc { hint = 0 } @@ -321,7 +329,8 @@ func makemap(t *maptype, hint int, h *hmap) *hmap { } h.hash0 = fastrand() - // find size parameter which will hold the requested # of elements + // Find the size parameter B which will hold the requested # of elements. + // For hint < 0 overLoadFactor returns false since hint < bucketCnt. 
B := uint8(0) for overLoadFactor(hint, B) { B++ @@ -439,9 +448,13 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { } } top := tophash(hash) +bucketloop: for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) @@ -500,9 +513,13 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) } } top := tophash(hash) +bucketloop: for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) @@ -547,9 +564,13 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe } } top := tophash(hash) +bucketloop: for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) @@ -612,7 +633,7 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { // Set hashWriting after calling alg.hash, since alg.hash may panic, // in which case we have not actually done a write. - h.flags |= hashWriting + h.flags ^= hashWriting if h.buckets == nil { h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) @@ -629,14 +650,18 @@ again: var inserti *uint8 var insertk unsafe.Pointer var val unsafe.Pointer +bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { + if isEmpty(b.tophash[i]) && inserti == nil { inserti = &b.tophash[i] insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) } + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) @@ -728,18 +753,22 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { // Set hashWriting after calling alg.hash, since alg.hash may panic, // in which case we have not actually done a write (delete). - h.flags |= hashWriting + h.flags ^= hashWriting bucket := hash & bucketMask(h.B) if h.growing() { growWork(t, h, bucket) } b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + bOrig := b top := tophash(hash) search: for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { + if b.tophash[i] == emptyRest { + break search + } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) @@ -764,7 +793,39 @@ search: } else { memclrNoHeapPointers(v, t.elem.size) } - b.tophash[i] = empty + b.tophash[i] = emptyOne + // If the bucket now ends in a bunch of emptyOne states, + // change those to emptyRest states. + // It would be nice to make this a separate function, but + // for loops are not currently inlineable. + if i == bucketCnt-1 { + if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest { + goto notLast + } + } else { + if b.tophash[i+1] != emptyRest { + goto notLast + } + } + for { + b.tophash[i] = emptyRest + if i == 0 { + if b == bOrig { + break // beginning of initial bucket, we're done. + } + // Find previous bucket, continue at its last entry. 
+ c := b + for b = bOrig; b.overflow(t) != c; b = b.overflow(t) { + } + i = bucketCnt - 1 + } else { + i-- + } + if b.tophash[i] != emptyOne { + break + } + } + notLast: h.count-- break search } @@ -899,7 +960,9 @@ next: } for ; i < bucketCnt; i++ { offi := (i + it.offset) & (bucketCnt - 1) - if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty { + if isEmpty(b.tophash[offi]) || b.tophash[offi] == evacuatedEmpty { + // TODO: emptyRest is hard to use here, as we start iterating + // in the middle of a bucket. It's feasible, just tricky. continue } k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize)) @@ -990,7 +1053,7 @@ func mapclear(t *maptype, h *hmap) { throw("concurrent map writes") } - h.flags |= hashWriting + h.flags ^= hashWriting h.flags &^= sameSizeGrow h.oldbuckets = nil @@ -1158,7 +1221,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { v := add(k, bucketCnt*uintptr(t.keysize)) for i := 0; i < bucketCnt; i, k, v = i+1, add(k, uintptr(t.keysize)), add(v, uintptr(t.valuesize)) { top := b.tophash[i] - if top == empty { + if isEmpty(top) { b.tophash[i] = evacuatedEmpty continue } @@ -1195,7 +1258,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { } } - if evacuatedX+1 != evacuatedY { + if evacuatedX+1 != evacuatedY || evacuatedX^1 != evacuatedY { throw("bad evacuatedN") } @@ -1351,6 +1414,11 @@ func reflect_mapiterkey(it *hiter) unsafe.Pointer { return it.key } +//go:linkname reflect_mapitervalue reflect.mapitervalue +func reflect_mapitervalue(it *hiter) unsafe.Pointer { + return it.value +} + //go:linkname reflect_maplen reflect.maplen func reflect_maplen(h *hmap) int { if h == nil { diff --git a/libgo/go/runtime/map_benchmark_test.go b/libgo/go/runtime/map_benchmark_test.go index 025c0398d3b..d37dadcb569 100644 --- a/libgo/go/runtime/map_benchmark_test.go +++ b/libgo/go/runtime/map_benchmark_test.go @@ -5,6 +5,7 @@ package runtime_test import ( "fmt" + "math/rand" "strconv" "strings" "testing" @@ -206,6 +207,67 @@ func BenchmarkIntMap(b *testing.B) { } } +func BenchmarkMapFirst(b *testing.B) { + for n := 1; n <= 16; n++ { + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + m := make(map[int]bool) + for i := 0; i < n; i++ { + m[i] = true + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = m[0] + } + }) + } +} +func BenchmarkMapMid(b *testing.B) { + for n := 1; n <= 16; n++ { + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + m := make(map[int]bool) + for i := 0; i < n; i++ { + m[i] = true + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = m[n>>1] + } + }) + } +} +func BenchmarkMapLast(b *testing.B) { + for n := 1; n <= 16; n++ { + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + m := make(map[int]bool) + for i := 0; i < n; i++ { + m[i] = true + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = m[n-1] + } + }) + } +} + +func BenchmarkMapCycle(b *testing.B) { + // Arrange map entries to be a permuation, so that + // we hit all entries, and one lookup is data dependent + // on the previous lookup. + const N = 3127 + p := rand.New(rand.NewSource(1)).Perm(N) + m := map[int]int{} + for i := 0; i < N; i++ { + m[i] = p[i] + } + b.ResetTimer() + j := 0 + for i := 0; i < b.N; i++ { + j = m[j] + } + sink = uint64(j) +} + // Accessing the same keys in a row. 
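The mapdelete hunk above, and its fast-path twins later in the patch, mark the deleted slot emptyOne and then, when nothing follows it, walk backwards converting the trailing run of emptyOne slots into emptyRest so later lookups can terminate early. The sketch below shows that back-walk for a single bucket only; the real code additionally re-chases the overflow chain from the front whenever the walk crosses a bucket boundary, and deleteAt is an invented name.

package main

import "fmt"

const (
	emptyRest = 0
	emptyOne  = 1
)

// deleteAt marks slot i deleted and coalesces a trailing run of empty
// slots into emptyRest, mirroring the single-bucket case of the patch.
func deleteAt(tophash []uint8, i int) {
	tophash[i] = emptyOne
	// Only coalesce if everything after i is already emptyRest
	// (or i is the last slot of the bucket).
	if i != len(tophash)-1 && tophash[i+1] != emptyRest {
		return
	}
	for i >= 0 && tophash[i] == emptyOne {
		tophash[i] = emptyRest
		i--
	}
}

func main() {
	b := []uint8{7, 9, emptyOne, 8, emptyRest, emptyRest, emptyRest, emptyRest}
	deleteAt(b, 3) // removing the last live entry lets slots 2..7 become emptyRest
	fmt.Println(b) // [7 9 0 0 0 0 0 0]
}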
func benchmarkRepeatedLookup(b *testing.B, lookupKeySize int) { m := make(map[string]bool) @@ -228,6 +290,23 @@ func benchmarkRepeatedLookup(b *testing.B, lookupKeySize int) { func BenchmarkRepeatedLookupStrMapKey32(b *testing.B) { benchmarkRepeatedLookup(b, 32) } func BenchmarkRepeatedLookupStrMapKey1M(b *testing.B) { benchmarkRepeatedLookup(b, 1<<20) } +func BenchmarkMakeMap(b *testing.B) { + b.Run("[Byte]Byte", func(b *testing.B) { + var m map[byte]byte + for i := 0; i < b.N; i++ { + m = make(map[byte]byte, 10) + } + hugeSink = m + }) + b.Run("[Int]Int", func(b *testing.B) { + var m map[int]int + for i := 0; i < b.N; i++ { + m = make(map[int]int, 10) + } + hugeSink = m + }) +} + func BenchmarkNewEmptyMap(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { @@ -370,3 +449,37 @@ func BenchmarkGoMapClear(b *testing.B) { } }) } + +func BenchmarkMapStringConversion(b *testing.B) { + for _, length := range []int{32, 64} { + b.Run(strconv.Itoa(length), func(b *testing.B) { + bytes := make([]byte, length) + b.Run("simple", func(b *testing.B) { + b.ReportAllocs() + m := make(map[string]int) + m[string(bytes)] = 0 + for i := 0; i < b.N; i++ { + _ = m[string(bytes)] + } + }) + b.Run("struct", func(b *testing.B) { + b.ReportAllocs() + type stringstruct struct{ s string } + m := make(map[stringstruct]int) + m[stringstruct{string(bytes)}] = 0 + for i := 0; i < b.N; i++ { + _ = m[stringstruct{string(bytes)}] + } + }) + b.Run("array", func(b *testing.B) { + b.ReportAllocs() + type stringarray [1]string + m := make(map[stringarray]int) + m[stringarray{string(bytes)}] = 0 + for i := 0; i < b.N; i++ { + _ = m[stringarray{string(bytes)}] + } + }) + }) + } +} diff --git a/libgo/go/runtime/map_fast32.go b/libgo/go/runtime/map_fast32.go index a9a06a85c26..1fa5cd968e4 100644 --- a/libgo/go/runtime/map_fast32.go +++ b/libgo/go/runtime/map_fast32.go @@ -41,7 +41,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { } for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { + if *(*uint32)(k) == key && !isEmpty(b.tophash[i]) { return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) } } @@ -81,7 +81,7 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { } for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { + if *(*uint32)(k) == key && !isEmpty(b.tophash[i]) { return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true } } @@ -103,7 +103,7 @@ func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapassign. 
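BenchmarkMapStringConversion above tracks whether a map lookup keyed by string(bytes), either directly or wrapped in a one-field struct or one-element array, can avoid materializing a temporary string. Here is a short usage example of the patterns being benchmarked; whether the allocation is actually elided depends on the compiler, so read the comments as intent rather than guarantees.

package main

import "fmt"

func main() {
	counts := map[string]int{"hello": 3}
	key := []byte("hello")

	// The conversion inside the index expression can be optimized so no
	// temporary string needs to be heap-allocated for the lookup.
	if n, ok := counts[string(key)]; ok {
		fmt.Println("count:", n)
	}

	// The same pattern with the converted string as the only field of a
	// struct key, which is what the "struct" and "array" sub-benchmarks
	// exercise.
	type wrapped struct{ s string }
	m2 := map[wrapped]int{{"hello"}: 1}
	fmt.Println(m2[wrapped{string(key)}])
}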
- h.flags |= hashWriting + h.flags ^= hashWriting if h.buckets == nil { h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) @@ -120,13 +120,17 @@ again: var inserti uintptr var insertk unsafe.Pointer +bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { + if isEmpty(b.tophash[i]) { if insertb == nil { inserti = i insertb = b } + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) @@ -189,7 +193,7 @@ func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting + h.flags ^= hashWriting if h.buckets == nil { h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) @@ -206,13 +210,17 @@ again: var inserti uintptr var insertk unsafe.Pointer +bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { + if isEmpty(b.tophash[i]) { if insertb == nil { inserti = i insertb = b } + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) @@ -276,17 +284,18 @@ func mapdelete_fast32(t *maptype, h *hmap, key uint32) { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting + h.flags ^= hashWriting bucket := hash & bucketMask(h.B) if h.growing() { growWork_fast32(t, h, bucket) } b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + bOrig := b search: for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if key != *(*uint32)(k) || b.tophash[i] == empty { + if key != *(*uint32)(k) || isEmpty(b.tophash[i]) { continue } // Only clear key if there are pointers in it. @@ -299,7 +308,37 @@ search: } else { memclrNoHeapPointers(v, t.elem.size) } - b.tophash[i] = empty + b.tophash[i] = emptyOne + // If the bucket now ends in a bunch of emptyOne states, + // change those to emptyRest states. + if i == bucketCnt-1 { + if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest { + goto notLast + } + } else { + if b.tophash[i+1] != emptyRest { + goto notLast + } + } + for { + b.tophash[i] = emptyRest + if i == 0 { + if b == bOrig { + break // beginning of initial bucket, we're done. + } + // Find previous bucket, continue at its last entry. 
+ c := b + for b = bOrig; b.overflow(t) != c; b = b.overflow(t) { + } + i = bucketCnt - 1 + } else { + i-- + } + if b.tophash[i] != emptyOne { + break + } + } + notLast: h.count-- break search } @@ -350,7 +389,7 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { v := add(k, bucketCnt*4) for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { top := b.tophash[i] - if top == empty { + if isEmpty(top) { b.tophash[i] = evacuatedEmpty continue } diff --git a/libgo/go/runtime/map_fast64.go b/libgo/go/runtime/map_fast64.go index a2a51fcac6a..d23ac23eb14 100644 --- a/libgo/go/runtime/map_fast64.go +++ b/libgo/go/runtime/map_fast64.go @@ -41,7 +41,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { } for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { + if *(*uint64)(k) == key && !isEmpty(b.tophash[i]) { return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) } } @@ -81,7 +81,7 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { } for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { + if *(*uint64)(k) == key && !isEmpty(b.tophash[i]) { return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true } } @@ -103,7 +103,7 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting + h.flags ^= hashWriting if h.buckets == nil { h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) @@ -120,13 +120,17 @@ again: var inserti uintptr var insertk unsafe.Pointer +bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { + if isEmpty(b.tophash[i]) { if insertb == nil { insertb = b inserti = i } + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) @@ -189,7 +193,7 @@ func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting + h.flags ^= hashWriting if h.buckets == nil { h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) @@ -206,13 +210,17 @@ again: var inserti uintptr var insertk unsafe.Pointer +bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { + if isEmpty(b.tophash[i]) { if insertb == nil { insertb = b inserti = i } + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) @@ -276,17 +284,18 @@ func mapdelete_fast64(t *maptype, h *hmap, key uint64) { hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting + h.flags ^= hashWriting bucket := hash & bucketMask(h.B) if h.growing() { growWork_fast64(t, h, bucket) } b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + bOrig := b search: for ; b != nil; b = b.overflow(t) { for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if key != *(*uint64)(k) || b.tophash[i] == empty { + if key != *(*uint64)(k) || isEmpty(b.tophash[i]) { continue } // Only clear key if there are pointers in it. @@ -299,7 +308,37 @@ search: } else { memclrNoHeapPointers(v, t.elem.size) } - b.tophash[i] = empty + b.tophash[i] = emptyOne + // If the bucket now ends in a bunch of emptyOne states, + // change those to emptyRest states. + if i == bucketCnt-1 { + if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest { + goto notLast + } + } else { + if b.tophash[i+1] != emptyRest { + goto notLast + } + } + for { + b.tophash[i] = emptyRest + if i == 0 { + if b == bOrig { + break // beginning of initial bucket, we're done. + } + // Find previous bucket, continue at its last entry. 
+ c := b + for b = bOrig; b.overflow(t) != c; b = b.overflow(t) { + } + i = bucketCnt - 1 + } else { + i-- + } + if b.tophash[i] != emptyOne { + break + } + } + notLast: h.count-- break search } @@ -350,7 +389,7 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { v := add(k, bucketCnt*8) for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { top := b.tophash[i] - if top == empty { + if isEmpty(top) { b.tophash[i] = evacuatedEmpty continue } diff --git a/libgo/go/runtime/map_faststr.go b/libgo/go/runtime/map_faststr.go index 5812b3f1049..eced15a6d5d 100644 --- a/libgo/go/runtime/map_faststr.go +++ b/libgo/go/runtime/map_faststr.go @@ -28,7 +28,10 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { // short key, doing lots of comparisons is ok for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { + if k.len != key.len || isEmpty(b.tophash[i]) { + if b.tophash[i] == emptyRest { + break + } continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { @@ -41,7 +44,10 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { keymaybe := uintptr(bucketCnt) for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { + if k.len != key.len || isEmpty(b.tophash[i]) { + if b.tophash[i] == emptyRest { + break + } continue } if k.str == key.str { @@ -117,7 +123,10 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { // short key, doing lots of comparisons is ok for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { + if k.len != key.len || isEmpty(b.tophash[i]) { + if b.tophash[i] == emptyRest { + break + } continue } if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { @@ -130,7 +139,10 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { keymaybe := uintptr(bucketCnt) for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { + if k.len != key.len || isEmpty(b.tophash[i]) { + if b.tophash[i] == emptyRest { + break + } continue } if k.str == key.str { @@ -202,7 +214,7 @@ func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting + h.flags ^= hashWriting if h.buckets == nil { h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) @@ -220,13 +232,17 @@ again: var inserti uintptr var insertk unsafe.Pointer +bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { - if b.tophash[i] == empty && insertb == nil { + if isEmpty(b.tophash[i]) && insertb == nil { insertb = b inserti = i } + if b.tophash[i] == emptyRest { + break bucketloop + } continue } k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) @@ -294,13 +310,14 @@ func mapdelete_faststr(t *maptype, h *hmap, ky string) { hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting + h.flags ^= hashWriting bucket := hash & bucketMask(h.B) if h.growing() { growWork_faststr(t, h, bucket) } b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + bOrig := b top := tophash(hash) search: for ; b != nil; b = b.overflow(t) { @@ -320,7 +337,37 @@ search: } else { memclrNoHeapPointers(v, t.elem.size) } - b.tophash[i] = empty + b.tophash[i] = emptyOne + // If the bucket now ends in a bunch of emptyOne states, + // change those to emptyRest states. + if i == bucketCnt-1 { + if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest { + goto notLast + } + } else { + if b.tophash[i+1] != emptyRest { + goto notLast + } + } + for { + b.tophash[i] = emptyRest + if i == 0 { + if b == bOrig { + break // beginning of initial bucket, we're done. + } + // Find previous bucket, continue at its last entry. + c := b + for b = bOrig; b.overflow(t) != c; b = b.overflow(t) { + } + i = bucketCnt - 1 + } else { + i-- + } + if b.tophash[i] != emptyOne { + break + } + } + notLast: h.count-- break search } @@ -371,7 +418,7 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { v := add(k, bucketCnt*2*sys.PtrSize) for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { top := b.tophash[i] - if top == empty { + if isEmpty(top) { b.tophash[i] = evacuatedEmpty continue } diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go index 13f1d2ea62a..bc5f738c4fa 100644 --- a/libgo/go/runtime/map_test.go +++ b/libgo/go/runtime/map_test.go @@ -1147,3 +1147,28 @@ func TestIncrementAfterBulkClearKeyStringValueInt(t *testing.T) { t.Errorf("incremented 0 to %d", n2) } } + +func TestMapTombstones(t *testing.T) { + m := map[int]int{} + const N = 10000 + // Fill a map. + for i := 0; i < N; i++ { + m[i] = i + } + runtime.MapTombstoneCheck(m) + // Delete half of the entries. + for i := 0; i < N; i += 2 { + delete(m, i) + } + runtime.MapTombstoneCheck(m) + // Add new entries to fill in holes. + for i := N; i < 3*N/2; i++ { + m[i] = i + } + runtime.MapTombstoneCheck(m) + // Delete everything. + for i := 0; i < 3*N/2; i++ { + delete(m, i) + } + runtime.MapTombstoneCheck(m) +} diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go index 48713157601..d3ffd3c518d 100644 --- a/libgo/go/runtime/mbarrier.go +++ b/libgo/go/runtime/mbarrier.go @@ -215,8 +215,6 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size //go:nosplit func typedslicecopy(typ *_type, dst, src slice) int { - // TODO(rsc): If typedslicecopy becomes faster than calling - // typedmemmove repeatedly, consider using during func growslice. 
n := dst.len if n > src.len { n = src.len diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index 42c2015ee44..9c25a215f35 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -242,7 +242,7 @@ func (s *mspan) nextFreeIndex() uintptr { return result } -// isFree returns whether the index'th object in s is unallocated. +// isFree reports whether the index'th object in s is unallocated. func (s *mspan) isFree(index uintptr) bool { if index < s.freeindex { return false @@ -283,9 +283,7 @@ func (m markBits) isMarked() bool { return *m.bytep&m.mask != 0 } -// setMarked sets the marked bit in the markbits, atomically. Some compilers -// are not able to inline atomic.Or8 function so if it appears as a hot spot consider -// inlining it manually. +// setMarked sets the marked bit in the markbits, atomically. func (m markBits) setMarked() { // Might be racing with other updates, so use atomic update always. // We used to be clever here and use a non-atomic update in certain @@ -368,7 +366,7 @@ func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *msp s = spanOf(p) // If p is a bad pointer, it may not be in s's bounds. if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { - if s == nil || s.state == _MSpanManual || forStack { + if s == nil || s.state == mSpanManual || forStack { // If s is nil, the virtual address has never been part of the heap. // This pointer may be to some mmap'd region, so we allow it. // Pointers into stacks are also ok, the runtime manages these explicitly. @@ -533,7 +531,7 @@ func (h heapBits) bits() uint32 { return uint32(*h.bitp) >> (h.shift & 31) } -// morePointers returns true if this word and all remaining words in this object +// morePointers reports whether this word and all remaining words in this object // are scalars. // h must not describe the second word of the object. func (h heapBits) morePointers() bool { @@ -631,7 +629,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { } } return - } else if s.state != _MSpanInUse || dst < s.base() || s.limit <= dst { + } else if s.state != mSpanInUse || dst < s.base() || s.limit <= dst { // dst was heap memory at some point, but isn't now. // It can't be a global. It must be either our stack, // or in the case of direct channel sends, it could be @@ -667,6 +665,35 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { } } +// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but +// does not execute write barriers for [dst, dst+size). +// +// In addition to the requirements of bulkBarrierPreWrite +// callers need to ensure [dst, dst+size) is zeroed. +// +// This is used for special cases where e.g. dst was just +// created and zeroed with malloc. +//go:nosplit +func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) { + if (dst|src|size)&(sys.PtrSize-1) != 0 { + throw("bulkBarrierPreWrite: unaligned arguments") + } + if !writeBarrier.needed { + return + } + buf := &getg().m.p.ptr().wbBuf + h := heapBitsForAddr(dst) + for i := uintptr(0); i < size; i += sys.PtrSize { + if h.isPointer() { + srcx := (*uintptr)(unsafe.Pointer(src + i)) + if !buf.putFast(0, *srcx) { + wbBufFlush(nil, 0) + } + } + h = h.next() + } +} + // bulkBarrierBitmap executes write barriers for copying from [src, // src+size) to [dst, dst+size) using a 1-bit pointer bitmap. 
src is // assumed to start maskOffset bytes into the data covered by the @@ -1902,6 +1929,20 @@ Run: return totalBits } +// materializeGCProg allocates space for the (1-bit) pointer bitmask +// for an object of size ptrdata. Then it fills that space with the +// pointer bitmask specified by the program prog. +// The bitmask starts at s.startAddr. +// The result must be deallocated with dematerializeGCProg. +func materializeGCProg(ptrdata uintptr, prog *byte) *mspan { + s := mheap_.allocManual((ptrdata/(8*sys.PtrSize)+pageSize-1)/pageSize, &memstats.gc_sys) + runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1) + return s +} +func dematerializeGCProg(s *mspan) { + mheap_.freeManual(s, &memstats.gc_sys) +} + func dumpGCProg(p *byte) { nptr := 0 for { @@ -1962,7 +2003,9 @@ func reflect_gcbits(x interface{}) []byte { return ret } -// Returns GC type info for object p for testing. +// Returns GC type info for the pointer stored in ep for testing. +// If ep points to the stack, only static live information will be returned +// (i.e. not for objects which are only dynamically live stack objects). func getgcmask(ep interface{}) (mask []byte) { e := *efaceOf(&ep) p := e.data diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index 3dacf9692ec..2045158636a 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -4,7 +4,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/atomic" + "unsafe" +) // Per-thread (in Go, per-P) cache for small objects. // No locking needed because it is per-thread (per-P). @@ -40,6 +43,12 @@ type mcache struct { local_largefree uintptr // bytes freed for large objects (>maxsmallsize) local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) + + // flushGen indicates the sweepgen during which this mcache + // was last flushed. If flushGen != mheap_.sweepgen, the spans + // in this mcache are stale and need to the flushed so they + // can be swept. This is done in acquirep. + flushGen uint32 } // A gclink is a node in a linked list of blocks, like mlink, @@ -63,12 +72,13 @@ func (p gclinkptr) ptr() *gclink { return (*gclink)(unsafe.Pointer(p)) } -// dummy MSpan that contains no free objects. +// dummy mspan that contains no free objects. var emptymspan mspan func allocmcache() *mcache { lock(&mheap_.lock) c := (*mcache)(mheap_.cachealloc.alloc()) + c.flushGen = mheap_.sweepgen unlock(&mheap_.lock) for i := range c.alloc { c.alloc[i] = &emptymspan @@ -93,21 +103,24 @@ func freemcache(c *mcache) { }) } -// Gets a span that has a free object in it and assigns it -// to be the cached span for the given sizeclass. Returns this span. +// refill acquires a new span of span class spc for c. This span will +// have at least one free object. The current span in c must be full. +// +// Must run in a non-preemptible context since otherwise the owner of +// c could change. func (c *mcache) refill(spc spanClass) { - _g_ := getg() - - _g_.m.locks++ // Return the current cached span to the central lists. s := c.alloc[spc] if uintptr(s.allocCount) != s.nelems { throw("refill of span with free space remaining") } - if s != &emptymspan { - s.incache = false + // Mark this span as no longer cached. + if s.sweepgen != mheap_.sweepgen+3 { + throw("bad sweepgen in refill") + } + atomic.Store(&s.sweepgen, mheap_.sweepgen) } // Get a new cached span from the central lists. 
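materializeGCProg above sizes its scratch span as (ptrdata/(8*sys.PtrSize)+pageSize-1)/pageSize pages: one mask bit per pointer-sized word of data, rounded up to whole runtime pages. A tiny arithmetic check of that sizing, assuming a 64-bit word and the runtime's 8 KiB page size; bitmapPages is an illustrative name.

package main

import "fmt"

const (
	ptrSize  = 8       // bytes per pointer-sized word (64-bit assumed)
	pageSize = 8 << 10 // the runtime's 8 KiB page size
)

// bitmapPages returns how many whole pages are needed to hold a 1-bit
// mask covering ptrdata bytes of pointer data.
func bitmapPages(ptrdata uintptr) uintptr {
	bytes := ptrdata / (8 * ptrSize)         // one bit per word gives bytes of mask
	return (bytes + pageSize - 1) / pageSize // round up to whole pages
}

func main() {
	fmt.Println(bitmapPages(1 << 20)) // a 1 MiB object needs 16 KiB of mask, i.e. 2 pages
	fmt.Println(bitmapPages(4096))    // small objects still round up to 1 page
}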
@@ -120,8 +133,11 @@ func (c *mcache) refill(spc spanClass) { throw("span has no free space") } + // Indicate that this span is cached and prevent asynchronous + // sweeping in the next sweep phase. + s.sweepgen = mheap_.sweepgen + 3 + c.alloc[spc] = s - _g_.m.locks-- } func (c *mcache) releaseAll() { @@ -136,3 +152,25 @@ func (c *mcache) releaseAll() { c.tiny = 0 c.tinyoffset = 0 } + +// prepareForSweep flushes c if the system has entered a new sweep phase +// since c was populated. This must happen between the sweep phase +// starting and the first allocation from c. +func (c *mcache) prepareForSweep() { + // Alternatively, instead of making sure we do this on every P + // between starting the world and allocating on that P, we + // could leave allocate-black on, allow allocation to continue + // as usual, use a ragged barrier at the beginning of sweep to + // ensure all cached spans are swept, and then disable + // allocate-black. However, with this approach it's difficult + // to avoid spilling mark bits into the *next* GC cycle. + sg := mheap_.sweepgen + if c.flushGen == sg { + return + } else if c.flushGen != sg-2 { + println("bad flushGen", c.flushGen, "in prepareForSweep; sweepgen", sg) + throw("bad flushGen") + } + c.releaseAll() + atomic.Store(&c.flushGen, mheap_.sweepgen) // Synchronizes with gcStart +} diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go index 50a4791e8e6..0196ba44c5d 100644 --- a/libgo/go/runtime/mcentral.go +++ b/libgo/go/runtime/mcentral.go @@ -6,8 +6,8 @@ // // See malloc.go for an overview. // -// The MCentral doesn't actually contain the list of free objects; the MSpan does. -// Each MCentral is two lists of MSpans: those with free objects (c->nonempty) +// The mcentral doesn't actually contain the list of free objects; the mspan does. +// Each mcentral is two lists of mspans: those with free objects (c->nonempty) // and those that are completely allocated (c->empty). package runtime @@ -36,7 +36,7 @@ func (c *mcentral) init(spc spanClass) { c.empty.init() } -// Allocate a span to use in an MCache. +// Allocate a span to use in an mcache. func (c *mcentral) cacheSpan() *mspan { // Deduct credit for this span allocation and sweep if necessary. spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize @@ -126,8 +126,7 @@ havespan: if trace.enabled && !traceDone { traceGCSweepDone() } - cap := int32((s.npages << _PageShift) / s.elemsize) - n := cap - int32(s.allocCount) + n := int(s.nelems) - int(s.allocCount) if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems { throw("span has no free objects") } @@ -144,7 +143,6 @@ havespan: // heap_live changed. gcController.revise() } - s.incache = true freeByteBase := s.freeindex &^ (64 - 1) whichByte := freeByteBase / 8 // Init alloc bits cache. @@ -157,30 +155,56 @@ havespan: return s } -// Return span from an MCache. +// Return span from an mcache. func (c *mcentral) uncacheSpan(s *mspan) { - lock(&c.lock) - - s.incache = false - if s.allocCount == 0 { throw("uncaching span but s.allocCount == 0") } - cap := int32((s.npages << _PageShift) / s.elemsize) - n := cap - int32(s.allocCount) + sg := mheap_.sweepgen + stale := s.sweepgen == sg+1 + if stale { + // Span was cached before sweep began. It's our + // responsibility to sweep it. + // + // Set sweepgen to indicate it's not cached but needs + // sweeping and can't be allocated from. sweep will + // set s.sweepgen to indicate s is swept. 
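refill and prepareForSweep above hang cache validity off a generation number: the mcache records the sweepgen at which it was last flushed, and the first allocation after a new sweep phase flushes it lazily. Below is a minimal non-runtime sketch of that generation-keyed lazy flush; cache, prepare, and globalGen are illustrative names.

package main

import (
	"fmt"
	"sync/atomic"
)

// globalGen is bumped by 2 each time a new "sweep phase" starts,
// echoing how mheap_.sweepgen advances.
var globalGen uint32 = 2

type cache struct {
	items    []int
	flushGen uint32 // generation at which this cache was last flushed
}

// prepare flushes c if a new phase has begun since the last flush.
// Callers invoke it before the first use of c in each phase.
func (c *cache) prepare() {
	g := atomic.LoadUint32(&globalGen)
	switch fg := atomic.LoadUint32(&c.flushGen); {
	case fg == g:
		return // already flushed this phase
	case fg == g-2:
		c.items = c.items[:0] // stale: drop the cached state
		atomic.StoreUint32(&c.flushGen, g)
	default:
		panic(fmt.Sprintf("bad flushGen %d with global %d", fg, g))
	}
}

func main() {
	c := &cache{flushGen: globalGen}
	c.items = append(c.items, 1, 2, 3)

	atomic.AddUint32(&globalGen, 2) // a new phase begins
	c.prepare()                     // first use in the new phase flushes
	fmt.Println(len(c.items))       // 0
}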
+ atomic.Store(&s.sweepgen, sg-1) + } else { + // Indicate that s is no longer cached. + atomic.Store(&s.sweepgen, sg) + } + + n := int(s.nelems) - int(s.allocCount) if n > 0 { - c.empty.remove(s) - c.nonempty.insert(s) - // mCentral_CacheSpan conservatively counted - // unallocated slots in heap_live. Undo this. - atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize)) // cacheSpan updated alloc assuming all objects on s // were going to be allocated. Adjust for any that - // weren't. + // weren't. We must do this before potentially + // sweeping the span. atomic.Xadd64(&c.nmalloc, -int64(n)) + + lock(&c.lock) + c.empty.remove(s) + c.nonempty.insert(s) + if !stale { + // mCentral_CacheSpan conservatively counted + // unallocated slots in heap_live. Undo this. + // + // If this span was cached before sweep, then + // heap_live was totally recomputed since + // caching this span, so we don't do this for + // stale spans. + atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize)) + } + unlock(&c.lock) + } + + if stale { + // Now that s is in the right mcentral list, we can + // sweep it. + s.sweep(false) } - unlock(&c.lock) } // freeSpan updates c and s after sweeping s. @@ -188,17 +212,17 @@ func (c *mcentral) uncacheSpan(s *mspan) { // and, based on the number of free objects in s, // moves s to the appropriate list of c or returns it // to the heap. -// freeSpan returns true if s was returned to the heap. +// freeSpan reports whether s was returned to the heap. // If preserve=true, it does not move s (the caller // must take care of it). func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { - if s.incache { + if sg := mheap_.sweepgen; s.sweepgen == sg+1 || s.sweepgen == sg+3 { throw("freeSpan given cached span") } s.needzero = 1 if preserve { - // preserve is set only when called from MCentral_CacheSpan above, + // preserve is set only when called from (un)cacheSpan above, // the span must be in the empty list. if !s.inList() { throw("can't preserve unlinked span") @@ -216,7 +240,7 @@ func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { } // delay updating sweepgen until here. This is the signal that - // the span may be used in an MCache, so it must come after the + // the span may be used in an mcache, so it must come after the // linked list operations above (actually, just after the // lock of c above.) atomic.Store(&s.sweepgen, mheap_.sweepgen) @@ -228,7 +252,7 @@ func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { c.nonempty.remove(s) unlock(&c.lock) - mheap_.freeSpan(s, 0) + mheap_.freeSpan(s, false) return true } diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go index 1a7792c2205..caf2e7e1cce 100644 --- a/libgo/go/runtime/mfinal.go +++ b/libgo/go/runtime/mfinal.go @@ -135,6 +135,7 @@ func runfinq() { ) gp := getg() + gp.isFinalizerGoroutine = true for { lock(&finlock) fb := finq diff --git a/libgo/go/runtime/mfixalloc.go b/libgo/go/runtime/mfixalloc.go index 1febe782bb6..f9dd6ca474d 100644 --- a/libgo/go/runtime/mfixalloc.go +++ b/libgo/go/runtime/mfixalloc.go @@ -12,7 +12,7 @@ import "unsafe" // FixAlloc is a simple free-list allocator for fixed size objects. // Malloc uses a FixAlloc wrapped around sysAlloc to manage its -// MCache and MSpan objects. +// mcache and mspan objects. 
// // Memory returned by fixalloc.alloc is zeroed by default, but the // caller may take responsibility for zeroing allocations by setting diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go index de84084b391..0973f1de6f7 100644 --- a/libgo/go/runtime/mgc.go +++ b/libgo/go/runtime/mgc.go @@ -28,8 +28,7 @@ // b. Sweep any unswept spans. There will only be unswept spans if // this GC cycle was forced before the expected time. // -// 2. GC performs the "mark 1" sub-phase. In this sub-phase, Ps are -// allowed to locally cache parts of the work queue. +// 2. GC performs the mark phase. // // a. Prepare for the mark phase by setting gcphase to _GCmark // (from _GCoff), enabling the write barrier, enabling mutator @@ -54,28 +53,21 @@ // object to black and shading all pointers found in the object // (which in turn may add those pointers to the work queue). // -// 3. Once the global work queue is empty (but local work queue caches -// may still contain work), GC performs the "mark 2" sub-phase. +// e. Because GC work is spread across local caches, GC uses a +// distributed termination algorithm to detect when there are no +// more root marking jobs or grey objects (see gcMarkDone). At this +// point, GC transitions to mark termination. // -// a. GC stops all workers, disables local work queue caches, -// flushes each P's local work queue cache to the global work queue -// cache, and reenables workers. -// -// b. GC again drains the work queue, as in 2d above. -// -// 4. Once the work queue is empty, GC performs mark termination. +// 3. GC performs mark termination. // // a. Stop the world. // // b. Set gcphase to _GCmarktermination, and disable workers and // assists. // -// c. Drain any remaining work from the work queue (typically there -// will be none). -// -// d. Perform other housekeeping like flushing mcaches. +// c. Perform housekeeping like flushing mcaches. // -// 5. GC performs the sweep phase. +// 4. GC performs the sweep phase. // // a. Prepare for the sweep phase by setting gcphase to _GCoff, // setting up sweep state and disabling the write barrier. @@ -86,7 +78,7 @@ // c. GC does concurrent sweeping in the background and in response // to allocation. See description below. // -// 6. When sufficient allocation has taken place, replay the sequence +// 5. When sufficient allocation has taken place, replay the sequence // starting with 1 above. See discussion of GC rate below. // Concurrent sweep. @@ -137,8 +129,8 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" - "runtime/internal/sys" "unsafe" ) @@ -262,21 +254,6 @@ var writeBarrier struct { // gcphase == _GCmark. var gcBlackenEnabled uint32 -// gcBlackenPromptly indicates that optimizations that may -// hide work from the global work queue should be disabled. -// -// If gcBlackenPromptly is true, per-P gcWork caches should -// be flushed immediately and new objects should be allocated black. -// -// There is a tension between allocating objects white and -// allocating them black. If white and the objects die before being -// marked they can be collected during this GC cycle. On the other -// hand allocating them black will reduce _GCmarktermination latency -// since more work is done in the mark phase. This tension is resolved -// by allocating white until the mark phase is approaching its end and -// then allocating black for the remainder of the mark phase. 
-var gcBlackenPromptly bool - const ( _GCoff = iota // GC not running; sweeping in background, write barrier disabled _GCmark // GC marking roots and workbufs: allocate black, write barrier ENABLED @@ -408,14 +385,14 @@ type gcControllerState struct { // each P that isn't running a dedicated worker. // // For example, if the utilization goal is 25% and there are - // no dedicated workers, this will be 0.25. If there goal is + // no dedicated workers, this will be 0.25. If the goal is // 25%, there is one dedicated worker, and GOMAXPROCS is 5, // this will be 0.05 to make up the missing 5%. // // If this is zero, no fractional workers are needed. fractionalUtilizationGoal float64 - _ [sys.CacheLineSize]byte + _ cpu.CacheLinePad } // startCycle resets the GC controller's state and computes estimates @@ -479,6 +456,12 @@ func (c *gcControllerState) startCycle() { c.fractionalUtilizationGoal = 0 } + // In STW mode, we just want dedicated workers. + if debug.gcstoptheworld > 0 { + c.dedicatedMarkWorkersNeeded = int64(gomaxprocs) + c.fractionalUtilizationGoal = 0 + } + // Clear per-P state for _, p := range allp { p.gcAssistTime = 0 @@ -753,7 +736,7 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { return gp } -// pollFractionalWorkerExit returns true if a fractional mark worker +// pollFractionalWorkerExit reports whether a fractional mark worker // should self-preempt. It assumes it is called from the fractional // worker. func pollFractionalWorkerExit() bool { @@ -807,7 +790,7 @@ func gcSetTriggerRatio(triggerRatio float64) { trigger = uint64(float64(memstats.heap_marked) * (1 + triggerRatio)) // Don't trigger below the minimum heap size. minTrigger := heapminimum - if !gosweepdone() { + if !isSweepDone() { // Concurrent sweep happens in the heap growth // from heap_live to gc_trigger, so ensure // that concurrent sweep has some heap growth @@ -852,7 +835,7 @@ func gcSetTriggerRatio(triggerRatio float64) { } // Update sweep pacing. - if gosweepdone() { + if isSweepDone() { mheap_.sweepPagesPerByte = 0 } else { // Concurrent sweep needs to sweep all of the in-use @@ -902,7 +885,7 @@ const gcGoalUtilization = 0.30 // mutator latency. const gcBackgroundUtilization = 0.25 -// gcCreditSlack is the amount of scan work credit that can can +// gcCreditSlack is the amount of scan work credit that can // accumulate locally before updating gcController.scanWork and, // optionally, gcController.bgScanCredit. Lower values give a more // accurate assist ratio and make it more likely that assists will @@ -920,9 +903,9 @@ const gcAssistTimeSlack = 5000 const gcOverAssistWork = 64 << 10 var work struct { - full lfstack // lock-free list of full blocks workbuf - empty lfstack // lock-free list of empty blocks workbuf - pad0 [sys.CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait + full lfstack // lock-free list of full blocks workbuf + empty lfstack // lock-free list of empty blocks workbuf + pad0 cpu.CacheLinePad // prevents false-sharing between full/empty and nproc/nwait wbufSpans struct { lock mutex @@ -956,32 +939,15 @@ var work struct { markrootNext uint32 // next markroot job markrootJobs uint32 // number of markroot jobs - nproc uint32 - tstart int64 - nwait uint32 - ndone uint32 - alldone note - - // helperDrainBlock indicates that GC mark termination helpers - // should pass gcDrainBlock to gcDrain to block in the - // getfull() barrier. Otherwise, they should pass gcDrainNoBlock. 
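The controller comment above describes fractionalUtilizationGoal as the slice of each P's time needed to top up whatever the dedicated workers cannot cover of the 25% background goal (gcBackgroundUtilization, later in this file). The sketch below shows one way such a split can be computed; the 30% rounding tolerance is an assumption about the controller's behavior rather than something visible in this hunk, and splitWorkers is an invented name.

package main

import "fmt"

// splitWorkers divides a background utilization goal across procs into
// whole dedicated workers plus a per-P fractional goal. Rounding to the
// nearest whole worker is accepted unless it misses the goal by more
// than 30%; otherwise the shortfall becomes fractional work.
func splitWorkers(procs int, goal float64) (dedicated int, fractional float64) {
	total := float64(procs) * goal
	dedicated = int(total + 0.5) // round to the nearest whole worker
	const maxErr = 0.3
	if err := float64(dedicated)/total - 1; err < -maxErr || err > maxErr {
		// Too far off: never overshoot with dedicated workers, and make
		// up the remainder with fractional time on every P.
		if float64(dedicated) > total {
			dedicated--
		}
		fractional = (total - float64(dedicated)) / float64(procs)
	}
	return
}

func main() {
	fmt.Println(splitWorkers(1, 0.25)) // 0 0.25: no dedicated workers, 25% fractional
	fmt.Println(splitWorkers(5, 0.25)) // 1 0: one dedicated worker is close enough
	fmt.Println(splitWorkers(6, 0.25)) // 1 ~0.083: one worker plus about 8.3% on each P
}

Note that with GOMAXPROCS=5 the rounding error of running exactly one dedicated worker is small enough that no fractional work is scheduled at all.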
- // - // TODO: This is a temporary fallback to work around races - // that cause early mark termination. - helperDrainBlock bool + nproc uint32 + tstart int64 + nwait uint32 + ndone uint32 // Number of roots of various root types. Set by gcMarkRootPrepare. nFlushCacheRoots int nDataRoots, nSpanRoots, nStackRoots int - // markrootDone indicates that roots have been marked at least - // once during the current GC cycle. This is checked by root - // marking operations that have to happen only during the - // first root marking pass, whether that's during the - // concurrent mark phase in current GC or mark termination in - // STW GC. - markrootDone bool - // Each type of GC state transition is protected by a lock. // Since multiple threads can simultaneously detect the state // transition condition, any thread that detects a transition @@ -997,8 +963,7 @@ var work struct { // startSema protects the transition from "off" to mark or // mark termination. startSema uint32 - // markDoneSema protects transitions from mark 1 to mark 2 and - // from mark 2 to mark termination. + // markDoneSema protects transitions from mark to mark termination. markDoneSema uint32 bgMarkReady note // signal background mark worker has started @@ -1024,15 +989,15 @@ var work struct { // there was neither enough credit to steal or enough work to // do. assistQueue struct { - lock mutex - head, tail guintptr + lock mutex + q gQueue } // sweepWaiters is a list of blocked goroutines to wake when // we transition from mark termination to sweep. sweepWaiters struct { lock mutex - head guintptr + list gList } // cycles is the number of completed GC cycles, where a GC @@ -1088,7 +1053,7 @@ func GC() { // We're now in sweep N or later. Trigger GC cycle N+1, which // will first finish sweep N if necessary and then enter sweep // termination N+1. - gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerCycle, n: n + 1}) + gcStart(gcTrigger{kind: gcTriggerCycle, n: n + 1}) // Wait for mark termination N+1 to complete. gcWaitOnMark(n + 1) @@ -1097,7 +1062,7 @@ func GC() { // complete the cycle and because runtime.GC() is often used // as part of tests and benchmarks to get the system into a // relatively stable and isolated state. - for atomic.Load(&work.cycles) == n+1 && gosweepone() != ^uintptr(0) { + for atomic.Load(&work.cycles) == n+1 && sweepone() != ^uintptr(0) { sweep.nbgsweep++ Gosched() } @@ -1147,9 +1112,7 @@ func gcWaitOnMark(n uint32) { // Wait until sweep termination, mark, and mark // termination of cycle N complete. - gp := getg() - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) + work.sweepWaiters.list.push(getg()) goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceEvGoBlock, 1) } } @@ -1195,7 +1158,7 @@ const ( gcTriggerCycle ) -// test returns true if the trigger condition is satisfied, meaning +// test reports whether the trigger condition is satisfied, meaning // that the exit condition for the _GCoff phase has been met. The exit // condition should be tested when allocating. func (t gcTrigger) test() bool { @@ -1228,13 +1191,13 @@ func (t gcTrigger) test() bool { return true } -// gcStart transitions the GC from _GCoff to _GCmark (if -// !mode.stwMark) or _GCmarktermination (if mode.stwMark) by -// performing sweep termination and GC initialization. +// gcStart starts the GC. It transitions from _GCoff to _GCmark (if +// debug.gcstoptheworld == 0) or performs all of GC (if +// debug.gcstoptheworld != 0). 
// // This may return without performing this transition in some cases, // such as when called on a system stack or with locks held. -func gcStart(mode gcMode, trigger gcTrigger) { +func gcStart(trigger gcTrigger) { // Since this is called from malloc and malloc is called in // the guts of a number of libraries that might be holding // locks, don't attempt to start GC in non-preemptible or @@ -1257,7 +1220,7 @@ func gcStart(mode gcMode, trigger gcTrigger) { // // We check the transition condition continuously here in case // this G gets delayed in to the next GC cycle. - for trigger.test() && gosweepone() != ^uintptr(0) { + for trigger.test() && sweepone() != ^uintptr(0) { sweep.nbgsweep++ } @@ -1277,12 +1240,11 @@ func gcStart(mode gcMode, trigger gcTrigger) { // We do this after re-checking the transition condition so // that multiple goroutines that detect the heap trigger don't // start multiple STW GCs. - if mode == gcBackgroundMode { - if debug.gcstoptheworld == 1 { - mode = gcForceMode - } else if debug.gcstoptheworld == 2 { - mode = gcForceBlockMode - } + mode := gcBackgroundMode + if debug.gcstoptheworld == 1 { + mode = gcForceMode + } else if debug.gcstoptheworld == 2 { + mode = gcForceBlockMode } // Ok, we're doing it! Stop everybody else @@ -1292,10 +1254,16 @@ func gcStart(mode gcMode, trigger gcTrigger) { traceGCStart() } - if mode == gcBackgroundMode { - gcBgMarkStartWorkers() + // Check that all Ps have finished deferred mcache flushes. + for _, p := range allp { + if fg := atomic.Load(&p.mcache.flushGen); fg != mheap_.sweepgen { + println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen) + throw("p mcache not flushed") + } } + gcBgMarkStartWorkers() + gcResetMarkState() work.stwprocs, work.maxprocs = gomaxprocs, gomaxprocs @@ -1324,199 +1292,299 @@ func gcStart(mode gcMode, trigger gcTrigger) { clearpools() work.cycles++ - if mode == gcBackgroundMode { // Do as much work concurrently as possible - gcController.startCycle() - work.heapGoal = memstats.next_gc - // Enter concurrent mark phase and enable - // write barriers. - // - // Because the world is stopped, all Ps will - // observe that write barriers are enabled by - // the time we start the world and begin - // scanning. - // - // Write barriers must be enabled before assists are - // enabled because they must be enabled before - // any non-leaf heap objects are marked. Since - // allocations are blocked until assists can - // happen, we want enable assists as early as - // possible. - setGCPhase(_GCmark) - - gcBgMarkPrepare() // Must happen before assist enable. - gcMarkRootPrepare() - - // Mark all active tinyalloc blocks. Since we're - // allocating from these, they need to be black like - // other allocations. The alternative is to blacken - // the tiny block on every allocation from it, which - // would slow down the tiny allocator. - gcMarkTinyAllocs() - - // At this point all Ps have enabled the write - // barrier, thus maintaining the no white to - // black invariant. Enable mutator assists to - // put back-pressure on fast allocating - // mutators. - atomic.Store(&gcBlackenEnabled, 1) - - // Assists and workers can start the moment we start - // the world. - gcController.markStartTime = now - - // Concurrent mark. - systemstack(func() { - now = startTheWorldWithSema(trace.enabled) - }) + gcController.startCycle() + work.heapGoal = memstats.next_gc + + // In STW mode, disable scheduling of user Gs. 
This may also + // disable scheduling of this goroutine, so it may block as + // soon as we start the world again. + if mode != gcBackgroundMode { + schedEnableUser(false) + } + + // Enter concurrent mark phase and enable + // write barriers. + // + // Because the world is stopped, all Ps will + // observe that write barriers are enabled by + // the time we start the world and begin + // scanning. + // + // Write barriers must be enabled before assists are + // enabled because they must be enabled before + // any non-leaf heap objects are marked. Since + // allocations are blocked until assists can + // happen, we want enable assists as early as + // possible. + setGCPhase(_GCmark) + + gcBgMarkPrepare() // Must happen before assist enable. + gcMarkRootPrepare() + + // Mark all active tinyalloc blocks. Since we're + // allocating from these, they need to be black like + // other allocations. The alternative is to blacken + // the tiny block on every allocation from it, which + // would slow down the tiny allocator. + gcMarkTinyAllocs() + + // At this point all Ps have enabled the write + // barrier, thus maintaining the no white to + // black invariant. Enable mutator assists to + // put back-pressure on fast allocating + // mutators. + atomic.Store(&gcBlackenEnabled, 1) + + // Assists and workers can start the moment we start + // the world. + gcController.markStartTime = now + + // Concurrent mark. + systemstack(func() { + now = startTheWorldWithSema(trace.enabled) work.pauseNS += now - work.pauseStart work.tMark = now - } else { - if trace.enabled { - // Switch to mark termination STW. - traceGCSTWDone() - traceGCSTWStart(0) - } - t := nanotime() - work.tMark, work.tMarkTerm = t, t - work.heapGoal = work.heap0 - - // Perform mark termination. This will restart the world. - gcMarkTermination(memstats.triggerRatio) + }) + // In STW mode, we could block the instant systemstack + // returns, so don't do anything important here. Make sure we + // block rather than returning to user code. + if mode != gcBackgroundMode { + Gosched() } semrelease(&work.startSema) } -// gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2 -// to mark termination. +// gcMarkDoneFlushed counts the number of P's with flushed work. // -// This should be called when all mark work has been drained. In mark -// 1, this includes all root marking jobs, global work buffers, and -// active work buffers in assists and background workers; however, -// work may still be cached in per-P work buffers. In mark 2, per-P -// caches are disabled. +// Ideally this would be a captured local in gcMarkDone, but forEachP +// escapes its callback closure, so it can't capture anything. +// +// This is protected by markDoneSema. +var gcMarkDoneFlushed uint32 + +// debugCachedWork enables extra checks for debugging premature mark +// termination. +// +// For debugging issue #27993. +const debugCachedWork = false + +// gcWorkPauseGen is for debugging the mark completion algorithm. +// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. +// Only used if debugCachedWork is true. +// +// For debugging issue #27993. +var gcWorkPauseGen uint32 = 1 + +// gcMarkDone transitions the GC from mark to mark termination if all +// reachable objects have been marked (that is, there are no grey +// objects and can be no more in the future). Otherwise, it flushes +// all local work to the global queues where it can be discovered by +// other workers. 
+// +// This should be called when all local mark work has been drained and +// there are no remaining workers. Specifically, when +// +// work.nwait == work.nproc && !gcMarkWorkAvailable(p) // // The calling context must be preemptible. // -// Note that it is explicitly okay to have write barriers in this -// function because completion of concurrent mark is best-effort -// anyway. Any work created by write barriers here will be cleaned up -// by mark termination. +// Flushing local work is important because idle Ps may have local +// work queued. This is the only way to make that work visible and +// drive GC to completion. +// +// It is explicitly okay to have write barriers in this function. If +// it does transition to mark termination, then all reachable objects +// have been marked, so the write barrier cannot shade any more +// objects. func gcMarkDone() { -top: + // Ensure only one thread is running the ragged barrier at a + // time. semacquire(&work.markDoneSema) +top: // Re-check transition condition under transition lock. + // + // It's critical that this checks the global work queues are + // empty before performing the ragged barrier. Otherwise, + // there could be global work that a P could take after the P + // has passed the ragged barrier. if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { semrelease(&work.markDoneSema) return } - // Disallow starting new workers so that any remaining workers - // in the current mark phase will drain out. - // - // TODO(austin): Should dedicated workers keep an eye on this - // and exit gcDrain promptly? - atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) - prevFractionalGoal := gcController.fractionalUtilizationGoal - gcController.fractionalUtilizationGoal = 0 - - if !gcBlackenPromptly { - // Transition from mark 1 to mark 2. - // - // The global work list is empty, but there can still be work - // sitting in the per-P work caches. - // Flush and disable work caches. - - // Disallow caching workbufs and indicate that we're in mark 2. - gcBlackenPromptly = true - - // Prevent completion of mark 2 until we've flushed - // cached workbufs. - atomic.Xadd(&work.nwait, -1) - - // GC is set up for mark 2. Let Gs blocked on the - // transition lock go while we flush caches. - semrelease(&work.markDoneSema) - - systemstack(func() { - // Flush all currently cached workbufs and - // ensure all Ps see gcBlackenPromptly. This - // also blocks until any remaining mark 1 - // workers have exited their loop so we can - // start new mark 2 workers. - forEachP(func(_p_ *p) { - wbBufFlush1(_p_) - _p_.gcw.dispose() - }) + // Flush all local buffers and collect flushedWork flags. + gcMarkDoneFlushed = 0 + systemstack(func() { + gp := getg().m.curg + // Mark the user stack as preemptible so that it may be scanned. + // Otherwise, our attempt to force all P's to a safepoint could + // result in a deadlock as we attempt to preempt a worker that's + // trying to preempt us (e.g. for a stack scan). + casgstatus(gp, _Grunning, _Gwaiting) + forEachP(func(_p_ *p) { + // Flush the write barrier buffer, since this may add + // work to the gcWork. + wbBufFlush1(_p_) + // For debugging, shrink the write barrier + // buffer so it flushes immediately. + // wbBuf.reset will keep it at this size as + // long as throwOnGCWork is set. 
+ if debugCachedWork { + b := &_p_.wbBuf + b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) + b.debugGen = gcWorkPauseGen + } + // Flush the gcWork, since this may create global work + // and set the flushedWork flag. + // + // TODO(austin): Break up these workbufs to + // better distribute work. + _p_.gcw.dispose() + // Collect the flushedWork flag. + if _p_.gcw.flushedWork { + atomic.Xadd(&gcMarkDoneFlushed, 1) + _p_.gcw.flushedWork = false + } else if debugCachedWork { + // For debugging, freeze the gcWork + // until we know whether we've reached + // completion or not. If we think + // we've reached completion, but + // there's a paused gcWork, then + // that's a bug. + _p_.gcw.pauseGen = gcWorkPauseGen + // Capture the G's stack. + for i := range _p_.gcw.pauseStack { + _p_.gcw.pauseStack[i].pc = 0 + } + callers(1, _p_.gcw.pauseStack[:]) + } }) + casgstatus(gp, _Gwaiting, _Grunning) + }) - // Check that roots are marked. We should be able to - // do this before the forEachP, but based on issue - // #16083 there may be a (harmless) race where we can - // enter mark 2 while some workers are still scanning - // stacks. The forEachP ensures these scans are done. - // - // TODO(austin): Figure out the race and fix this - // properly. - gcMarkRootCheck() + if gcMarkDoneFlushed != 0 { + if debugCachedWork { + // Release paused gcWorks. + atomic.Xadd(&gcWorkPauseGen, 1) + } + // More grey objects were discovered since the + // previous termination check, so there may be more + // work to do. Keep going. It's possible the + // transition condition became true again during the + // ragged barrier, so re-check it. + goto top + } - // Now we can start up mark 2 workers. - atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) - gcController.fractionalUtilizationGoal = prevFractionalGoal + if debugCachedWork { + throwOnGCWork = true + // Release paused gcWorks. If there are any, they + // should now observe throwOnGCWork and panic. + atomic.Xadd(&gcWorkPauseGen, 1) + } - incnwait := atomic.Xadd(&work.nwait, +1) - if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { - // This loop will make progress because - // gcBlackenPromptly is now true, so it won't - // take this same "if" branch. - goto top + // There was no global work, no local work, and no Ps + // communicated work since we took markDoneSema. Therefore + // there are no grey objects and no more objects can be + // shaded. Transition to mark termination. + now := nanotime() + work.tMarkTerm = now + work.pauseStart = now + getg().m.preemptoff = "gcing" + if trace.enabled { + traceGCSTWStart(0) + } + systemstack(stopTheWorldWithSema) + // The gcphase is _GCmark, it will transition to _GCmarktermination + // below. The important thing is that the wb remains active until + // all marking is complete. This includes writes made by the GC. + + if debugCachedWork { + // For debugging, double check that no work was added after we + // went around above and disable write barrier buffering. + for _, p := range allp { + gcw := &p.gcw + if !gcw.empty() { + printlock() + print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) + if gcw.wbuf1 == nil { + print(" wbuf1=<nil>") + } else { + print(" wbuf1.n=", gcw.wbuf1.nobj) + } + if gcw.wbuf2 == nil { + print(" wbuf2=<nil>") + } else { + print(" wbuf2.n=", gcw.wbuf2.nobj) + } + print("\n") + if gcw.pauseGen == gcw.putGen { + println("runtime: checkPut already failed at this generation") + } + throw("throwOnGCWork") + } } } else { - // Transition to mark termination. 
- now := nanotime() - work.tMarkTerm = now - work.pauseStart = now - getg().m.preemptoff = "gcing" - if trace.enabled { - traceGCSTWStart(0) + // For unknown reasons (see issue #27993), there is + // sometimes work left over when we enter mark + // termination. Detect this and resume concurrent + // mark. This is obviously unfortunate. + // + // Switch to the system stack to call wbBufFlush1, + // though in this case it doesn't matter because we're + // non-preemptible anyway. + restart := false + systemstack(func() { + for _, p := range allp { + wbBufFlush1(p) + if !p.gcw.empty() { + restart = true + break + } + } + }) + if restart { + getg().m.preemptoff = "" + systemstack(func() { + now := startTheWorldWithSema(true) + work.pauseNS += now - work.pauseStart + }) + goto top } - systemstack(stopTheWorldWithSema) - // The gcphase is _GCmark, it will transition to _GCmarktermination - // below. The important thing is that the wb remains active until - // all marking is complete. This includes writes made by the GC. + } - // Record that one root marking pass has completed. - work.markrootDone = true + // Disable assists and background workers. We must do + // this before waking blocked assists. + atomic.Store(&gcBlackenEnabled, 0) - // Disable assists and background workers. We must do - // this before waking blocked assists. - atomic.Store(&gcBlackenEnabled, 0) + // Wake all blocked assists. These will run when we + // start the world again. + gcWakeAllAssists() - // Wake all blocked assists. These will run when we - // start the world again. - gcWakeAllAssists() + // Likewise, release the transition lock. Blocked + // workers and assists will run when we start the + // world again. + semrelease(&work.markDoneSema) - // Likewise, release the transition lock. Blocked - // workers and assists will run when we start the - // world again. - semrelease(&work.markDoneSema) + // In STW mode, re-enable user goroutines. These will be + // queued to run after we start the world. + schedEnableUser(true) - // endCycle depends on all gcWork cache stats being - // flushed. This is ensured by mark 2. - nextTriggerRatio := gcController.endCycle() + // endCycle depends on all gcWork cache stats being flushed. + // The termination algorithm above ensured that up to + // allocations since the ragged barrier. + nextTriggerRatio := gcController.endCycle() - // Perform mark termination. This will restart the world. - gcMarkTermination(nextTriggerRatio) - } + // Perform mark termination. This will restart the world. + gcMarkTermination(nextTriggerRatio) } func gcMarkTermination(nextTriggerRatio float64) { // World is stopped. // Start marktermination which includes enabling the write barrier. atomic.Store(&gcBlackenEnabled, 0) - gcBlackenPromptly = false setGCPhase(_GCmarktermination) work.heap1 = memstats.heap_live @@ -1549,35 +1617,22 @@ func gcMarkTermination(nextTriggerRatio float64) { systemstack(func() { work.heap2 = work.bytesMarked if debug.gccheckmark > 0 { - // Run a full stop-the-world mark using checkmark bits, - // to check that we didn't forget to mark anything during - // the concurrent mark process. + // Run a full non-parallel, stop-the-world + // mark using checkmark bits, to check that we + // didn't forget to mark anything during the + // concurrent mark process. 
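The checkmark pass re-walks the entire object graph and verifies that concurrent mark did not miss anything. A toy sketch of that verification idea follows; it models mark bits as a plain bool and the graph as explicit pointers, so the types and messages are invented, not the runtime's.

// Toy sketch of checkmark-style verification (illustrative only).
package main

import "fmt"

type obj struct {
	marked bool // set by the (simulated) concurrent mark
	refs   []*obj
}

// checkmark re-walks the graph from the roots and panics if it finds a
// reachable object the first pass failed to mark, mirroring what the
// gccheckmark debug mode catches.
func checkmark(roots []*obj) {
	seen := map[*obj]bool{}
	var visit func(o *obj)
	visit = func(o *obj) {
		if o == nil || seen[o] {
			return
		}
		seen[o] = true
		if !o.marked {
			panic("checkmark found unmarked reachable object")
		}
		for _, r := range o.refs {
			visit(r)
		}
	}
	for _, r := range roots {
		visit(r)
	}
}

func main() {
	a := &obj{marked: true}
	b := &obj{marked: true, refs: []*obj{a}}
	checkmark([]*obj{b})
	fmt.Println("checkmark passed")
}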
gcResetMarkState() initCheckmarks() - gcMark(startTime) + gcw := &getg().m.p.ptr().gcw + gcDrain(gcw, 0) + wbBufFlush1(getg().m.p.ptr()) + gcw.dispose() clearCheckmarks() } // marking is complete so we can turn the write barrier off setGCPhase(_GCoff) gcSweep(work.mode) - - if debug.gctrace > 1 { - startTime = nanotime() - // The g stacks have been scanned so - // they have gcscanvalid==true and gcworkdone==true. - // Reset these so that all stacks will be rescanned. - gcResetMarkState() - finishsweep_m() - - // Still in STW but gcphase is _GCoff, reset to _GCmarktermination - // At this point all objects will be found during the gcMark which - // does a complete STW mark and object scan. - setGCPhase(_GCmarktermination) - gcMark(startTime) - setGCPhase(_GCoff) // marking is done, turn off wb. - gcSweep(work.mode) - } }) _g_.m.traceback = 0 @@ -1633,8 +1688,7 @@ func gcMarkTermination(nextTriggerRatio float64) { // Bump GC cycle count and wake goroutines waiting on sweep. lock(&work.sweepWaiters.lock) memstats.numgc++ - injectglist(work.sweepWaiters.head.ptr()) - work.sweepWaiters.head = 0 + injectglist(&work.sweepWaiters.list) unlock(&work.sweepWaiters.lock) // Finish the current heap profiling cycle and start a new @@ -1653,6 +1707,16 @@ func gcMarkTermination(nextTriggerRatio float64) { // asynchronously because it can take non-trivial time. prepareFreeWorkbufs() + // Ensure all mcaches are flushed. Each P will flush its own + // mcache before allocating, but idle Ps may not. Since this + // is necessary to sweep all spans, we need to ensure all + // mcaches are flushed before we start the next GC cycle. + systemstack(func() { + forEachP(func(_p_ *p) { + _p_.mcache.prepareForSweep() + }) + }) + // Print gctrace before dropping worldsema. As soon as we drop // worldsema another cycle could start and smash the stats // we're trying to print. @@ -1856,7 +1920,7 @@ func gcBgMarkWorker(_p_ *p) { } // Go back to draining, this time // without preemption. - gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit) + gcDrain(&_p_.gcw, gcDrainFlushBgCredit) case gcMarkWorkerFractionalMode: gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) case gcMarkWorkerIdleMode: @@ -1865,16 +1929,6 @@ func gcBgMarkWorker(_p_ *p) { casgstatus(gp, _Gwaiting, _Grunning) }) - // If we are nearing the end of mark, dispose - // of the cache promptly. We must do this - // before signaling that we're no longer - // working so that other workers can't observe - // no workers and no work while we have this - // cached, and before we compute done. - if gcBlackenPromptly { - _p_.gcw.dispose() - } - // Account for time. duration := nanotime() - startTime switch _p_.gcMarkWorkerMode { @@ -1921,7 +1975,7 @@ func gcBgMarkWorker(_p_ *p) { } } -// gcMarkWorkAvailable returns true if executing a mark worker +// gcMarkWorkAvailable reports whether executing a mark worker // on p is potentially useful. p may be nil, in which case it only // checks the global sources of work. func gcMarkWorkAvailable(p *p) bool { @@ -1951,50 +2005,11 @@ func gcMark(start_time int64) { } work.tstart = start_time - // Queue root marking jobs. - gcMarkRootPrepare() - - work.nwait = 0 - work.ndone = 0 - work.nproc = uint32(gcprocs()) - - if work.full == 0 && work.nDataRoots+work.nSpanRoots+work.nStackRoots == 0 { - // There's no work on the work queue and no root jobs - // that can produce work, so don't bother entering the - // getfull() barrier. 
- // - // This will be the situation the vast majority of the - // time after concurrent mark. However, we still need - // a fallback for STW GC and because there are some - // known races that occasionally leave work around for - // mark termination. - // - // We're still hedging our bets here: if we do - // accidentally produce some work, we'll still process - // it, just not necessarily in parallel. - // - // TODO(austin): Fix the races and and remove - // work draining from mark termination so we don't - // need the fallback path. - work.helperDrainBlock = false - } else { - work.helperDrainBlock = true - } - - if work.nproc > 1 { - noteclear(&work.alldone) - helpgc(int32(work.nproc)) - } - - gchelperstart() - - gcw := &getg().m.p.ptr().gcw - if work.helperDrainBlock { - gcDrain(gcw, gcDrainBlock) - } else { - gcDrain(gcw, gcDrainNoBlock) + // Check that there's no marking work remaining. + if work.full != 0 || work.markrootNext < work.markrootJobs { + print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") + panic("non-empty mark queue after concurrent mark") } - gcw.dispose() if debug.gccheckmark > 0 { // This is expensive when there's a large number of @@ -2005,25 +2020,52 @@ func gcMark(start_time int64) { throw("work.full != 0") } - if work.nproc > 1 { - notesleep(&work.alldone) - } - - // Record that at least one root marking pass has completed. - work.markrootDone = true - - // Double-check that all gcWork caches are empty. This should - // be ensured by mark 2 before we enter mark termination. + // Clear out buffers and double-check that all gcWork caches + // are empty. This should be ensured by gcMarkDone before we + // enter mark termination. + // + // TODO: We could clear out buffers just before mark if this + // has a non-negligible impact on STW time. for _, p := range allp { + // The write barrier may have buffered pointers since + // the gcMarkDone barrier. However, since the barrier + // ensured all reachable objects were marked, all of + // these must be pointers to black objects. Hence we + // can just discard the write barrier buffer. + if debug.gccheckmark > 0 || throwOnGCWork { + // For debugging, flush the buffer and make + // sure it really was all marked. + wbBufFlush1(p) + } else { + p.wbBuf.reset() + } + gcw := &p.gcw if !gcw.empty() { + printlock() + print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) + if gcw.wbuf1 == nil { + print(" wbuf1=<nil>") + } else { + print(" wbuf1.n=", gcw.wbuf1.nobj) + } + if gcw.wbuf2 == nil { + print(" wbuf2=<nil>") + } else { + print(" wbuf2.n=", gcw.wbuf2.nobj) + } + print("\n") throw("P has cached GC work at end of mark termination") } - if gcw.scanWork != 0 || gcw.bytesMarked != 0 { - throw("P has unflushed stats at end of mark termination") - } + // There may still be cached empty buffers, which we + // need to flush since we're going to free them. Also, + // there may be non-zero stats because we allocated + // black after the gcMarkDone barrier. + gcw.dispose() } + throwOnGCWork = false + cachestats() // Update the marked heap stat. 
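The next hunk seeds mheap_.reclaimIndex and mheap_.reclaimCredit, the shared cursor and credit pool used by the new page reclaimer. A minimal sketch of that claim-and-credit pattern, with invented names and a fake page count, is below: callers draw on leftover credit first, then claim chunks of the index atomically and bank any overshoot for later callers.

// Minimal sketch of claiming reclaim work from a shared index with a credit pool (illustrative only).
package main

import (
	"fmt"
	"sync/atomic"
)

const chunk = 8 // pages claimed per step (the runtime uses much larger chunks)

var (
	reclaimIndex  uint64        // next page index to scan
	reclaimCredit uint64        // spare pages reclaimed beyond what callers asked for
	totalPages    uint64 = 64   // pretend heap size in pages
)

// reclaim frees at least npages, drawing on shared credit first and
// banking any overshoot for the next caller.
func reclaim(npages uint64) {
	// Draw down credit left behind by previous callers.
	for npages > 0 {
		c := atomic.LoadUint64(&reclaimCredit)
		if c == 0 {
			break
		}
		take := c
		if take > npages {
			take = npages
		}
		if atomic.CompareAndSwapUint64(&reclaimCredit, c, c-take) {
			npages -= take
		}
	}
	// Claim chunks of the shared index until enough pages are reclaimed.
	for npages > 0 {
		idx := atomic.AddUint64(&reclaimIndex, chunk) - chunk
		if idx >= totalPages {
			break // nothing left to scan this cycle
		}
		got := uint64(chunk) // pretend every page in the chunk was reclaimable
		if got > npages {
			atomic.AddUint64(&reclaimCredit, got-npages)
			npages = 0
		} else {
			npages -= got
		}
	}
}

func main() {
	reclaim(10)
	fmt.Println("index:", reclaimIndex, "credit:", reclaimCredit)
}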
@@ -2055,6 +2097,9 @@ func gcSweep(mode gcMode) { throw("non-empty swept list") } mheap_.pagesSwept = 0 + mheap_.sweepArenas = mheap_.allArenas + mheap_.reclaimIndex = 0 + mheap_.reclaimCredit = 0 unlock(&mheap_.lock) if !_ConcurrentSweep || mode == gcForceBlockMode { @@ -2104,9 +2149,20 @@ func gcResetMarkState() { } unlock(&allglock) + // Clear page marks. This is just 1MB per 64GB of heap, so the + // time here is pretty trivial. + lock(&mheap_.lock) + arenas := mheap_.allArenas + unlock(&mheap_.lock) + for _, ai := range arenas { + ha := mheap_.arenas[ai.l1()][ai.l2()] + for i := range ha.pageMarks { + ha.pageMarks[i] = 0 + } + } + work.bytesMarked = 0 work.initialHeapLive = atomic.Load64(&memstats.heap_live) - work.markrootDone = false } // Hooks for other packages @@ -2151,48 +2207,6 @@ func clearpools() { unlock(&sched.deferlock) } -// gchelper runs mark termination tasks on Ps other than the P -// coordinating mark termination. -// -// The caller is responsible for ensuring that this has a P to run on, -// even though it's running during STW. Because of this, it's allowed -// to have write barriers. -// -//go:yeswritebarrierrec -func gchelper() { - _g_ := getg() - _g_.m.traceback = 2 - gchelperstart() - - // Parallel mark over GC roots and heap - if gcphase == _GCmarktermination { - gcw := &_g_.m.p.ptr().gcw - if work.helperDrainBlock { - gcDrain(gcw, gcDrainBlock) // blocks in getfull - } else { - gcDrain(gcw, gcDrainNoBlock) - } - gcw.dispose() - } - - nproc := atomic.Load(&work.nproc) // work.nproc can change right after we increment work.ndone - if atomic.Xadd(&work.ndone, +1) == nproc-1 { - notewakeup(&work.alldone) - } - _g_.m.traceback = 0 -} - -func gchelperstart() { - _g_ := getg() - - if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc { - throw("gchelperstart: bad m->helpgc") - } - if _g_ != _g_.m.g0 { - throw("gchelper not running on g0 stack") - } -} - // Timing // itoaDiv formats val/(10**dec) into buf. diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go index b396d35146c..85045170b3c 100644 --- a/libgo/go/runtime/mgc_gccgo.go +++ b/libgo/go/runtime/mgc_gccgo.go @@ -146,7 +146,7 @@ func registerGCRoots(r *gcRootList) { // and carries on. func checkPreempt() { gp := getg() - if !gp.preempt || gp != gp.m.curg || gp.m.locks != 0 || gp.m.mallocing != 0 || gp.m.preemptoff != "" { + if !gp.preempt || gp != gp.m.curg || gp.m.locks != 0 || gp.m.mallocing != 0 || gp.m.preemptoff != "" || gp.m.incgo { return } @@ -165,9 +165,6 @@ func checkPreempt() { mp := acquirem() gcw := &gp.m.p.ptr().gcw scanstack(gp, gcw) - if gcBlackenPromptly { - gcw.dispose() - } releasem(mp) gp.gcscandone = true } diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go index e7fa831937a..7b01a117808 100644 --- a/libgo/go/runtime/mgclarge.go +++ b/libgo/go/runtime/mgclarge.go @@ -46,13 +46,57 @@ type treapNode struct { priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced } -func (t *treapNode) init() { - t.right = nil - t.left = nil - t.parent = nil - t.spanKey = nil - t.npagesKey = 0 - t.priority = 0 +func (t *treapNode) pred() *treapNode { + if t.left != nil { + // If it has a left child, its predecessor will be + // its right most left (grand)child. + t = t.left + for t.right != nil { + t = t.right + } + return t + } + // If it has no left child, its predecessor will be + // the first grandparent who's right child is its + // ancestor. 
+ // + // We compute this by walking up the treap until the + // current node's parent is its parent's right child. + // + // If we find at any point walking up the treap + // that the current node doesn't have a parent, + // we've hit the root. This means that t is already + // the left-most node in the treap and therefore + // has no predecessor. + for t.parent != nil && t.parent.right != t { + if t.parent.left != t { + println("runtime: predecessor t=", t, "t.spanKey=", t.spanKey) + throw("node is not its parent's child") + } + t = t.parent + } + return t.parent +} + +func (t *treapNode) succ() *treapNode { + if t.right != nil { + // If it has a right child, its successor will be + // its left-most right (grand)child. + t = t.right + for t.left != nil { + t = t.left + } + return t + } + // See pred. + for t.parent != nil && t.parent.left != t { + if t.parent.right != t { + println("runtime: predecessor t=", t, "t.spanKey=", t.spanKey) + throw("node is not its parent's child") + } + t = t.parent + } + return t.parent } // isSpanInTreap is handy for debugging. One should hold the heap lock, usually @@ -109,6 +153,68 @@ func checkTreapNode(t *treapNode) { } } +// treapIter is a bidirectional iterator type which may be used to iterate over a +// an mTreap in-order forwards (increasing order) or backwards (decreasing order). +// Its purpose is to hide details about the treap from users when trying to iterate +// over it. +// +// To create iterators over the treap, call start or end on an mTreap. +type treapIter struct { + t *treapNode +} + +// span returns the span at the current position in the treap. +// If the treap is not valid, span will panic. +func (i *treapIter) span() *mspan { + return i.t.spanKey +} + +// valid returns whether the iterator represents a valid position +// in the mTreap. +func (i *treapIter) valid() bool { + return i.t != nil +} + +// next moves the iterator forward by one. Once the iterator +// ceases to be valid, calling next will panic. +func (i treapIter) next() treapIter { + i.t = i.t.succ() + return i +} + +// prev moves the iterator backwards by one. Once the iterator +// ceases to be valid, calling prev will panic. +func (i treapIter) prev() treapIter { + i.t = i.t.pred() + return i +} + +// start returns an iterator which points to the start of the treap (the +// left-most node in the treap). +func (root *mTreap) start() treapIter { + t := root.treap + if t == nil { + return treapIter{} + } + for t.left != nil { + t = t.left + } + return treapIter{t: t} +} + +// end returns an iterator which points to the end of the treap (the +// right-most node in the treap). +func (root *mTreap) end() treapIter { + t := root.treap + if t == nil { + return treapIter{} + } + for t.right != nil { + t = t.right + } + return treapIter{t: t} +} + // insert adds span to the large span treap. func (root *mTreap) insert(span *mspan) { npages := span.npages @@ -120,10 +226,10 @@ func (root *mTreap) insert(span *mspan) { pt = &t.right } else if t.npagesKey > npages { pt = &t.left - } else if uintptr(unsafe.Pointer(t.spanKey)) < uintptr(unsafe.Pointer(span)) { + } else if t.spanKey.base() < span.base() { // t.npagesKey == npages, so sort on span addresses. 
pt = &t.right - } else if uintptr(unsafe.Pointer(t.spanKey)) > uintptr(unsafe.Pointer(span)) { + } else if t.spanKey.base() > span.base() { pt = &t.left } else { throw("inserting span already in treap") @@ -140,7 +246,6 @@ func (root *mTreap) insert(span *mspan) { // https://faculty.washington.edu/aragon/pubs/rst89.pdf t := (*treapNode)(mheap_.treapalloc.alloc()) - t.init() t.npagesKey = span.npages t.priority = fastrand() t.spanKey = span @@ -168,7 +273,6 @@ func (root *mTreap) removeNode(t *treapNode) { if t.spanKey.npages != t.npagesKey { throw("span and treap node npages do not match") } - // Rotate t down to be leaf of tree for removal, respecting priorities. for t.right != nil || t.left != nil { if t.right == nil || t.left != nil && t.left.priority < t.right.priority { @@ -188,19 +292,16 @@ func (root *mTreap) removeNode(t *treapNode) { root.treap = nil } // Return the found treapNode's span after freeing the treapNode. - t.spanKey = nil - t.npagesKey = 0 mheap_.treapalloc.free(unsafe.Pointer(t)) } -// remove searches for, finds, removes from the treap, and returns the smallest -// span that can hold npages. If no span has at least npages return nil. +// find searches for, finds, and returns the treap node containing the +// smallest span that can hold npages. If no span has at least npages +// it returns nil. // This is slightly more complicated than a simple binary tree search // since if an exact match is not found the next larger node is // returned. -// If the last node inspected > npagesKey not holding -// a left node (a smaller npages) is the "best fit" node. -func (root *mTreap) remove(npages uintptr) *mspan { +func (root *mTreap) find(npages uintptr) *treapNode { t := root.treap for t != nil { if t.spanKey == nil { @@ -211,9 +312,7 @@ func (root *mTreap) remove(npages uintptr) *mspan { } else if t.left != nil && t.left.npagesKey >= npages { t = t.left } else { - result := t.spanKey - root.removeNode(t) - return result + return t } } return nil @@ -231,24 +330,21 @@ func (root *mTreap) removeSpan(span *mspan) { t = t.right } else if t.npagesKey > npages { t = t.left - } else if uintptr(unsafe.Pointer(t.spanKey)) < uintptr(unsafe.Pointer(span)) { + } else if t.spanKey.base() < span.base() { t = t.right - } else if uintptr(unsafe.Pointer(t.spanKey)) > uintptr(unsafe.Pointer(span)) { + } else if t.spanKey.base() > span.base() { t = t.left } } root.removeNode(t) } -// scavengetreap visits each node in the treap and scavenges the -// treapNode's span. -func scavengetreap(treap *treapNode, now, limit uint64) uintptr { - if treap == nil { - return 0 - } - return scavengeTreapNode(treap, now, limit) + - scavengetreap(treap.left, now, limit) + - scavengetreap(treap.right, now, limit) +// erase removes the element referred to by the current position of the +// iterator. This operation consumes the given iterator, so it should no +// longer be used. It is up to the caller to get the next or previous +// iterator before calling erase, if need be. +func (root *mTreap) erase(i treapIter) { + root.removeNode(i.t) } // rotateLeft rotates the tree rooted at node x. 
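For reference, here is a small, self-contained sketch of how the new iterator API above is intended to be used: start() positions at the left-most node and succ() steps in order, which is exactly the shape of treapIter.next. The node and tree types are simplified stand-ins, without priorities or rotations.

// Sketch of in-order iteration over a parent-pointer tree, mirroring treapIter (illustrative only).
package main

import "fmt"

type node struct {
	key                 int
	left, right, parent *node
}

type tree struct{ root *node }

func (t *tree) insert(k int) {
	n := &node{key: k}
	if t.root == nil {
		t.root = n
		return
	}
	cur := t.root
	for {
		if k < cur.key {
			if cur.left == nil {
				cur.left, n.parent = n, cur
				return
			}
			cur = cur.left
		} else {
			if cur.right == nil {
				cur.right, n.parent = n, cur
				return
			}
			cur = cur.right
		}
	}
}

// succ mirrors treapNode.succ: the left-most node of the right subtree, or
// the first ancestor of which the node lies in the left subtree.
func succ(n *node) *node {
	if n.right != nil {
		n = n.right
		for n.left != nil {
			n = n.left
		}
		return n
	}
	for n.parent != nil && n.parent.left != n {
		n = n.parent
	}
	return n.parent
}

// start mirrors mTreap.start: the left-most (smallest) node.
func (t *tree) start() *node {
	n := t.root
	if n == nil {
		return nil
	}
	for n.left != nil {
		n = n.left
	}
	return n
}

func main() {
	t := &tree{}
	for _, k := range []int{5, 2, 8, 1, 3} {
		t.insert(k)
	}
	for it := t.start(); it != nil; it = succ(it) {
		fmt.Print(it.key, " ") // 1 2 3 5 8
	}
	fmt.Println()
}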
diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go index 631c4d7133b..9da881ed2ab 100644 --- a/libgo/go/runtime/mgcmark.go +++ b/libgo/go/runtime/mgcmark.go @@ -52,62 +52,37 @@ const ( // //go:nowritebarrier func gcMarkRootPrepare() { - if gcphase == _GCmarktermination { - work.nFlushCacheRoots = int(gomaxprocs) - } else { - work.nFlushCacheRoots = 0 - } + work.nFlushCacheRoots = 0 work.nDataRoots = 0 // Only scan globals once per cycle; preferably concurrently. - if !work.markrootDone { - roots := gcRoots - for roots != nil { - work.nDataRoots++ - roots = roots.next - } + roots := gcRoots + for roots != nil { + work.nDataRoots++ + roots = roots.next } - if !work.markrootDone { - // On the first markroot, we need to scan span roots. - // In concurrent GC, this happens during concurrent - // mark and we depend on addfinalizer to ensure the - // above invariants for objects that get finalizers - // after concurrent mark. In STW GC, this will happen - // during mark termination. - // - // We're only interested in scanning the in-use spans, - // which will all be swept at this point. More spans - // may be added to this list during concurrent GC, but - // we only care about spans that were allocated before - // this mark phase. - work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks() - - // On the first markroot, we need to scan all Gs. Gs - // may be created after this point, but it's okay that - // we ignore them because they begin life without any - // roots, so there's nothing to scan, and any roots - // they create during the concurrent phase will be - // scanned during mark termination. During mark - // termination, allglen isn't changing, so we'll scan - // all Gs. - work.nStackRoots = int(atomic.Loaduintptr(&allglen)) - } else { - // We've already scanned span roots and kept the scan - // up-to-date during concurrent mark. - work.nSpanRoots = 0 - - // The hybrid barrier ensures that stacks can't - // contain pointers to unmarked objects, so on the - // second markroot, there's no need to scan stacks. - work.nStackRoots = 0 - - if debug.gcrescanstacks > 0 { - // Scan stacks anyway for debugging. - work.nStackRoots = int(atomic.Loaduintptr(&allglen)) - } - } + // Scan span roots for finalizer specials. + // + // We depend on addfinalizer to mark objects that get + // finalizers after root marking. + // + // We're only interested in scanning the in-use spans, + // which will all be swept at this point. More spans + // may be added to this list during concurrent GC, but + // we only care about spans that were allocated before + // this mark phase. + work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks() + + // Scan stacks. + // + // Gs may be created after this point, but it's okay that we + // ignore them because they begin life without any roots, so + // there's nothing to scan, and any roots they create during + // the concurrent phase will be scanned during mark + // termination. + work.nStackRoots = int(atomic.Loaduintptr(&allglen)) work.markrootNext = 0 work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nSpanRoots + work.nStackRoots) @@ -124,19 +99,10 @@ func gcMarkRootCheck() { lock(&allglock) // Check that stacks have been scanned. 
var gp *g - if gcphase == _GCmarktermination && debug.gcrescanstacks > 0 { - for i := 0; i < len(allgs); i++ { - gp = allgs[i] - if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead { - goto fail - } - } - } else { - for i := 0; i < work.nStackRoots; i++ { - gp = allgs[i] - if !gp.gcscandone { - goto fail - } + for i := 0; i < work.nStackRoots; i++ { + gp = allgs[i] + if !gp.gcscandone { + goto fail } } unlock(&allglock) @@ -188,11 +154,6 @@ func markroot(gcw *gcWork, i uint32) { } case i == fixedRootFinalizers: - // Only do this once per GC cycle since we don't call - // queuefinalizer during marking. - if work.markrootDone { - break - } for fb := allfin; fb != nil; fb = fb.alllink { cnt := uintptr(atomic.Load(&fb.cnt)) scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw) @@ -202,7 +163,7 @@ func markroot(gcw *gcWork, i uint32) { // FIXME: We don't do this for gccgo. case baseSpans <= i && i < baseStacks: - // mark MSpan.specials + // mark mspan.specials markrootSpans(gcw, int(i-baseSpans)) default: @@ -278,10 +239,6 @@ func markrootSpans(gcw *gcWork, shard int) { // TODO(austin): There are several ideas for making this more // efficient in issue #11485. - if work.markrootDone { - throw("markrootSpans during second markroot") - } - sg := mheap_.sweepgen spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard) // Note that work.spans may not include spans that were @@ -294,7 +251,8 @@ func markrootSpans(gcw *gcWork, shard int) { if s.state != mSpanInUse { continue } - if !useCheckmark && s.sweepgen != sg { + // Check that this span was swept (it may be cached or uncached). + if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) { // sweepgen was updated (+2) during non-checkmark GC pass print("sweep ", s.sweepgen, " ", sg, "\n") throw("gc: unswept span") @@ -492,11 +450,6 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // will be more cache friendly. gcw := &getg().m.p.ptr().gcw workDone := gcDrainN(gcw, scanWork) - // If we are near the end of the mark phase - // dispose of the gcw. - if gcBlackenPromptly { - gcw.dispose() - } casgstatus(gp, _Gwaiting, _Grunning) @@ -513,8 +466,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { incnwait := atomic.Xadd(&work.nwait, +1) if incnwait > work.nproc { println("runtime: work.nwait=", incnwait, - "work.nproc=", work.nproc, - "gcBlackenPromptly=", gcBlackenPromptly) + "work.nproc=", work.nproc) throw("work.nwait > work.nproc") } @@ -539,15 +491,14 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // new assists from going to sleep after this point. func gcWakeAllAssists() { lock(&work.assistQueue.lock) - injectglist(work.assistQueue.head.ptr()) - work.assistQueue.head.set(nil) - work.assistQueue.tail.set(nil) + list := work.assistQueue.q.popList() + injectglist(&list) unlock(&work.assistQueue.lock) } // gcParkAssist puts the current goroutine on the assist queue and parks. // -// gcParkAssist returns whether the assist is now satisfied. If it +// gcParkAssist reports whether the assist is now satisfied. If it // returns false, the caller must retry the assist. 
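The assist queue is switched from hand-maintained head/tail fields to a small FIFO type with empty, pushBack, pop and popList. A standalone sketch of such an intrusive queue is below; the g type and field names are simplified stand-ins for the runtime's g and its queue type.

// Sketch of an intrusive FIFO queue of goroutine-like nodes (illustrative only).
package main

import "fmt"

type g struct {
	id        int
	schedlink *g // intrusive link, analogous to g.schedlink
}

type gQueue struct {
	head, tail *g
}

func (q *gQueue) empty() bool { return q.head == nil }

// pushBack adds gp to the tail of the queue.
func (q *gQueue) pushBack(gp *g) {
	gp.schedlink = nil
	if q.tail != nil {
		q.tail.schedlink = gp
	} else {
		q.head = gp
	}
	q.tail = gp
}

// pop removes and returns the head of the queue, or nil if it is empty.
func (q *gQueue) pop() *g {
	gp := q.head
	if gp != nil {
		q.head = gp.schedlink
		if q.head == nil {
			q.tail = nil
		}
		gp.schedlink = nil
	}
	return gp
}

// popList takes the whole queue at once, leaving it empty (the shape
// gcWakeAllAssists wants before handing everything to injectglist).
func (q *gQueue) popList() gQueue {
	all := *q
	*q = gQueue{}
	return all
}

func main() {
	var q gQueue
	for i := 1; i <= 3; i++ {
		q.pushBack(&g{id: i})
	}
	for gp := q.pop(); gp != nil; gp = q.pop() {
		fmt.Print(gp.id, " ") // 1 2 3
	}
	fmt.Println()
}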
// //go:nowritebarrier @@ -562,24 +513,17 @@ func gcParkAssist() bool { } gp := getg() - oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail - if oldHead == 0 { - work.assistQueue.head.set(gp) - } else { - oldTail.ptr().schedlink.set(gp) - } - work.assistQueue.tail.set(gp) - gp.schedlink.set(nil) + oldList := work.assistQueue.q + work.assistQueue.q.pushBack(gp) // Recheck for background credit now that this G is in // the queue, but can still back out. This avoids a // race in case background marking has flushed more // credit since we checked above. if atomic.Loadint64(&gcController.bgScanCredit) > 0 { - work.assistQueue.head = oldHead - work.assistQueue.tail = oldTail - if oldTail != 0 { - oldTail.ptr().schedlink.set(nil) + work.assistQueue.q = oldList + if oldList.tail != 0 { + oldList.tail.ptr().schedlink.set(nil) } unlock(&work.assistQueue.lock) return false @@ -600,7 +544,7 @@ func gcParkAssist() bool { // //go:nowritebarrierrec func gcFlushBgCredit(scanWork int64) { - if work.assistQueue.head == 0 { + if work.assistQueue.q.empty() { // Fast path; there are no blocked assists. There's a // small window here where an assist may add itself to // the blocked queue and park. If that happens, we'll @@ -612,23 +556,21 @@ func gcFlushBgCredit(scanWork int64) { scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork) lock(&work.assistQueue.lock) - gp := work.assistQueue.head.ptr() - for gp != nil && scanBytes > 0 { + for !work.assistQueue.q.empty() && scanBytes > 0 { + gp := work.assistQueue.q.pop() // Note that gp.gcAssistBytes is negative because gp // is in debt. Think carefully about the signs below. if scanBytes+gp.gcAssistBytes >= 0 { // Satisfy this entire assist debt. scanBytes += gp.gcAssistBytes gp.gcAssistBytes = 0 - xgp := gp - gp = gp.schedlink.ptr() - // It's important that we *not* put xgp in + // It's important that we *not* put gp in // runnext. Otherwise, it's possible for user // code to exploit the GC worker's high // scheduler priority to get itself always run // before other goroutines and always in the // fresh quantum started by GC. - ready(xgp, 0, false) + ready(gp, 0, false) } else { // Partially satisfy this assist. gp.gcAssistBytes += scanBytes @@ -637,23 +579,10 @@ func gcFlushBgCredit(scanWork int64) { // back of the queue so that large assists // can't clog up the assist queue and // substantially delay small assists. - xgp := gp - gp = gp.schedlink.ptr() - if gp == nil { - // gp is the only assist in the queue. - gp = xgp - } else { - xgp.schedlink = 0 - work.assistQueue.tail.ptr().schedlink.set(xgp) - work.assistQueue.tail.set(xgp) - } + work.assistQueue.q.pushBack(gp) break } } - work.assistQueue.head.set(gp) - if gp == nil { - work.assistQueue.tail.set(nil) - } if scanBytes > 0 { // Convert from scan bytes back to work. @@ -707,11 +636,6 @@ func scanstack(gp *g, gcw *gcWork) { // ok } - mp := gp.m - if mp != nil && mp.helpgc != 0 { - throw("can't scan gchelper stack") - } - // Scan the stack. if usestackmaps { g := getg() @@ -775,34 +699,26 @@ type gcDrainFlags int const ( gcDrainUntilPreempt gcDrainFlags = 1 << iota - gcDrainNoBlock gcDrainFlushBgCredit gcDrainIdle gcDrainFractional - - // gcDrainBlock means neither gcDrainUntilPreempt or - // gcDrainNoBlock. It is the default, but callers should use - // the constant for documentation purposes. - gcDrainBlock gcDrainFlags = 0 ) // gcDrain scans roots and objects in work buffers, blackening grey -// objects until all roots and work buffers have been drained. 
+// objects until it is unable to get more work. It may return before +// GC is done; it's the caller's responsibility to balance work from +// other Ps. // // If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt -// is set. This implies gcDrainNoBlock. +// is set. // // If flags&gcDrainIdle != 0, gcDrain returns when there is other work -// to do. This implies gcDrainNoBlock. +// to do. // // If flags&gcDrainFractional != 0, gcDrain self-preempts when // pollFractionalWorkerExit() returns true. This implies // gcDrainNoBlock. // -// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is -// unable to get more work. Otherwise, it will block until all -// blocking calls are blocked in gcDrain. -// // If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work // credit to gcController.bgScanCredit every gcCreditSlack units of // scan work. @@ -815,7 +731,6 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { gp := getg().m.curg preemptible := flags&gcDrainUntilPreempt != 0 - blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock) == 0 flushBgCredit := flags&gcDrainFlushBgCredit != 0 idle := flags&gcDrainIdle != 0 @@ -859,17 +774,19 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { gcw.balance() } - var b uintptr - if blocking { - b = gcw.get() - } else { - b = gcw.tryGetFast() + b := gcw.tryGetFast() + if b == 0 { + b = gcw.tryGet() if b == 0 { + // Flush the write barrier + // buffer; this may create + // more work. + wbBufFlush(nil, 0) b = gcw.tryGet() } } if b == 0 { - // work barrier reached or tryGet failed. + // Unable to get work. break } scanobject(b, gcw) @@ -895,10 +812,6 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { } } - // In blocking mode, write barriers are not allowed after this - // point because we must preserve the condition that the work - // buffers are empty. - done: // Flush remaining scan work credit. if gcw.scanWork > 0 { @@ -947,6 +860,12 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { b := gcw.tryGetFast() if b == 0 { b = gcw.tryGet() + if b == 0 { + // Flush the write barrier buffer; + // this may create more work. + wbBufFlush(nil, 0) + b = gcw.tryGet() + } } if b == 0 { @@ -1005,9 +924,9 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { for j := 0; j < 8 && i < n; j++ { if bits&1 != 0 { // Same work as in scanobject; see comments there. - obj := *(*uintptr)(unsafe.Pointer(b + i)) - if obj != 0 { - if obj, span, objIndex := findObject(obj, b, i, false); obj != 0 { + p := *(*uintptr)(unsafe.Pointer(b + i)) + if p != 0 { + if obj, span, objIndex := findObject(p, b, i, false); obj != 0 { greyobject(obj, b, i, span, gcw, objIndex, false) } } @@ -1163,7 +1082,7 @@ func scanstackblockwithmap(pc, b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { if obj != 0 { o, span, objIndex := findObject(obj, b, i, false) if obj < minPhysPageSize || - span != nil && span.state != _MSpanManual && + span != nil && span.state != mSpanManual && (obj < span.base() || obj >= span.limit || span.state != mSpanInUse) { print("runtime: found in object at *(", hex(b), "+", hex(i), ") = ", hex(obj), ", pc=", hex(pc), "\n") name, file, line := funcfileline(pc, -1) @@ -1190,11 +1109,6 @@ func shade(b uintptr) { if obj, span, objIndex := findObject(b, 0, 0, true); obj != 0 { gcw := &getg().m.p.ptr().gcw greyobject(obj, 0, 0, span, gcw, objIndex, true) - if gcphase == _GCmarktermination || gcBlackenPromptly { - // Ps aren't allowed to cache work during mark - // termination. 
- gcw.dispose() - } } } @@ -1262,8 +1176,14 @@ func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintp if mbits.isMarked() { return } - // mbits.setMarked() // Avoid extra call overhead with manual inlining. - atomic.Or8(mbits.bytep, mbits.mask) + mbits.setMarked() + + // Mark span. + arena, pageIdx, pageMask := pageIndexOf(span.base()) + if arena.pageMarks[pageIdx]&pageMask == 0 { + atomic.Or8(&arena.pageMarks[pageIdx], pageMask) + } + // If this is a noscan object, fast-track it to black // instead of greying it. if span.spanclass.noscan() { @@ -1301,7 +1221,7 @@ func gcDumpObject(label string, obj, off uintptr) { skipped := false size := s.elemsize - if s.state == _MSpanManual && size == 0 { + if s.state == mSpanManual && size == 0 { // We're printing something from a stack frame. We // don't know how big it is, so just show up to an // including off. @@ -1338,18 +1258,13 @@ func gcDumpObject(label string, obj, off uintptr) { //go:nowritebarrier //go:nosplit func gcmarknewobject(obj, size, scanSize uintptr) { - if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen. + if useCheckmark { // The world should be stopped so this should not happen. throw("gcmarknewobject called while doing checkmark") } markBitsForAddr(obj).setMarked() gcw := &getg().m.p.ptr().gcw gcw.bytesMarked += uint64(size) gcw.scanWork += int64(scanSize) - if gcBlackenPromptly { - // There shouldn't be anything in the work queue, but - // we still need to flush stats. - gcw.dispose() - } } // gcMarkTinyAllocs greys all active tiny alloc blocks. @@ -1364,9 +1279,6 @@ func gcMarkTinyAllocs() { _, span, objIndex := findObject(c.tiny, 0, 0, false) gcw := &p.gcw greyobject(c.tiny, 0, 0, span, gcw, objIndex, false) - if gcBlackenPromptly { - gcw.dispose() - } } } @@ -1397,7 +1309,7 @@ var useCheckmark = false func initCheckmarks() { useCheckmark = true for _, s := range mheap_.allspans { - if s.state == _MSpanInUse { + if s.state == mSpanInUse { heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout()) } } @@ -1406,7 +1318,7 @@ func initCheckmarks() { func clearCheckmarks() { useCheckmark = false for _, s := range mheap_.allspans { - if s.state == _MSpanInUse { + if s.state == mSpanInUse { heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout()) } } diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go index 39dd54ea2eb..dcaeb106dc4 100644 --- a/libgo/go/runtime/mgcsweep.go +++ b/libgo/go/runtime/mgcsweep.go @@ -4,6 +4,24 @@ // Garbage collector: sweeping +// The sweeper consists of two different algorithms: +// +// * The object reclaimer finds and frees unmarked slots in spans. It +// can free a whole span if none of the objects are marked, but that +// isn't its goal. This can be driven either synchronously by +// mcentral.cacheSpan for mcentral spans, or asynchronously by +// sweepone from the list of all in-use spans in mheap_.sweepSpans. +// +// * The span reclaimer looks for spans that contain no marked objects +// and frees whole spans. This is a separate algorithm because +// freeing whole spans is the hardest task for the object reclaimer, +// but is critical when allocating new spans. The entry point for +// this is mheap_.reclaim and it's driven by a sequential scan of +// the page marks bitmap in the heap arenas. +// +// Both algorithms ultimately call mspan.sweep, which sweeps a single +// heap span. 
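To make the span reclaimer concrete, here is a tiny sketch of scanning per-arena bitmaps for in-use spans that contain no marked objects, loosely modeled on the heapArena.pageInUse and pageMarks bitmaps added later in this diff. The constants and layout are illustrative only.

// Sketch of finding whole spans to free by scanning page bitmaps (illustrative only).
package main

import "fmt"

const pagesPerArena = 16 // tiny for the example; the runtime uses far more

// reclaimableSpans returns the page indexes of spans that are in use
// (bit set in inUse) but contain no marked objects (bit clear in marks).
// Only the bit for a span's first page is meaningful, as in the runtime.
func reclaimableSpans(inUse, marks [pagesPerArena / 8]uint8) []int {
	var free []int
	for i := 0; i < pagesPerArena; i++ {
		byteIdx, mask := i/8, uint8(1)<<(uint(i)%8)
		if inUse[byteIdx]&mask != 0 && marks[byteIdx]&mask == 0 {
			free = append(free, i)
		}
	}
	return free
}

func main() {
	var inUse, marks [pagesPerArena / 8]uint8
	inUse[0] = 0x0b // spans start at pages 0, 1, 3
	marks[0] = 0x01 // only the span at page 0 has marked objects
	fmt.Println(reclaimableSpans(inUse, marks)) // [1 3]
}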
+ package runtime import ( @@ -54,7 +72,7 @@ func bgsweep(c chan int) { goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1) for { - for gosweepone() != ^uintptr(0) { + for sweepone() != ^uintptr(0) { sweep.nbgsweep++ Gosched() } @@ -62,7 +80,7 @@ func bgsweep(c chan int) { Gosched() } lock(&sweep.lock) - if !gosweepdone() { + if !isSweepDone() { // This can happen if a GC runs between // gosweepone returning ^0 above // and the lock being acquired. @@ -74,9 +92,8 @@ func bgsweep(c chan int) { } } -// sweeps one span -// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep -//go:nowritebarrier +// sweepone sweeps some unswept heap span and returns the number of pages returned +// to the heap, or ^uintptr(0) if there was nothing to sweep. func sweepone() uintptr { _g_ := getg() sweepRatio := mheap_.sweepPagesPerByte // For debugging @@ -90,10 +107,11 @@ func sweepone() uintptr { } atomic.Xadd(&mheap_.sweepers, +1) - npages := ^uintptr(0) + // Find a span to sweep. + var s *mspan sg := mheap_.sweepgen for { - s := mheap_.sweepSpans[1-sg/2%2].pop() + s = mheap_.sweepSpans[1-sg/2%2].pop() if s == nil { atomic.Store(&mheap_.sweepdone, 1) break @@ -102,23 +120,32 @@ func sweepone() uintptr { // This can happen if direct sweeping already // swept this span, but in that case the sweep // generation should always be up-to-date. - if s.sweepgen != sg { + if !(s.sweepgen == sg || s.sweepgen == sg+3) { print("runtime: bad span s.state=", s.state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n") throw("non in-use span in unswept list") } continue } - if s.sweepgen != sg-2 || !atomic.Cas(&s.sweepgen, sg-2, sg-1) { - continue + if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { + break } + } + + // Sweep the span we found. + npages := ^uintptr(0) + if s != nil { npages = s.npages - if !s.sweep(false) { + if s.sweep(false) { + // Whole span was freed. Count it toward the + // page reclaimer credit since these pages can + // now be used for span allocation. + atomic.Xadduintptr(&mheap_.reclaimCredit, npages) + } else { // Span is still in-use, so this returned no // pages to the heap and the span needs to // move to the swept in-use list. npages = 0 } - break } // Decrement the number of active sweepers and if this is the @@ -132,17 +159,13 @@ func sweepone() uintptr { return npages } -//go:nowritebarrier -func gosweepone() uintptr { - var ret uintptr - systemstack(func() { - ret = sweepone() - }) - return ret -} - -//go:nowritebarrier -func gosweepdone() bool { +// isSweepDone reports whether all spans are swept or currently being swept. +// +// Note that this condition may transition from false to true at any +// time as the sweeper runs. It may transition from true to false if a +// GC runs; to prevent that the caller must be non-preemptible or must +// somehow block GC progress. +func isSweepDone() bool { return mheap_.sweepdone != 0 } @@ -154,20 +177,25 @@ func (s *mspan) ensureSwept() { // (if GC is triggered on another goroutine). _g_ := getg() if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { - throw("MSpan_EnsureSwept: m is not locked") + throw("mspan.ensureSwept: m is not locked") } sg := mheap_.sweepgen - if atomic.Load(&s.sweepgen) == sg { + spangen := atomic.Load(&s.sweepgen) + if spangen == sg || spangen == sg+3 { return } - // The caller must be sure that the span is a MSpanInUse span. + // The caller must be sure that the span is a mSpanInUse span. 
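Both sweepone and ensureSwept claim a span by CAS-ing its sweepgen from sweepgen-2 (needs sweeping) to sweepgen-1 (being swept) and then publishing sweepgen once done. A minimal sketch of that claim protocol follows; the cached-span states (sg+1, sg+3) seen above are left out and the span type is simplified.

// Sketch of claiming a span to sweep via a sweepgen CAS (illustrative only).
package main

import (
	"fmt"
	"sync/atomic"
)

type span struct {
	sweepgen uint32
}

// trySweep attempts to claim and sweep s for the cycle identified by sg.
// It reports whether this caller did the sweeping. Losers either see the
// span already claimed by another sweeper or already swept.
func trySweep(s *span, sg uint32) bool {
	if !atomic.CompareAndSwapUint32(&s.sweepgen, sg-2, sg-1) {
		return false // someone else is sweeping it, or it's already swept
	}
	// ... free unmarked objects in s here ...
	atomic.StoreUint32(&s.sweepgen, sg) // publish: swept for this cycle
	return true
}

func main() {
	const sg = 4 // current heap sweep generation
	s := &span{sweepgen: sg - 2}
	fmt.Println(trySweep(s, sg)) // true: we claimed and swept it
	fmt.Println(trySweep(s, sg)) // false: already swept this cycle
}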
if atomic.Cas(&s.sweepgen, sg-2, sg-1) { s.sweep(false) return } // unfortunate condition, and we don't have efficient means to wait - for atomic.Load(&s.sweepgen) != sg { + for { + spangen := atomic.Load(&s.sweepgen) + if spangen == sg || spangen == sg+3 { + break + } osyield() } } @@ -175,7 +203,7 @@ func (s *mspan) ensureSwept() { // Sweep frees or collects finalizers for blocks not marked in the mark phase. // It clears the mark bits in preparation for the next GC round. // Returns true if the span was returned to heap. -// If preserve=true, don't return it to heap nor relink in MCentral lists; +// If preserve=true, don't return it to heap nor relink in mcentral lists; // caller takes care of it. //TODO go:nowritebarrier func (s *mspan) sweep(preserve bool) bool { @@ -183,12 +211,12 @@ func (s *mspan) sweep(preserve bool) bool { // GC must not start while we are in the middle of this function. _g_ := getg() if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { - throw("MSpan_Sweep: m is not locked") + throw("mspan.sweep: m is not locked") } sweepgen := mheap_.sweepgen if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("MSpan_Sweep: bad span state") + print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("mspan.sweep: bad span state") } if trace.enabled { @@ -345,8 +373,8 @@ func (s *mspan) sweep(preserve bool) bool { // The span must be in our exclusive ownership until we update sweepgen, // check for potential races. if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("MSpan_Sweep: bad span state after sweep") + print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("mspan.sweep: bad span state after sweep") } // Serialization point. // At this point the mark bits are cleared and allocation ready @@ -360,29 +388,29 @@ func (s *mspan) sweep(preserve bool) bool { if nfreedSigned > 0 && spc.sizeclass() != 0 { res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty) - // MCentral_FreeSpan updates sweepgen + // mcentral.freeSpan updates sweepgen } else if freeToHeap { // Free large span to heap // NOTE(rsc,dvyukov): The original implementation of efence - // in CL 22060046 used SysFree instead of SysFault, so that + // in CL 22060046 used sysFree instead of sysFault, so that // the operating system would eventually give the memory // back to us again, so that an efence program could run // longer without running out of memory. Unfortunately, - // calling SysFree here without any kind of adjustment of the + // calling sysFree here without any kind of adjustment of the // heap data structures means that when the memory does // come back to us, we have the wrong metadata for it, either in - // the MSpan structures or in the garbage collection bitmap. - // Using SysFault here means that the program will run out of + // the mspan structures or in the garbage collection bitmap. + // Using sysFault here means that the program will run out of // memory fairly quickly in efence mode, but at least it won't // have mysterious crashes due to confused memory reuse. - // It should be possible to switch back to SysFree if we also - // implement and then call some kind of MHeap_DeleteSpan. 
+ // It should be possible to switch back to sysFree if we also + // implement and then call some kind of mheap.deleteSpan. if debug.efence > 0 { s.limit = 0 // prevent mlookup from finding this span sysFault(unsafe.Pointer(s.base()), size) } else { - mheap_.freeSpan(s, 1) + mheap_.freeSpan(s, true) } c.local_nlargefree++ c.local_largefree += size @@ -430,7 +458,7 @@ retry: newHeapLive := uintptr(atomic.Load64(&memstats.heap_live)-mheap_.sweepHeapLiveBasis) + spanBytes pagesTarget := int64(mheap_.sweepPagesPerByte*float64(newHeapLive)) - int64(callerSweepPages) for pagesTarget > int64(atomic.Load64(&mheap_.pagesSwept)-sweptBasis) { - if gosweepone() == ^uintptr(0) { + if sweepone() == ^uintptr(0) { mheap_.sweepPagesPerByte = 0 break } diff --git a/libgo/go/runtime/mgcsweepbuf.go b/libgo/go/runtime/mgcsweepbuf.go index 6c1118e3857..0491f7ccf6c 100644 --- a/libgo/go/runtime/mgcsweepbuf.go +++ b/libgo/go/runtime/mgcsweepbuf.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -83,7 +84,7 @@ retry: if newCap == 0 { newCap = gcSweepBufInitSpineCap } - newSpine := persistentalloc(newCap*sys.PtrSize, sys.CacheLineSize, &memstats.gc_sys) + newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gc_sys) if b.spineCap != 0 { // Blocks are allocated off-heap, so // no write barriers. @@ -102,7 +103,7 @@ retry: } // Allocate a new block and add it to the spine. - block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), sys.CacheLineSize, &memstats.gc_sys)) + block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), cpu.CacheLineSize, &memstats.gc_sys)) blockp := add(b.spine, sys.PtrSize*top) // Blocks are allocated off-heap, so no write barrier. atomic.StorepNoWB(blockp, unsafe.Pointer(block)) diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go index 99771e2e57f..89d1e0e38e9 100644 --- a/libgo/go/runtime/mgcwork.go +++ b/libgo/go/runtime/mgcwork.go @@ -22,6 +22,13 @@ const ( workbufAlloc = 32 << 10 ) +// throwOnGCWork causes any operations that add pointers to a gcWork +// buffer to throw. +// +// TODO(austin): This is a temporary debugging measure for issue +// #27993. To be removed before release. +var throwOnGCWork bool + func init() { if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 { throw("bad workbufAlloc") @@ -46,10 +53,7 @@ func init() { // // (preemption must be disabled) // gcw := &getg().m.p.ptr().gcw -// .. call gcw.put() to produce and gcw.get() to consume .. -// if gcBlackenPromptly { -// gcw.dispose() -// } +// .. call gcw.put() to produce and gcw.tryGet() to consume .. // // It's important that any use of gcWork during the mark phase prevent // the garbage collector from transitioning to mark termination since @@ -83,6 +87,23 @@ type gcWork struct { // Scan work performed on this gcWork. This is aggregated into // gcController by dispose and may also be flushed by callers. scanWork int64 + + // flushedWork indicates that a non-empty work buffer was + // flushed to the global work list since the last gcMarkDone + // termination check. Specifically, this indicates that this + // gcWork may have communicated work to another gcWork. + flushedWork bool + + // pauseGen causes put operations to spin while pauseGen == + // gcWorkPauseGen if debugCachedWork is true. + pauseGen uint32 + + // putGen is the pauseGen of the last putGen. + putGen uint32 + + // pauseStack is the stack at which this P was paused if + // debugCachedWork is true. 
+ pauseStack [16]location } // Most of the methods of gcWork are go:nowritebarrierrec because the @@ -101,10 +122,59 @@ func (w *gcWork) init() { w.wbuf2 = wbuf2 } +func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) { + if debugCachedWork { + alreadyFailed := w.putGen == w.pauseGen + w.putGen = w.pauseGen + if m := getg().m; m.locks > 0 || m.mallocing != 0 || m.preemptoff != "" || m.p.ptr().status != _Prunning { + // If we were to spin, the runtime may + // deadlock: the condition above prevents + // preemption (see newstack), which could + // prevent gcMarkDone from finishing the + // ragged barrier and releasing the spin. + return + } + for atomic.Load(&gcWorkPauseGen) == w.pauseGen { + } + if throwOnGCWork { + printlock() + if alreadyFailed { + println("runtime: checkPut already failed at this generation") + } + println("runtime: late gcWork put") + if ptr != 0 { + gcDumpObject("ptr", ptr, ^uintptr(0)) + } + for _, ptr := range ptrs { + gcDumpObject("ptrs", ptr, ^uintptr(0)) + } + println("runtime: paused at") + for _, loc := range w.pauseStack { + if loc.pc == 0 { + break + } + if loc.function != "" { + // Obviously this doesn't + // relate to ancestor + // tracebacks, but this + // function prints what we + // want. + printAncestorTracebackFuncInfo(loc.function, loc.filename, loc.lineno, loc.pc) + } else { + println("\tunknown PC ", hex(loc.pc), "\n") + } + } + throw("throwOnGCWork") + } + } +} + // put enqueues a pointer for the garbage collector to trace. // obj must point to the beginning of a heap object or an oblet. //go:nowritebarrierrec func (w *gcWork) put(obj uintptr) { + w.checkPut(obj, nil) + flushed := false wbuf := w.wbuf1 if wbuf == nil { @@ -116,6 +186,7 @@ func (w *gcWork) put(obj uintptr) { wbuf = w.wbuf1 if wbuf.nobj == len(wbuf.obj) { putfull(wbuf) + w.flushedWork = true wbuf = getempty() w.wbuf1 = wbuf flushed = true @@ -134,10 +205,12 @@ func (w *gcWork) put(obj uintptr) { } } -// putFast does a put and returns true if it can be done quickly +// putFast does a put and reports whether it can be done quickly // otherwise it returns false and the caller needs to call put. //go:nowritebarrierrec func (w *gcWork) putFast(obj uintptr) bool { + w.checkPut(obj, nil) + wbuf := w.wbuf1 if wbuf == nil { return false @@ -159,6 +232,8 @@ func (w *gcWork) putBatch(obj []uintptr) { return } + w.checkPut(0, obj) + flushed := false wbuf := w.wbuf1 if wbuf == nil { @@ -169,6 +244,7 @@ func (w *gcWork) putBatch(obj []uintptr) { for len(obj) > 0 { for wbuf.nobj == len(wbuf.obj) { putfull(wbuf) + w.flushedWork = true w.wbuf1, w.wbuf2 = w.wbuf2, getempty() wbuf = w.wbuf1 flushed = true @@ -231,37 +307,6 @@ func (w *gcWork) tryGetFast() uintptr { return wbuf.obj[wbuf.nobj] } -// get dequeues a pointer for the garbage collector to trace, blocking -// if necessary to ensure all pointers from all queues and caches have -// been retrieved. get returns 0 if there are no pointers remaining. -//go:nowritebarrierrec -func (w *gcWork) get() uintptr { - wbuf := w.wbuf1 - if wbuf == nil { - w.init() - wbuf = w.wbuf1 - // wbuf is empty at this point. - } - if wbuf.nobj == 0 { - w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 - wbuf = w.wbuf1 - if wbuf.nobj == 0 { - owbuf := wbuf - wbuf = getfull() - if wbuf == nil { - return 0 - } - putempty(owbuf) - w.wbuf1 = wbuf - } - } - - // TODO: This might be a good place to add prefetch code - - wbuf.nobj-- - return wbuf.obj[wbuf.nobj] -} - // dispose returns any cached pointers to the global queue. 
// The buffers are being put on the full queue so that the // write barriers will not simply reacquire them before the @@ -275,6 +320,7 @@ func (w *gcWork) dispose() { putempty(wbuf) } else { putfull(wbuf) + w.flushedWork = true } w.wbuf1 = nil @@ -283,6 +329,7 @@ func (w *gcWork) dispose() { putempty(wbuf) } else { putfull(wbuf) + w.flushedWork = true } w.wbuf2 = nil } @@ -308,10 +355,14 @@ func (w *gcWork) balance() { return } if wbuf := w.wbuf2; wbuf.nobj != 0 { + w.checkPut(0, wbuf.obj[:wbuf.nobj]) putfull(wbuf) + w.flushedWork = true w.wbuf2 = getempty() } else if wbuf := w.wbuf1; wbuf.nobj > 4 { + w.checkPut(0, wbuf.obj[:wbuf.nobj]) w.wbuf1 = handoff(wbuf) + w.flushedWork = true // handoff did putfull } else { return } @@ -321,7 +372,7 @@ func (w *gcWork) balance() { } } -// empty returns true if w has no mark work available. +// empty reports whether w has no mark work available. //go:nowritebarrierrec func (w *gcWork) empty() bool { return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0) @@ -440,61 +491,6 @@ func trygetfull() *workbuf { return b } -// Get a full work buffer off the work.full list. -// If nothing is available wait until all the other gc helpers have -// finished and then return nil. -// getfull acts as a barrier for work.nproc helpers. As long as one -// gchelper is actively marking objects it -// may create a workbuffer that the other helpers can work on. -// The for loop either exits when a work buffer is found -// or when _all_ of the work.nproc GC helpers are in the loop -// looking for work and thus not capable of creating new work. -// This is in fact the termination condition for the STW mark -// phase. -//go:nowritebarrier -func getfull() *workbuf { - b := (*workbuf)(work.full.pop()) - if b != nil { - b.checknonempty() - return b - } - - incnwait := atomic.Xadd(&work.nwait, +1) - if incnwait > work.nproc { - println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc) - throw("work.nwait > work.nproc") - } - for i := 0; ; i++ { - if work.full != 0 { - decnwait := atomic.Xadd(&work.nwait, -1) - if decnwait == work.nproc { - println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) - throw("work.nwait > work.nproc") - } - b = (*workbuf)(work.full.pop()) - if b != nil { - b.checknonempty() - return b - } - incnwait := atomic.Xadd(&work.nwait, +1) - if incnwait > work.nproc { - println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc) - throw("work.nwait > work.nproc") - } - } - if work.nwait == work.nproc && work.markrootNext >= work.markrootJobs { - return nil - } - if i < 10 { - procyield(20) - } else if i < 20 { - osyield() - } else { - usleep(100) - } - } -} - //go:nowritebarrier func handoff(b *workbuf) *workbuf { // Make new buffer with half of b's pointers. diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go index eb98083fecc..7139b0e94e7 100644 --- a/libgo/go/runtime/mheap.go +++ b/libgo/go/runtime/mheap.go @@ -9,6 +9,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -20,7 +21,7 @@ import ( const minPhysPageSize = 4096 // Main malloc heap. -// The heap itself is the "free[]" and "large" arrays, +// The heap itself is the "free" and "scav" treaps, // but all the other global data is here too. 
// // mheap must not be heap-allocated because it contains mSpanLists, @@ -29,13 +30,11 @@ const minPhysPageSize = 4096 //go:notinheap type mheap struct { lock mutex - free [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList - freelarge mTreap // free treap of length >= _MaxMHeapList - busy [_MaxMHeapList]mSpanList // busy lists of large spans of given length - busylarge mSpanList // busy lists of large spans length >= _MaxMHeapList - sweepgen uint32 // sweep generation, see comment in mspan - sweepdone uint32 // all spans are swept - sweepers uint32 // number of active sweepone calls + free mTreap // free and non-scavenged spans + scav mTreap // free and scavenged spans + sweepgen uint32 // sweep generation, see comment in mspan + sweepdone uint32 // all spans are swept + sweepers uint32 // number of active sweepone calls // allspans is a slice of all mspans ever created. Each mspan // appears exactly once. @@ -61,7 +60,7 @@ type mheap struct { // on the swept stack. sweepSpans [2]gcSweepBuf - //_ uint32 // align uint64 fields on 32-bit for atomics + _ uint32 // align uint64 fields on 32-bit for atomics // Proportional sweep // @@ -81,7 +80,7 @@ type mheap struct { // accounting for current progress. If we could only adjust // the slope, it would create a discontinuity in debt if any // progress has already been made. - pagesInUse uint64 // pages of spans in stats _MSpanInUse; R/W with mheap.lock + pagesInUse uint64 // pages of spans in stats mSpanInUse; R/W with mheap.lock pagesSwept uint64 // pages swept this cycle; updated atomically pagesSweptBasis uint64 // pagesSwept to use as the origin of the sweep ratio; updated atomically sweepHeapLiveBasis uint64 // value of heap_live to use as the origin of sweep ratio; written with lock, read without @@ -89,6 +88,25 @@ type mheap struct { // TODO(austin): pagesInUse should be a uintptr, but the 386 // compiler can't 8-byte align fields. + // Page reclaimer state + + // reclaimIndex is the page index in allArenas of next page to + // reclaim. Specifically, it refers to page (i % + // pagesPerArena) of arena allArenas[i / pagesPerArena]. + // + // If this is >= 1<<63, the page reclaimer is done scanning + // the page marks. + // + // This is accessed atomically. + reclaimIndex uint64 + // reclaimCredit is spare credit for extra pages swept. Since + // the page reclaimer works in large chunks, it may reclaim + // more than requested. Any spare pages released go to this + // credit pool. + // + // This is accessed atomically. + reclaimCredit uintptr + // Malloc stats. largealloc uint64 // bytes allocated for large objects nlargealloc uint64 // number of large object allocations @@ -133,21 +151,35 @@ type mheap struct { // (the actual arenas). This is only used on 32-bit. arena linearAlloc - //_ uint32 // ensure 64-bit alignment of central + // allArenas is the arenaIndex of every mapped arena. This can + // be used to iterate through the address space. + // + // Access is protected by mheap_.lock. However, since this is + // append-only and old backing arrays are never freed, it is + // safe to acquire mheap_.lock, copy the slice header, and + // then release mheap_.lock. + allArenas []arenaIdx + + // sweepArenas is a snapshot of allArenas taken at the + // beginning of the sweep cycle. This can be read safely by + // simply blocking GC (by disabling preemption). + sweepArenas []arenaIdx + + _ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. 
- // the padding makes sure that the MCentrals are - // spaced CacheLineSize bytes apart, so that each MCentral.lock + // the padding makes sure that the mcentrals are + // spaced CacheLinePadSize bytes apart, so that each mcentral.lock // gets its own cache line. // central is indexed by spanClass. central [numSpanClasses]struct { mcentral mcentral - pad [sys.CacheLineSize - unsafe.Sizeof(mcentral{})%sys.CacheLineSize]byte + pad [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte } spanalloc fixalloc // allocator for span* cachealloc fixalloc // allocator for mcache* - treapalloc fixalloc // allocator for treapNodes* used by large objects + treapalloc fixalloc // allocator for treapNodes* specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. @@ -184,6 +216,29 @@ type heapArena struct { // must not be a safe-point between establishing that an // address is live and looking it up in the spans array. spans [pagesPerArena]*mspan + + // pageInUse is a bitmap that indicates which spans are in + // state mSpanInUse. This bitmap is indexed by page number, + // but only the bit corresponding to the first page in each + // span is used. + // + // Writes are protected by mheap_.lock. + pageInUse [pagesPerArena / 8]uint8 + + // pageMarks is a bitmap that indicates which spans have any + // marked objects on them. Like pageInUse, only the bit + // corresponding to the first page in each span is used. + // + // Writes are done atomically during marking. Reads are + // non-atomic and lock-free since they only occur during + // sweeping (and hence never race with writes). + // + // This is used to quickly find whole spans that can be freed. + // + // TODO(austin): It would be nice if this was uint64 for + // faster scanning, but we don't have 64-bit atomic bit + // operations. + pageMarks [pagesPerArena / 8]uint8 } // arenaHint is a hint for where to grow the heap arenas. See @@ -196,20 +251,21 @@ type arenaHint struct { next *arenaHint } -// An MSpan is a run of pages. +// An mspan is a run of pages. // -// When a MSpan is in the heap free list, state == MSpanFree +// When a mspan is in the heap free treap, state == mSpanFree // and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span. +// If the mspan is in the heap scav treap, then in addition to the +// above scavenged == true. scavenged == false in all other cases. // -// When a MSpan is allocated, state == MSpanInUse or MSpanManual +// When a mspan is allocated, state == mSpanInUse or mSpanManual // and heapmap(i) == span for all s->start <= i < s->start+s->npages. -// Every MSpan is in one doubly-linked list, -// either one of the MHeap's free lists or one of the -// MCentral's span lists. +// Every mspan is in one doubly-linked list, either in the mheap's +// busy list or one of the mcentral's span lists. -// An MSpan representing actual memory has state _MSpanInUse, -// _MSpanManual, or _MSpanFree. Transitions between these states are +// An mspan representing actual memory has state mSpanInUse, +// mSpanManual, or mSpanFree. 
Transitions between these states are // constrained as follows: // // * A span may transition from free to in-use or manual during any GC @@ -225,19 +281,19 @@ type arenaHint struct { type mSpanState uint8 const ( - _MSpanDead mSpanState = iota - _MSpanInUse // allocated for garbage collected heap - _MSpanManual // allocated for manual management (e.g., stack allocator) - _MSpanFree + mSpanDead mSpanState = iota + mSpanInUse // allocated for garbage collected heap + mSpanManual // allocated for manual management (e.g., stack allocator) + mSpanFree ) // mSpanStateNames are the names of the span states, indexed by // mSpanState. var mSpanStateNames = []string{ - "_MSpanDead", - "_MSpanInUse", - "_MSpanManual", - "_MSpanFree", + "mSpanDead", + "mSpanInUse", + "mSpanManual", + "mSpanFree", } // mSpanList heads a linked list of spans. @@ -257,7 +313,7 @@ type mspan struct { startAddr uintptr // address of first byte of span aka s.base() npages uintptr // number of pages in span - manualFreeList gclinkptr // list of free objects in _MSpanManual spans + manualFreeList gclinkptr // list of free objects in mSpanManual spans // freeindex is the slot index between 0 and nelems at which to begin scanning // for the next free object in this span. @@ -316,6 +372,8 @@ type mspan struct { // if sweepgen == h->sweepgen - 2, the span needs sweeping // if sweepgen == h->sweepgen - 1, the span is currently being swept // if sweepgen == h->sweepgen, the span is swept and ready to use + // if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping + // if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached // h->sweepgen is incremented by 2 after every GC sweepgen uint32 @@ -323,14 +381,13 @@ type mspan struct { baseMask uint16 // if non-0, elemsize is a power of 2, & this will get object allocation base allocCount uint16 // number of allocated objects spanclass spanClass // size class and noscan (uint8) - incache bool // being used by an mcache state mSpanState // mspaninuse etc needzero uint8 // needs to be zeroed before allocation divShift uint8 // for divide by elemsize - divMagic.shift divShift2 uint8 // for divide by elemsize - divMagic.shift2 + scavenged bool // whether this span has had its pages released to the OS elemsize uintptr // computed from sizeclass or from npages unusedsince int64 // first time spotted by gc in mspanfree state - npreleased uintptr // number of pages released to the os limit uintptr // end of data in span speciallock mutex // guards specials list specials *special // linked list of special records sorted by offset. @@ -349,6 +406,45 @@ func (s *mspan) layout() (size, n, total uintptr) { return } +// physPageBounds returns the start and end of the span +// rounded in to the physical page size. +func (s *mspan) physPageBounds() (uintptr, uintptr) { + start := s.base() + end := start + s.npages<<_PageShift + if physPageSize > _PageSize { + // Round start and end in. + start = (start + physPageSize - 1) &^ (physPageSize - 1) + end &^= physPageSize - 1 + } + return start, end +} + +func (s *mspan) scavenge() uintptr { + // start and end must be rounded in, otherwise madvise + // will round them *out* and release more memory + // than we want. + start, end := s.physPageBounds() + if end <= start { + // start and end don't span a whole physical page. 
+ return 0 + } + released := end - start + memstats.heap_released += uint64(released) + s.scavenged = true + sysUnused(unsafe.Pointer(start), released) + return released +} + +// released returns the number of bytes in this span +// which were returned back to the OS. +func (s *mspan) released() uintptr { + if !s.scavenged { + return 0 + } + start, end := s.physPageBounds() + return end - start +} + // recordspan adds a newly allocated span to h.allspans. // // This only happens the first time a span is allocated from @@ -457,7 +553,7 @@ func (i arenaIdx) l2() uint { } // inheap reports whether b is a pointer into a (potentially dead) heap object. -// It returns false for pointers into _MSpanManual spans. +// It returns false for pointers into mSpanManual spans. // Non-preemptible because it is used by write barriers. //go:nowritebarrier //go:nosplit @@ -476,7 +572,7 @@ func inHeapOrStack(b uintptr) bool { return false } switch s.state { - case mSpanInUse, _MSpanManual: + case mSpanInUse, mSpanManual: return b < s.limit default: return false @@ -550,6 +646,16 @@ func spanOfHeap(p uintptr) *mspan { return s } +// pageIndexOf returns the arena, page index, and page mask for pointer p. +// The caller must ensure p is in the heap. +func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) { + ai := arenaIndex(p) + arena = mheap_.arenas[ai.l1()][ai.l2()] + pageIdx = ((p / pageSize) / 8) % uintptr(len(arena.pageInUse)) + pageMask = byte(1 << ((p / pageSize) % 8)) + return +} + // Initialize the heap. func (h *mheap) init() { h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys) @@ -569,117 +675,182 @@ func (h *mheap) init() { h.spanalloc.zero = false // h->mapcache needs no init - for i := range h.free { - h.free[i].init() - h.busy[i].init() - } - h.busylarge.init() for i := range h.central { h.central[i].mcentral.init(spanClass(i)) } } -// Sweeps spans in list until reclaims at least npages into heap. -// Returns the actual number of pages reclaimed. -func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr { - n := uintptr(0) - sg := mheap_.sweepgen -retry: - for s := list.first; s != nil; s = s.next { - if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { - list.remove(s) - // swept spans are at the end of the list - list.insertBack(s) // Puts it back on a busy list. s is not in the treap at this point. - unlock(&h.lock) - snpages := s.npages - if s.sweep(false) { - n += snpages +// reclaim sweeps and reclaims at least npage pages into the heap. +// It is called before allocating npage pages to keep growth in check. +// +// reclaim implements the page-reclaimer half of the sweeper. +// +// h must NOT be locked. +func (h *mheap) reclaim(npage uintptr) { + // This scans pagesPerChunk at a time. Higher values reduce + // contention on h.reclaimPos, but increase the minimum + // latency of performing a reclaim. + // + // Must be a multiple of the pageInUse bitmap element size. + // + // The time required by this can vary a lot depending on how + // many spans are actually freed. Experimentally, it can scan + // for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only + // free spans at ~32 MB/ms. Using 512 pages bounds this at + // roughly 100µs. + // + // TODO(austin): Half of the time spent freeing spans is in + // locking/unlocking the heap (even with low contention). We + // could make the slow path here several times faster by + // batching heap frees. 
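The reclaim loop that follows claims fixed-size chunks of the page bitmaps with an atomic add on reclaimIndex and banks any pages freed beyond the request in reclaimCredit for later callers. A standalone sketch of that claiming protocol; the names (chunkPages, totalPages, scanChunk) and the fixed per-chunk yield are invented for illustration:

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	chunkPages = 512  // pages claimed per step, like pagesPerChunk
	totalPages = 4096 // pretend address space, like len(arenas)*pagesPerArena
)

var (
	reclaimIndex  uint64  // next page index to claim
	reclaimCredit uintptr // spare pages freed beyond what a caller asked for
)

// scanChunk stands in for reclaimChunk; pretend every chunk frees 100 pages.
func scanChunk(idx uintptr) uintptr { return 100 }

// reclaim frees at least npage pages, drawing on banked credit first.
func reclaim(npage uintptr) {
	for npage > 0 {
		// Spend credit before doing new work.
		if credit := atomic.LoadUintptr(&reclaimCredit); credit > 0 {
			take := credit
			if take > npage {
				take = npage
			}
			if atomic.CompareAndSwapUintptr(&reclaimCredit, credit, credit-take) {
				npage -= take
			}
			continue
		}
		// Claim the next chunk of pages to scan.
		idx := uintptr(atomic.AddUint64(&reclaimIndex, chunkPages) - chunkPages)
		if idx >= totalPages {
			return // nothing left to scan
		}
		freed := scanChunk(idx)
		if freed <= npage {
			npage -= freed
		} else {
			// Bank the surplus for other callers.
			atomic.AddUintptr(&reclaimCredit, freed-npage)
			npage = 0
		}
	}
}

func main() {
	reclaim(250)
	fmt.Println("index:", atomic.LoadUint64(&reclaimIndex), "credit:", atomic.LoadUintptr(&reclaimCredit))
}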
+ const pagesPerChunk = 512 + + // Bail early if there's no more reclaim work. + if atomic.Load64(&h.reclaimIndex) >= 1<<63 { + return + } + + // Disable preemption so the GC can't start while we're + // sweeping, so we can read h.sweepArenas, and so + // traceGCSweepStart/Done pair on the P. + mp := acquirem() + + if trace.enabled { + traceGCSweepStart() + } + + arenas := h.sweepArenas + locked := false + for npage > 0 { + // Pull from accumulated credit first. + if credit := atomic.Loaduintptr(&h.reclaimCredit); credit > 0 { + take := credit + if take > npage { + // Take only what we need. + take = npage } - lock(&h.lock) - if n >= npages { - return n + if atomic.Casuintptr(&h.reclaimCredit, credit, credit-take) { + npage -= take } - // the span could have been moved elsewhere - goto retry - } - if s.sweepgen == sg-1 { - // the span is being swept by background sweeper, skip continue } - // already swept empty span, - // all subsequent ones must also be either swept or in process of sweeping - break - } - return n -} -// Sweeps and reclaims at least npage pages into heap. -// Called before allocating npage pages. -func (h *mheap) reclaim(npage uintptr) { - // First try to sweep busy spans with large objects of size >= npage, - // this has good chances of reclaiming the necessary space. - for i := int(npage); i < len(h.busy); i++ { - if h.reclaimList(&h.busy[i], npage) != 0 { - return // Bingo! + // Claim a chunk of work. + idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk) + if idx/pagesPerArena >= uintptr(len(arenas)) { + // Page reclaiming is done. + atomic.Store64(&h.reclaimIndex, 1<<63) + break } - } - // Then -- even larger objects. - if h.reclaimList(&h.busylarge, npage) != 0 { - return // Bingo! - } + if !locked { + // Lock the heap for reclaimChunk. + lock(&h.lock) + locked = true + } - // Now try smaller objects. - // One such object is not enough, so we need to reclaim several of them. - reclaimed := uintptr(0) - for i := 0; i < int(npage) && i < len(h.busy); i++ { - reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed) - if reclaimed >= npage { - return + // Scan this chunk. + nfound := h.reclaimChunk(arenas, idx, pagesPerChunk) + if nfound <= npage { + npage -= nfound + } else { + // Put spare pages toward global credit. + atomic.Xadduintptr(&h.reclaimCredit, nfound-npage) + npage = 0 } } + if locked { + unlock(&h.lock) + } - // Now sweep everything that is not yet swept. - unlock(&h.lock) - for { - n := sweepone() - if n == ^uintptr(0) { // all spans are swept - break + if trace.enabled { + traceGCSweepDone() + } + releasem(mp) +} + +// reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n). +// It returns the number of pages returned to the heap. +// +// h.lock must be held and the caller must be non-preemptible. +func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { + // The heap lock must be held because this accesses the + // heapArena.spans arrays using potentially non-live pointers. + // In particular, if a span were freed and merged concurrently + // with this probing heapArena.spans, it would be possible to + // observe arbitrary, stale span pointers. + n0 := n + var nFreed uintptr + sg := h.sweepgen + for n > 0 { + ai := arenas[pageIdx/pagesPerArena] + ha := h.arenas[ai.l1()][ai.l2()] + + // Get a chunk of the bitmap to work on. 
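The chunk scan below works byte by byte over two parallel bitmaps: a span is a reclaim candidate when its first-page bit is set in pageInUse but clear in pageMarks. A minimal sketch of that bit arithmetic, with made-up bitmap contents:

package main

import "fmt"

func main() {
	// One byte covers 8 pages; a set bit means a span starts at that page.
	inUse := []uint8{0xb5, 0x0f}  // bits: 1011_0101, 0000_1111
	marked := []uint8{0x25, 0x0a} // bits: 0010_0101, 0000_1010

	for i := range inUse {
		// In use but with no marked objects: candidates for freeing.
		inUseUnmarked := inUse[i] &^ marked[i]
		for j := uint(0); j < 8; j++ {
			if inUseUnmarked&(1<<j) != 0 {
				fmt.Println("reclaimable span starting at page", uint(i)*8+j)
			}
		}
	}
}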
+ arenaPage := uint(pageIdx % pagesPerArena) + inUse := ha.pageInUse[arenaPage/8:] + marked := ha.pageMarks[arenaPage/8:] + if uintptr(len(inUse)) > n/8 { + inUse = inUse[:n/8] + marked = marked[:n/8] } - reclaimed += n - if reclaimed >= npage { - break + + // Scan this bitmap chunk for spans that are in-use + // but have no marked objects on them. + for i := range inUse { + inUseUnmarked := inUse[i] &^ marked[i] + if inUseUnmarked == 0 { + continue + } + + for j := uint(0); j < 8; j++ { + if inUseUnmarked&(1<<j) != 0 { + s := ha.spans[arenaPage+uint(i)*8+j] + if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { + npages := s.npages + unlock(&h.lock) + if s.sweep(false) { + nFreed += npages + } + lock(&h.lock) + // Reload inUse. It's possible nearby + // spans were freed when we dropped the + // lock and we don't want to get stale + // pointers from the spans array. + inUseUnmarked = inUse[i] &^ marked[i] + } + } + } } + + // Advance. + pageIdx += uintptr(len(inUse) * 8) + n -= uintptr(len(inUse) * 8) } - lock(&h.lock) + if trace.enabled { + // Account for pages scanned but not reclaimed. + traceGCSweepSpan((n0 - nFreed) * pageSize) + } + return nFreed } -// Allocate a new span of npage pages from the heap for GC'd memory -// and record its size class in the HeapMap and HeapMapCache. +// alloc_m is the internal implementation of mheap.alloc. +// +// alloc_m must run on the system stack because it locks the heap, so +// any stack growth during alloc_m would self-deadlock. +// +//go:systemstack func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan { _g_ := getg() - lock(&h.lock) // To prevent excessive heap growth, before allocating n pages // we need to sweep and reclaim at least n pages. if h.sweepdone == 0 { - // TODO(austin): This tends to sweep a large number of - // spans in order to find a few completely free spans - // (for example, in the garbage benchmark, this sweeps - // ~30x the number of pages its trying to allocate). - // If GC kept a bit for whether there were any marks - // in a span, we could release these free spans - // at the end of GC and eliminate this entirely. - if trace.enabled { - traceGCSweepStart() - } h.reclaim(npage) - if trace.enabled { - traceGCSweepDone() - } } + lock(&h.lock) // transfer stats from cache to global memstats.heap_scan += uint64(_g_.m.mcache.local_scan) _g_.m.mcache.local_scan = 0 @@ -692,7 +863,7 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan { // able to map interior pointer to containing span. atomic.Store(&s.sweepgen, h.sweepgen) h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list. - s.state = _MSpanInUse + s.state = mSpanInUse s.allocCount = 0 s.spanclass = spanclass if sizeclass := spanclass.sizeclass(); sizeclass == 0 { @@ -710,6 +881,10 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan { s.baseMask = m.baseMask } + // Mark in-use span in arena page bitmap. + arena, pageIdx, pageMask := pageIndexOf(s.base()) + arena.pageInUse[pageIdx] |= pageMask + // update stats, sweep lists h.pagesInUse += uint64(npage) if large { @@ -717,12 +892,6 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan { mheap_.largealloc += uint64(s.elemsize) mheap_.nlargealloc++ atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift)) - // Swept spans are at the end of lists. 
- if s.npages < uintptr(len(h.busy)) { - h.busy[s.npages].insertBack(s) - } else { - h.busylarge.insertBack(s) - } } } // heap_scan and heap_live were updated. @@ -747,6 +916,12 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan { return s } +// alloc allocates a new span of npage pages from the GC'd heap. +// +// Either large must be true or spanclass must indicates the span's +// size class and scannability. +// +// If needzero is true, the memory for the returned span will be zeroed. func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero bool) *mspan { // Don't do any operations that lock the heap on the G stack. // It might trigger stack growth, and the stack growth code needs @@ -784,7 +959,7 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { lock(&h.lock) s := h.allocSpanLocked(npage, stat) if s != nil { - s.state = _MSpanManual + s.state = mSpanManual s.manualFreeList = 0 s.allocCount = 0 s.spanclass = 0 @@ -823,48 +998,61 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) { } } +// pickFreeSpan acquires a free span from internal free list +// structures if one is available. Otherwise returns nil. +// h must be locked. +func (h *mheap) pickFreeSpan(npage uintptr) *mspan { + tf := h.free.find(npage) + ts := h.scav.find(npage) + + // Check for whichever treap gave us the smaller, non-nil result. + // Note that we want the _smaller_ free span, i.e. the free span + // closer in size to the amount we requested (npage). + var s *mspan + if tf != nil && (ts == nil || tf.spanKey.npages <= ts.spanKey.npages) { + s = tf.spanKey + h.free.removeNode(tf) + } else if ts != nil && (tf == nil || tf.spanKey.npages > ts.spanKey.npages) { + s = ts.spanKey + h.scav.removeNode(ts) + } + return s +} + // Allocates a span of the given size. h must be locked. // The returned span has been removed from the -// free list, but its state is still MSpanFree. +// free structures, but its state is still mSpanFree. func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan { - var list *mSpanList var s *mspan - // Try in fixed-size lists up to max. - for i := int(npage); i < len(h.free); i++ { - list = &h.free[i] - if !list.isEmpty() { - s = list.first - list.remove(s) - goto HaveSpan - } + s = h.pickFreeSpan(npage) + if s != nil { + goto HaveSpan } - // Best fit in list of large spans. - s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us - if s == nil { - if !h.grow(npage) { - return nil - } - s = h.allocLarge(npage) - if s == nil { - return nil - } + // On failure, grow the heap and try again. + if !h.grow(npage) { + return nil + } + s = h.pickFreeSpan(npage) + if s != nil { + goto HaveSpan } + throw("grew heap, but no adequate free span found") HaveSpan: // Mark span in use. - if s.state != _MSpanFree { - throw("MHeap_AllocLocked - MSpan not free") + if s.state != mSpanFree { + throw("candidate mspan for allocation is not free") } if s.npages < npage { - throw("MHeap_AllocLocked - bad npages") - } - if s.npreleased > 0 { - sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift) - memstats.heap_released -= uint64(s.npreleased << _PageShift) - s.npreleased = 0 + throw("candidate mspan for allocation is too small") } + // First, subtract any memory that was released back to + // the OS from s. We will re-scavenge the trimmed section + // if necessary. + memstats.heap_released -= uint64(s.released()) + if s.npages > npage { // Trim extra and put it back in the heap. 
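pickFreeSpan above asks both the unscavenged (free) and scavenged (scav) treaps for a best-fit span and keeps whichever answer is the tighter fit. A rough sketch of that selection rule, using sorted slices in place of the treaps; all names and sizes are invented:

package main

import (
	"fmt"
	"sort"
)

// find returns the smallest size >= npages, or 0 if none (stand-in for treap.find).
func find(sizes []uintptr, npages uintptr) uintptr {
	i := sort.Search(len(sizes), func(i int) bool { return sizes[i] >= npages })
	if i == len(sizes) {
		return 0
	}
	return sizes[i]
}

func main() {
	free := []uintptr{1, 3, 8}  // unscavenged span sizes, sorted
	scav := []uintptr{2, 4, 16} // scavenged span sizes, sorted

	npages := uintptr(3)
	tf, ts := find(free, npages), find(scav, npages)

	// Prefer whichever candidate is the tighter fit.
	switch {
	case tf != 0 && (ts == 0 || tf <= ts):
		fmt.Println("use unscavenged span of", tf, "pages")
	case ts != 0:
		fmt.Println("use scavenged span of", ts, "pages")
	default:
		fmt.Println("no fit; grow the heap and retry")
	}
}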
t := (*mspan)(h.spanalloc.alloc()) @@ -874,10 +1062,25 @@ HaveSpan: h.setSpan(t.base(), t) h.setSpan(t.base()+t.npages*pageSize-1, t) t.needzero = s.needzero - s.state = _MSpanManual // prevent coalescing with s - t.state = _MSpanManual + // If s was scavenged, then t may be scavenged. + start, end := t.physPageBounds() + if s.scavenged && start < end { + memstats.heap_released += uint64(end - start) + t.scavenged = true + } + s.state = mSpanManual // prevent coalescing with s + t.state = mSpanManual h.freeSpanLocked(t, false, false, s.unusedsince) - s.state = _MSpanFree + s.state = mSpanFree + } + // "Unscavenge" s only AFTER splitting so that + // we only sysUsed whatever we actually need. + if s.scavenged { + // sysUsed all the pages that are actually available + // in the span. Note that we don't need to decrement + // heap_released since we already did so earlier. + sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift) + s.scavenged = false } s.unusedsince = 0 @@ -893,21 +1096,6 @@ HaveSpan: return s } -// Large spans have a minimum size of 1MByte. The maximum number of large spans to support -// 1TBytes is 1 million, experimentation using random sizes indicates that the depth of -// the tree is less that 2x that of a perfectly balanced tree. For 1TByte can be referenced -// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40. -func (h *mheap) isLargeSpan(npages uintptr) bool { - return npages >= uintptr(len(h.free)) -} - -// allocLarge allocates a span of at least npage pages from the treap of large spans. -// Returns nil if no such span currently exists. -func (h *mheap) allocLarge(npage uintptr) *mspan { - // Search treap for smallest span with >= npage pages. - return h.freelarge.remove(npage) -} - // Try to add at least npage pages of memory to the heap, // returning whether it worked. // @@ -920,20 +1108,31 @@ func (h *mheap) grow(npage uintptr) bool { return false } + // Scavenge some pages out of the free treap to make up for + // the virtual memory space we just allocated. We prefer to + // scavenge the largest spans first since the cost of scavenging + // is proportional to the number of sysUnused() calls rather than + // the number of pages released, so we make fewer of those calls + // with larger spans. + h.scavengeLargest(size) + // Create a fake "in use" span and free it, so that the // right coalescing happens. s := (*mspan)(h.spanalloc.alloc()) s.init(uintptr(v), size/pageSize) h.setSpans(s.base(), s.npages, s) atomic.Store(&s.sweepgen, h.sweepgen) - s.state = _MSpanInUse + s.state = mSpanInUse h.pagesInUse += uint64(s.npages) h.freeSpanLocked(s, false, true, 0) return true } // Free the span back into the heap. -func (h *mheap) freeSpan(s *mspan, acct int32) { +// +// large must match the value of large passed to mheap.alloc. This is +// used for accounting. +func (h *mheap) freeSpan(s *mspan, large bool) { systemstack(func() { mp := getg().m lock(&h.lock) @@ -947,7 +1146,8 @@ func (h *mheap) freeSpan(s *mspan, acct int32) { bytes := s.npages << _PageShift msanfree(base, bytes) } - if acct != 0 { + if large { + // Match accounting done in mheap.alloc. memstats.heap_objects-- } if gcBlackenEnabled != 0 { @@ -979,21 +1179,25 @@ func (h *mheap) freeManual(s *mspan, stat *uint64) { unlock(&h.lock) } -// s must be on a busy list (h.busy or h.busylarge) or unlinked. +// s must be on the busy list or unlinked. 
func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) { switch s.state { - case _MSpanManual: + case mSpanManual: if s.allocCount != 0 { - throw("MHeap_FreeSpanLocked - invalid stack free") + throw("mheap.freeSpanLocked - invalid stack free") } - case _MSpanInUse: + case mSpanInUse: if s.allocCount != 0 || s.sweepgen != h.sweepgen { - print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n") - throw("MHeap_FreeSpanLocked - invalid free") + print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n") + throw("mheap.freeSpanLocked - invalid free") } h.pagesInUse -= uint64(s.npages) + + // Clear in-use bit in arena page bitmap. + arena, pageIdx, pageMask := pageIndexOf(s.base()) + arena.pageInUse[pageIdx] &^= pageMask default: - throw("MHeap_FreeSpanLocked - invalid span state") + throw("mheap.freeSpanLocked - invalid span state") } if acctinuse { @@ -1002,10 +1206,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i if acctidle { memstats.heap_idle += uint64(s.npages << _PageShift) } - s.state = _MSpanFree - if s.inList() { - h.busyList(s.npages).remove(s) - } + s.state = mSpanFree // Stamp newly unused spans. The scavenger will use that // info to potentially give back some pages to the OS. @@ -1013,133 +1214,122 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i if unusedsince == 0 { s.unusedsince = nanotime() } - s.npreleased = 0 + + // We scavenge s at the end after coalescing if s or anything + // it merged with is marked scavenged. + needsScavenge := false + prescavenged := s.released() // number of bytes already scavenged. // Coalesce with earlier, later spans. - if before := spanOf(s.base() - 1); before != nil && before.state == _MSpanFree { + if before := spanOf(s.base() - 1); before != nil && before.state == mSpanFree { // Now adjust s. s.startAddr = before.startAddr s.npages += before.npages - s.npreleased = before.npreleased // absorb released pages s.needzero |= before.needzero h.setSpan(before.base(), s) + // If before or s are scavenged, then we need to scavenge the final coalesced span. + needsScavenge = needsScavenge || before.scavenged || s.scavenged + prescavenged += before.released() // The size is potentially changing so the treap needs to delete adjacent nodes and // insert back as a combined node. - if h.isLargeSpan(before.npages) { - // We have a t, it is large so it has to be in the treap so we can remove it. - h.freelarge.removeSpan(before) + if before.scavenged { + h.scav.removeSpan(before) } else { - h.freeList(before.npages).remove(before) + h.free.removeSpan(before) } - before.state = _MSpanDead + before.state = mSpanDead h.spanalloc.free(unsafe.Pointer(before)) } // Now check to see if next (greater addresses) span is free and can be coalesced. 
- if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == _MSpanFree { + if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == mSpanFree { s.npages += after.npages - s.npreleased += after.npreleased s.needzero |= after.needzero h.setSpan(s.base()+s.npages*pageSize-1, s) - if h.isLargeSpan(after.npages) { - h.freelarge.removeSpan(after) + needsScavenge = needsScavenge || after.scavenged || s.scavenged + prescavenged += after.released() + if after.scavenged { + h.scav.removeSpan(after) } else { - h.freeList(after.npages).remove(after) + h.free.removeSpan(after) } - after.state = _MSpanDead + after.state = mSpanDead h.spanalloc.free(unsafe.Pointer(after)) } - // Insert s into appropriate list or treap. - if h.isLargeSpan(s.npages) { - h.freelarge.insert(s) + if needsScavenge { + // When coalescing spans, some physical pages which + // were not returned to the OS previously because + // they were only partially covered by the span suddenly + // become available for scavenging. We want to make sure + // those holes are filled in, and the span is properly + // scavenged. Rather than trying to detect those holes + // directly, we collect how many bytes were already + // scavenged above and subtract that from heap_released + // before re-scavenging the entire newly-coalesced span, + // which will implicitly bump up heap_released. + memstats.heap_released -= uint64(prescavenged) + s.scavenge() + } + + // Insert s into the appropriate treap. + if s.scavenged { + h.scav.insert(s) } else { - h.freeList(s.npages).insert(s) - } -} - -func (h *mheap) freeList(npages uintptr) *mSpanList { - return &h.free[npages] -} - -func (h *mheap) busyList(npages uintptr) *mSpanList { - if npages < uintptr(len(h.busy)) { - return &h.busy[npages] - } - return &h.busylarge -} - -func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr { - s := t.spanKey - var sumreleased uintptr - if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages { - start := s.base() - end := start + s.npages<<_PageShift - if physPageSize > _PageSize { - // We can only release pages in - // physPageSize blocks, so round start - // and end in. (Otherwise, madvise - // will round them *out* and release - // more memory than we want.) - start = (start + physPageSize - 1) &^ (physPageSize - 1) - end &^= physPageSize - 1 - if end <= start { - // start and end don't span a - // whole physical page. - return sumreleased - } - } - len := end - start - released := len - (s.npreleased << _PageShift) - if physPageSize > _PageSize && released == 0 { - return sumreleased - } - memstats.heap_released += uint64(released) - sumreleased += released - s.npreleased = len >> _PageShift - sysUnused(unsafe.Pointer(start), len) - } - return sumreleased -} - -func scavengelist(list *mSpanList, now, limit uint64) uintptr { - if list.isEmpty() { - return 0 - } - - var sumreleased uintptr - for s := list.first; s != nil; s = s.next { - if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages { - continue + h.free.insert(s) + } +} + +// scavengeLargest scavenges nbytes worth of spans in unscav +// starting from the largest span and working down. It then takes those spans +// and places them in scav. h must be locked. +func (h *mheap) scavengeLargest(nbytes uintptr) { + // Iterate over the treap backwards (from largest to smallest) scavenging spans + // until we've reached our quota of nbytes. 
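scavengeLargest walks the free treap from its largest span downward and releases pages until the byte budget is met; going largest-first keeps the number of sysUnused calls small. A simplified sketch of that walk, with a plain slice standing in for the treap and example sizes and a 4 KiB page size chosen only for illustration:

package main

import "fmt"

func main() {
	const pageSize = 4096
	spanPages := []uintptr{1, 2, 8, 32, 128} // free span sizes in pages, ascending
	nbytes := uintptr(300 * 1024)            // byte budget to release
	released := uintptr(0)

	// Walk from the largest span down, like iterating the treap backwards.
	for i := len(spanPages) - 1; i >= 0 && released < nbytes; i-- {
		r := spanPages[i] * pageSize // pretend the whole span is releasable
		released += r
		fmt.Println("scavenged span of", spanPages[i], "pages")
	}
	fmt.Println("released", released, "of", nbytes, "bytes requested")
}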
+ released := uintptr(0) + for t := h.free.end(); released < nbytes && t.valid(); { + s := t.span() + r := s.scavenge() + if r == 0 { + // Since we're going in order of largest-to-smallest span, this + // means all other spans are no bigger than s. There's a high + // chance that the other spans don't even cover a full page, + // (though they could) but iterating further just for a handful + // of pages probably isn't worth it, so just stop here. + // + // This check also preserves the invariant that spans that have + // `scavenged` set are only ever in the `scav` treap, and + // those which have it unset are only in the `free` treap. + return } - start := s.base() - end := start + s.npages<<_PageShift - if physPageSize > _PageSize { - // We can only release pages in - // physPageSize blocks, so round start - // and end in. (Otherwise, madvise - // will round them *out* and release - // more memory than we want.) - start = (start + physPageSize - 1) &^ (physPageSize - 1) - end &^= physPageSize - 1 - if end <= start { - // start and end don't span a - // whole physical page. - continue + n := t.prev() + h.free.erase(t) + t = n + h.scav.insert(s) + released += r + } +} + +// scavengeAll visits each node in the unscav treap and scavenges the +// treapNode's span. It then removes the scavenged span from +// unscav and adds it into scav before continuing. h must be locked. +func (h *mheap) scavengeAll(now, limit uint64) uintptr { + // Iterate over the treap scavenging spans if unused for at least limit time. + released := uintptr(0) + for t := h.free.start(); t.valid(); { + s := t.span() + n := t.next() + if (now - uint64(s.unusedsince)) > limit { + r := s.scavenge() + if r != 0 { + h.free.erase(t) + h.scav.insert(s) + released += r } } - len := end - start - - released := len - (s.npreleased << _PageShift) - if physPageSize > _PageSize && released == 0 { - continue - } - memstats.heap_released += uint64(released) - sumreleased += released - s.npreleased = len >> _PageShift - sysUnused(unsafe.Pointer(start), len) + t = n } - return sumreleased + return released } func (h *mheap) scavenge(k int32, now, limit uint64) { @@ -1149,17 +1339,13 @@ func (h *mheap) scavenge(k int32, now, limit uint64) { gp := getg() gp.m.mallocing++ lock(&h.lock) - var sumreleased uintptr - for i := 0; i < len(h.free); i++ { - sumreleased += scavengelist(&h.free[i], now, limit) - } - sumreleased += scavengetreap(h.freelarge.treap, now, limit) + released := h.scavengeAll(now, limit) unlock(&h.lock) gp.m.mallocing-- if debug.gctrace > 0 { - if sumreleased > 0 { - print("scvg", k, ": ", sumreleased>>20, " MB released\n") + if released > 0 { + print("scvg", k, ": ", released>>20, " MB released\n") } print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n") } @@ -1181,11 +1367,10 @@ func (span *mspan) init(base uintptr, npages uintptr) { span.npages = npages span.allocCount = 0 span.spanclass = 0 - span.incache = false span.elemsize = 0 - span.state = _MSpanDead + span.state = mSpanDead span.unusedsince = 0 - span.npreleased = 0 + span.scavenged = false span.speciallock.key = 0 span.specials = nil span.needzero = 0 @@ -1206,9 +1391,9 @@ func (list *mSpanList) init() { func (list *mSpanList) remove(span *mspan) { if span.list != list { - print("runtime: failed MSpanList_Remove span.npages=", span.npages, + print("runtime: failed 
mSpanList.remove span.npages=", span.npages, " span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n") - throw("MSpanList_Remove") + throw("mSpanList.remove") } if list.first == span { list.first = span.next @@ -1231,8 +1416,8 @@ func (list *mSpanList) isEmpty() bool { func (list *mSpanList) insert(span *mspan) { if span.next != nil || span.prev != nil || span.list != nil { - println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list) - throw("MSpanList_Insert") + println("runtime: failed mSpanList.insert", span, span.next, span.prev, span.list) + throw("mSpanList.insert") } span.next = list.first if list.first != nil { @@ -1249,8 +1434,8 @@ func (list *mSpanList) insert(span *mspan) { func (list *mSpanList) insertBack(span *mspan) { if span.next != nil || span.prev != nil || span.list != nil { - println("runtime: failed MSpanList_InsertBack", span, span.next, span.prev, span.list) - throw("MSpanList_InsertBack") + println("runtime: failed mSpanList.insertBack", span, span.next, span.prev, span.list) + throw("mSpanList.insertBack") } span.prev = list.last if list.last != nil { @@ -1432,9 +1617,6 @@ func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool // Mark the finalizer itself, since the // special isn't part of the GC'd heap. scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw) - if gcBlackenPromptly { - gcw.dispose() - } releasem(mp) } return true @@ -1479,7 +1661,7 @@ func setprofilebucket(p unsafe.Pointer, b *bucket) { } // Do whatever cleanup needs to be done to deallocate s. It has -// already been unlinked from the MSpan specials list. +// already been unlinked from the mspan specials list. func freespecial(s *special, p unsafe.Pointer, size uintptr) { switch s.kind { case _KindSpecialFinalizer: diff --git a/libgo/go/runtime/mkfastlog2table.go b/libgo/go/runtime/mkfastlog2table.go index 587ebf476d3..305c84a7c11 100644 --- a/libgo/go/runtime/mkfastlog2table.go +++ b/libgo/go/runtime/mkfastlog2table.go @@ -20,7 +20,7 @@ import ( func main() { var buf bytes.Buffer - fmt.Fprintln(&buf, "// AUTO-GENERATED by mkfastlog2table.go") + fmt.Fprintln(&buf, "// Code generated by mkfastlog2table.go; DO NOT EDIT.") fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.") fmt.Fprintln(&buf, "// See mkfastlog2table.go for comments.") fmt.Fprintln(&buf) diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go index 2bbf37a63e4..ab975696fe6 100644 --- a/libgo/go/runtime/mprof.go +++ b/libgo/go/runtime/mprof.go @@ -734,7 +734,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) { isOK := func(gp1 *g) bool { // Checking isSystemGoroutine here makes GoroutineProfile // consistent with both NumGoroutine and Stack. 
- return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1) + return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false) } stopTheWorld("profile") diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index 8aac8502e0b..cd9da02afda 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -38,24 +38,10 @@ type mstats struct { heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above) heap_sys uint64 // virtual address space obtained from system for GC'd heap heap_idle uint64 // bytes in idle spans - heap_inuse uint64 // bytes in _MSpanInUse spans + heap_inuse uint64 // bytes in mSpanInUse spans heap_released uint64 // bytes released to the os heap_objects uint64 // total number of allocated objects - // TODO(austin): heap_released is both useless and inaccurate - // in its current form. It's useless because, from the user's - // and OS's perspectives, there's no difference between a page - // that has not yet been faulted in and a page that has been - // released back to the OS. We could fix this by considering - // newly mapped spans to be "released". It's inaccurate - // because when we split a large span for allocation, we - // "unrelease" all pages in the large span and not just the - // ones we split off for use. This is trickier to fix because - // we currently don't know which pages of a span we've - // released. We could fix it by separating "free" and - // "released" spans, but then we have to allocate from runs of - // free and released spans. - // Statistics about allocation of low-level fixed-size structures. // Protected by FixAlloc locks. stacks_inuse uint64 // bytes in manually-managed stack spans @@ -543,7 +529,7 @@ func updatememstats() { memstats.by_size[i].nfree = 0 } - // Flush MCache's to MCentral. + // Flush mcache's to mcentral. systemstack(flushallmcaches) // Aggregate local stats. diff --git a/libgo/go/runtime/mwbbuf.go b/libgo/go/runtime/mwbbuf.go index 39d13709069..4c875ff2d15 100644 --- a/libgo/go/runtime/mwbbuf.go +++ b/libgo/go/runtime/mwbbuf.go @@ -23,6 +23,7 @@ package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -56,6 +57,12 @@ type wbBuf struct { // on. This must be a multiple of wbBufEntryPointers because // the write barrier only checks for overflow once per entry. buf [wbBufEntryPointers * wbBufEntries]uintptr + + // debugGen causes the write barrier buffer to flush after + // every write barrier if equal to gcWorkPauseGen. This is for + // debugging #27993. This is only set if debugCachedWork is + // set. + debugGen uint32 } const ( @@ -79,7 +86,7 @@ const ( func (b *wbBuf) reset() { start := uintptr(unsafe.Pointer(&b.buf[0])) b.next = start - if gcBlackenPromptly || writeBarrier.cgo { + if writeBarrier.cgo || (debugCachedWork && (throwOnGCWork || b.debugGen == atomic.Load(&gcWorkPauseGen))) { // Effectively disable the buffer by forcing a flush // on every barrier. b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) @@ -107,6 +114,11 @@ func (b *wbBuf) discard() { b.next = uintptr(unsafe.Pointer(&b.buf[0])) } +// empty reports whether b contains no pointers. +func (b *wbBuf) empty() bool { + return b.next == uintptr(unsafe.Pointer(&b.buf[0])) +} + // putFast adds old and new to the write barrier buffer and returns // false if a flush is necessary. 
Callers should use this as: // @@ -192,10 +204,32 @@ func wbBufFlush(dst *uintptr, src uintptr) { // Switch to the system stack so we don't have to worry about // the untyped stack slots or safe points. systemstack(func() { - wbBufFlush1(getg().m.p.ptr()) + if debugCachedWork { + // For debugging, include the old value of the + // slot and some other data in the traceback. + wbBuf := &getg().m.p.ptr().wbBuf + var old uintptr + if dst != nil { + // dst may be nil in direct calls to wbBufFlush. + old = *dst + } + wbBufFlush1Debug(old, wbBuf.buf[0], wbBuf.buf[1], &wbBuf.buf[0], wbBuf.next) + } else { + wbBufFlush1(getg().m.p.ptr()) + } }) } +// wbBufFlush1Debug is a temporary function for debugging issue +// #27993. It exists solely to add some context to the traceback. +// +//go:nowritebarrierrec +//go:systemstack +//go:noinline +func wbBufFlush1Debug(old, buf1, buf2 uintptr, start *uintptr, next uintptr) { + wbBufFlush1(getg().m.p.ptr()) +} + // wbBufFlush1 flushes p's write barrier buffer to the GC work queue. // // This must not have write barriers because it is part of the write @@ -212,14 +246,16 @@ func wbBufFlush1(_p_ *p) { n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0]) ptrs := _p_.wbBuf.buf[:n] - // Reset the buffer. - _p_.wbBuf.reset() + // Poison the buffer to make extra sure nothing is enqueued + // while we're processing the buffer. + _p_.wbBuf.next = 0 if useCheckmark { // Slow path for checkmark mode. for _, ptr := range ptrs { shade(ptr) } + _p_.wbBuf.reset() return } @@ -270,9 +306,6 @@ func wbBufFlush1(_p_ *p) { // Enqueue the greyed objects. gcw.putBatch(ptrs[:pos]) - if gcphase == _GCmarktermination || gcBlackenPromptly { - // Ps aren't allowed to cache work during mark - // termination. - gcw.dispose() - } + + _p_.wbBuf.reset() } diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index 6016b7d64f5..fa7f1fad71c 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -59,14 +59,15 @@ type pollDesc struct { lock mutex // protects the following fields fd uintptr closing bool - seq uintptr // protects from stale timers and ready notifications + user uint32 // user settable cookie + rseq uintptr // protects from stale read timers rg uintptr // pdReady, pdWait, G waiting for read or nil rt timer // read deadline timer (set if rt.f != nil) rd int64 // read deadline + wseq uintptr // protects from stale write timers wg uintptr // pdReady, pdWait, G waiting for write or nil wt timer // write deadline timer wd int64 // write deadline - user uint32 // user settable cookie } type pollCache struct { @@ -95,12 +96,19 @@ func netpollinited() bool { return atomic.Load(&netpollInited) != 0 } -//go:linkname poll_runtime_pollServerDescriptor internal..z2fpoll.runtime_pollServerDescriptor +//go:linkname poll_runtime_isPollServerDescriptor internal..z2fpoll.runtime_isPollServerDescriptor -// poll_runtime_pollServerDescriptor returns the descriptor being used, -// or ^uintptr(0) if the system does not use a poll descriptor. -func poll_runtime_pollServerDescriptor() uintptr { - return netpolldescriptor() +// poll_runtime_isPollServerDescriptor reports whether fd is a +// descriptor being used by netpoll. +func poll_runtime_isPollServerDescriptor(fd uintptr) bool { + fds := netpolldescriptor() + if GOOS != "aix" { + return fd == fds + } else { + // AIX have a pipe in its netpoll implementation. + // Therefore, two fd are returned by netpolldescriptor using a mask. 
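On AIX, netpolldescriptor packs both pipe descriptors into one uintptr (read end in the high 16 bits, write end in the low 16 bits), and poll_runtime_isPollServerDescriptor tests a descriptor against both halves with the same mask. A tiny sketch of that encoding, with hypothetical fd values:

package main

import "fmt"

// pack mirrors what netpolldescriptor does on AIX: read end of the
// wakeup pipe in the high 16 bits, write end in the low 16 bits.
func pack(rdwake, wrwake int32) uintptr {
	return uintptr(rdwake<<16 | wrwake)
}

// isPollFD mirrors the unpacking check: fd matches either half.
func isPollFD(fd, fds uintptr) bool {
	return fd == fds&0xFFFF || fd == (fds>>16)&0xFFFF
}

func main() {
	fds := pack(5, 6) // hypothetical pipe fds
	fmt.Println(isPollFD(5, fds), isPollFD(6, fds), isPollFD(7, fds))
}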
+ return fd == fds&0xFFFF || fd == (fds>>16)&0xFFFF + } } //go:linkname poll_runtime_pollOpen internal..z2fpoll.runtime_pollOpen @@ -115,9 +123,10 @@ func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) { } pd.fd = fd pd.closing = false - pd.seq++ + pd.rseq++ pd.rg = 0 pd.rd = 0 + pd.wseq++ pd.wg = 0 pd.wd = 0 unlock(&pd.lock) @@ -200,19 +209,15 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) { unlock(&pd.lock) return } - pd.seq++ // invalidate current timers - // Reset current timers. - if pd.rt.f != nil { - deltimer(&pd.rt) - pd.rt.f = nil - } - if pd.wt.f != nil { - deltimer(&pd.wt) - pd.wt.f = nil - } - // Setup new timers. - if d != 0 && d <= nanotime() { - d = -1 + rd0, wd0 := pd.rd, pd.wd + combo0 := rd0 > 0 && rd0 == wd0 + if d > 0 { + d += nanotime() + if d <= 0 { + // If the user has a deadline in the future, but the delay calculation + // overflows, then set the deadline to the maximum possible value. + d = 1<<63 - 1 + } } if mode == 'r' || mode == 'r'+'w' { pd.rd = d @@ -220,39 +225,58 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) { if mode == 'w' || mode == 'r'+'w' { pd.wd = d } - if pd.rd > 0 && pd.rd == pd.wd { - pd.rt.f = netpollDeadline - pd.rt.when = pd.rd - // Copy current seq into the timer arg. - // Timer func will check the seq against current descriptor seq, - // if they differ the descriptor was reused or timers were reset. - pd.rt.arg = pd - pd.rt.seq = pd.seq - addtimer(&pd.rt) - } else { + combo := pd.rd > 0 && pd.rd == pd.wd + rtf := netpollReadDeadline + if combo { + rtf = netpollDeadline + } + if pd.rt.f == nil { if pd.rd > 0 { - pd.rt.f = netpollReadDeadline + pd.rt.f = rtf pd.rt.when = pd.rd + // Copy current seq into the timer arg. + // Timer func will check the seq against current descriptor seq, + // if they differ the descriptor was reused or timers were reset. pd.rt.arg = pd - pd.rt.seq = pd.seq + pd.rt.seq = pd.rseq addtimer(&pd.rt) } - if pd.wd > 0 { + } else if pd.rd != rd0 || combo != combo0 { + pd.rseq++ // invalidate current timers + if pd.rd > 0 { + modtimer(&pd.rt, pd.rd, 0, rtf, pd, pd.rseq) + } else { + deltimer(&pd.rt) + pd.rt.f = nil + } + } + if pd.wt.f == nil { + if pd.wd > 0 && !combo { pd.wt.f = netpollWriteDeadline pd.wt.when = pd.wd pd.wt.arg = pd - pd.wt.seq = pd.seq + pd.wt.seq = pd.wseq addtimer(&pd.wt) } + } else if pd.wd != wd0 || combo != combo0 { + pd.wseq++ // invalidate current timers + if pd.wd > 0 && !combo { + modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd, pd.wseq) + } else { + deltimer(&pd.wt) + pd.wt.f = nil + } } // If we set the new deadline in the past, unblock currently pending IO if any. 
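Earlier in this hunk the relative deadline is converted to an absolute time, and an addition that overflows int64 is clamped to the maximum future time rather than accidentally landing in the past. A sketch of just that clamp; nanotime is stubbed out with time.Now for the example:

package main

import (
	"fmt"
	"math"
	"time"
)

// absDeadline converts a relative deadline d (in ns) to an absolute one,
// clamping on overflow so a huge user deadline never looks "in the past".
func absDeadline(d int64) int64 {
	if d > 0 {
		d += time.Now().UnixNano() // stand-in for runtime nanotime()
		if d <= 0 {
			d = math.MaxInt64 // 1<<63 - 1
		}
	}
	return d
}

func main() {
	fmt.Println(absDeadline(5e9) > 0)           // ordinary 5s deadline
	fmt.Println(absDeadline(math.MaxInt64) > 0) // overflowing deadline is clamped
}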
var rg, wg *g - atomicstorep(unsafe.Pointer(&wg), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock - if pd.rd < 0 { - rg = netpollunblock(pd, 'r', false) - } - if pd.wd < 0 { - wg = netpollunblock(pd, 'w', false) + if pd.rd < 0 || pd.wd < 0 { + atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock + if pd.rd < 0 { + rg = netpollunblock(pd, 'r', false) + } + if pd.wd < 0 { + wg = netpollunblock(pd, 'w', false) + } } unlock(&pd.lock) if rg != nil { @@ -270,9 +294,10 @@ func poll_runtime_pollUnblock(pd *pollDesc) { throw("runtime: unblock on closing polldesc") } pd.closing = true - pd.seq++ + pd.rseq++ + pd.wseq++ var rg, wg *g - atomicstorep(unsafe.Pointer(&rg), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock + atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock rg = netpollunblock(pd, 'r', false) wg = netpollunblock(pd, 'w', false) if pd.rt.f != nil { @@ -292,24 +317,22 @@ func poll_runtime_pollUnblock(pd *pollDesc) { } } -// make pd ready, newly runnable goroutines (if any) are returned in rg/wg +// make pd ready, newly runnable goroutines (if any) are added to toRun. // May run during STW, so write barriers are not allowed. //go:nowritebarrier -func netpollready(gpp *guintptr, pd *pollDesc, mode int32) { - var rg, wg guintptr +func netpollready(toRun *gList, pd *pollDesc, mode int32) { + var rg, wg *g if mode == 'r' || mode == 'r'+'w' { - rg.set(netpollunblock(pd, 'r', true)) + rg = netpollunblock(pd, 'r', true) } if mode == 'w' || mode == 'r'+'w' { - wg.set(netpollunblock(pd, 'w', true)) + wg = netpollunblock(pd, 'w', true) } - if rg != 0 { - rg.ptr().schedlink = *gpp - *gpp = rg + if rg != nil { + toRun.push(rg) } - if wg != 0 { - wg.ptr().schedlink = *gpp - *gpp = wg + if wg != nil { + toRun.push(wg) } } @@ -409,7 +432,11 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) { lock(&pd.lock) // Seq arg is seq when the timer was set. // If it's stale, ignore the timer event. - if seq != pd.seq { + currentSeq := pd.rseq + if !read { + currentSeq = pd.wseq + } + if seq != currentSeq { // The descriptor was reused or timers were reset. unlock(&pd.lock) return @@ -420,7 +447,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) { throw("runtime: inconsistent read deadline") } pd.rd = -1 - atomicstorep(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock + atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock rg = netpollunblock(pd, 'r', false) } var wg *g @@ -429,7 +456,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) { throw("runtime: inconsistent write deadline") } pd.wd = -1 - atomicstorep(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock + atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock wg = netpollunblock(pd, 'w', false) } unlock(&pd.lock) diff --git a/libgo/go/runtime/netpoll_aix.go b/libgo/go/runtime/netpoll_aix.go index cbeb8c9ed18..86c3e960f42 100644 --- a/libgo/go/runtime/netpoll_aix.go +++ b/libgo/go/runtime/netpoll_aix.go @@ -1,4 +1,4 @@ -// Copyright 2017 The Go Authors. All rights reserved. 
+// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -8,17 +8,7 @@ import "unsafe" // This is based on the former libgo/runtime/netpoll_select.c implementation // except that it uses poll instead of select and is written in Go. - -type pollfd struct { - fd int32 - events int16 - revents int16 -} - -const _POLLIN = 0x0001 -const _POLLOUT = 0x0002 -const _POLLHUP = 0x2000 -const _POLLERR = 0x4000 +// It's also based on Solaris implementation for the arming mechanisms //go:noescape //extern poll @@ -31,62 +21,95 @@ func libc_pipe(fd *int32) int32 //extern __go_fcntl_uintptr func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr) -func fcntl(fd, cmd int32, arg uintptr) uintptr { +func fcntl(fd, cmd int32, arg uintptr) int32 { r, _ := fcntlUintptr(uintptr(fd), uintptr(cmd), arg) - return r + return int32(r) } +// pollfd represents the poll structure for AIX operating system. +type pollfd struct { + fd int32 + events int16 + revents int16 +} + +const _POLLIN = 0x0001 +const _POLLOUT = 0x0002 +const _POLLHUP = 0x2000 +const _POLLERR = 0x4000 +const _O_NONBLOCK = 0x4 + var ( - pfds []pollfd - pds []*pollDesc - mtxpoll mutex - mtxset mutex - rdwake int32 - wrwake int32 - needsUpdate bool + pfds []pollfd + pds []*pollDesc + mtxpoll mutex + mtxset mutex + rdwake int32 + wrwake int32 + pendingUpdates int32 ) +const pollVerbose = false + func netpollinit() { var p [2]int32 // Create the pipe we use to wakeup poll. if err := libc_pipe(&p[0]); err < 0 { - throw("runtime: netpollinit failed to create pipe") + throw("netpollinit: failed to create pipe") } rdwake = p[0] wrwake = p[1] - fl := fcntl(rdwake, _F_GETFL, 0) + fl := uintptr(fcntl(rdwake, _F_GETFL, 0)) fcntl(rdwake, _F_SETFL, fl|_O_NONBLOCK) fcntl(rdwake, _F_SETFD, _FD_CLOEXEC) - fl = fcntl(wrwake, _F_GETFL, 0) + fl = uintptr(fcntl(wrwake, _F_GETFL, 0)) + fcntl(wrwake, _F_SETFL, fl|_O_NONBLOCK) fcntl(wrwake, _F_SETFD, _FD_CLOEXEC) // Pre-allocate array of pollfd structures for poll. + if pollVerbose { + println("*** allocating") + } pfds = make([]pollfd, 1, 128) + if pollVerbose { + println("*** allocating done", &pfds[0]) + } + // Poll the read side of the pipe. pfds[0].fd = rdwake pfds[0].events = _POLLIN - // Allocate index to pd array pds = make([]*pollDesc, 1, 128) pds[0] = nil } func netpolldescriptor() uintptr { - return ^uintptr(0) + // Both fd must be returned + if rdwake > 0xFFFF || wrwake > 0xFFFF { + throw("netpolldescriptor: invalid fd number") + } + return uintptr(rdwake<<16 | wrwake) } +// netpollwakeup writes on wrwake to wakeup poll before any changes. 
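netpollwakeup below is the classic self-pipe trick: the poller always watches the read end of a private pipe, so writing a single byte to the write end forces a blocked poll call to return and pick up pending changes. A user-space sketch of the same idea, using os.Pipe and a goroutine rather than the runtime's raw file descriptors:

package main

import (
	"fmt"
	"os"
)

func main() {
	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}

	done := make(chan struct{})
	go func() {
		// Stand-in for the poller blocked in poll() on the pipe's read end.
		buf := make([]byte, 1)
		r.Read(buf)
		fmt.Println("poller woke up")
		close(done)
	}()

	// Stand-in for netpollwakeup: one byte is enough to wake the poller.
	w.Write([]byte{0})
	<-done
}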
func netpollwakeup() { - if !needsUpdate { - needsUpdate = true + if pendingUpdates == 0 { + pendingUpdates = 1 + if pollVerbose { + println("*** writing 1 byte") + } b := [1]byte{0} write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) } } func netpollopen(fd uintptr, pd *pollDesc) int32 { + if pollVerbose { + println("*** netpollopen", fd) + } lock(&mtxpoll) netpollwakeup() @@ -94,15 +117,16 @@ func netpollopen(fd uintptr, pd *pollDesc) int32 { unlock(&mtxpoll) pd.user = uint32(len(pfds)) - var pfd pollfd - pfd.fd = int32(fd) - pfds = append(pfds, pfd) + pfds = append(pfds, pollfd{fd: int32(fd)}) pds = append(pds, pd) unlock(&mtxset) return 0 } func netpollclose(fd uintptr) int32 { + if pollVerbose { + println("*** netpollclose", fd) + } lock(&mtxpoll) netpollwakeup() @@ -125,6 +149,9 @@ func netpollclose(fd uintptr) int32 { } func netpollarm(pd *pollDesc, mode int) { + if pollVerbose { + println("*** netpollarm", pd.fd, mode) + } lock(&mtxpoll) netpollwakeup() @@ -141,23 +168,36 @@ func netpollarm(pd *pollDesc, mode int) { } //go:nowritebarrierrec -func netpoll(block bool) *g { +func netpoll(block bool) gList { timeout := ^uintptr(0) if !block { timeout = 0 - return nil + return gList{} + } + if pollVerbose { + println("*** netpoll", block) } retry: lock(&mtxpoll) lock(&mtxset) - needsUpdate = false + pendingUpdates = 0 unlock(&mtxpoll) + if pollVerbose { + println("*** netpoll before poll") + } n := libc_poll(&pfds[0], uintptr(len(pfds)), timeout) + if pollVerbose { + println("*** netpoll after poll", n) + } if n < 0 { e := errno() if e != _EINTR { - throw("runtime: poll failed") + println("errno=", e, " len(pfds)=", len(pfds)) + throw("poll failed") + } + if pollVerbose { + println("*** poll failed") } unlock(&mtxset) goto retry @@ -166,13 +206,16 @@ retry: if n != 0 && pfds[0].revents&(_POLLIN|_POLLHUP|_POLLERR) != 0 { var b [1]byte for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 { + if pollVerbose { + println("*** read 1 byte from pipe") + } } // Do not look at the other fds in this case as the mode may have changed // XXX only additions of flags are made, so maybe it is ok unlock(&mtxset) goto retry } - var gp guintptr + var toRun gList for i := 0; i < len(pfds) && n > 0; i++ { pfd := &pfds[i] @@ -186,13 +229,19 @@ retry: pfd.events &= ^_POLLOUT } if mode != 0 { - netpollready(&gp, pds[i], mode) + if pollVerbose { + println("*** netpollready i=", i, "revents=", pfd.revents, "events=", pfd.events, "pd=", pds[i]) + } + netpollready(&toRun, pds[i], mode) n-- } } unlock(&mtxset) - if block && gp == 0 { + if block && toRun.empty() { goto retry } - return gp.ptr() + if pollVerbose { + println("*** netpoll returning end") + } + return toRun } diff --git a/libgo/go/runtime/netpoll_epoll.go b/libgo/go/runtime/netpoll_epoll.go index ced399d781e..2004fbc4da8 100644 --- a/libgo/go/runtime/netpoll_epoll.go +++ b/libgo/go/runtime/netpoll_epoll.go @@ -75,9 +75,9 @@ func netpollarm(pd *pollDesc, mode int) { // polls for ready network connections // returns list of goroutines that become runnable -func netpoll(block bool) *g { +func netpoll(block bool) gList { if epfd == -1 { - return nil + return gList{} } waitms := int32(-1) if !block { @@ -94,7 +94,7 @@ retry: } goto retry } - var gp guintptr + var toRun gList for i := int32(0); i < n; i++ { ev := &events[i] if ev.events == 0 { @@ -110,11 +110,11 @@ retry: if mode != 0 { pd := *(**pollDesc)(unsafe.Pointer(&ev.data)) - netpollready(&gp, pd, mode) + netpollready(&toRun, pd, mode) } } - if block && gp == 0 { + if block && toRun.empty() { goto retry } - 
return gp.ptr() + return toRun } diff --git a/libgo/go/runtime/netpoll_fake.go b/libgo/go/runtime/netpoll_fake.go index aab18dc8468..5b1a63a8787 100644 --- a/libgo/go/runtime/netpoll_fake.go +++ b/libgo/go/runtime/netpoll_fake.go @@ -27,6 +27,6 @@ func netpollclose(fd uintptr) int32 { func netpollarm(pd *pollDesc, mode int) { } -func netpoll(block bool) *g { - return nil +func netpoll(block bool) gList { + return gList{} } diff --git a/libgo/go/runtime/netpoll_kqueue.go b/libgo/go/runtime/netpoll_kqueue.go index 1f68effbf9d..3d6265092e6 100644 --- a/libgo/go/runtime/netpoll_kqueue.go +++ b/libgo/go/runtime/netpoll_kqueue.go @@ -73,9 +73,9 @@ func netpollarm(pd *pollDesc, mode int) { // Polls for ready network connections. // Returns list of goroutines that become runnable. -func netpoll(block bool) *g { +func netpoll(block bool) gList { if kq == -1 { - return nil + return gList{} } var tp *timespec var ts timespec @@ -93,7 +93,7 @@ retry: } goto retry } - var gp guintptr + var toRun gList for i := 0; i < int(n); i++ { ev := &events[i] var mode int32 @@ -117,11 +117,11 @@ retry: mode += 'w' } if mode != 0 { - netpollready(&gp, (*pollDesc)(unsafe.Pointer(ev.udata)), mode) + netpollready(&toRun, (*pollDesc)(unsafe.Pointer(ev.udata)), mode) } } - if block && gp == 0 { + if block && toRun.empty() { goto retry } - return gp.ptr() + return toRun } diff --git a/libgo/go/runtime/netpoll_solaris.go b/libgo/go/runtime/netpoll_solaris.go index a960e93b8bf..b8baffdfc12 100644 --- a/libgo/go/runtime/netpoll_solaris.go +++ b/libgo/go/runtime/netpoll_solaris.go @@ -166,9 +166,9 @@ func netpollarm(pd *pollDesc, mode int) { // polls for ready network connections // returns list of goroutines that become runnable -func netpoll(block bool) *g { +func netpoll(block bool) gList { if portfd == -1 { - return nil + return gList{} } var wait *timespec @@ -188,7 +188,7 @@ retry: goto retry } - var gp guintptr + var toRun gList for i := 0; i < int(n); i++ { ev := &events[i] @@ -219,12 +219,12 @@ retry: } if mode != 0 { - netpollready(&gp, pd, mode) + netpollready(&toRun, pd, mode) } } - if block && gp == 0 { + if block && toRun.empty() { goto retry } - return gp.ptr() + return toRun } diff --git a/libgo/go/runtime/netpoll_stub.go b/libgo/go/runtime/netpoll_stub.go index a4d6b4608ac..f585333579d 100644 --- a/libgo/go/runtime/netpoll_stub.go +++ b/libgo/go/runtime/netpoll_stub.go @@ -10,10 +10,10 @@ var netpollWaiters uint32 // Polls for ready network connections. // Returns list of goroutines that become runnable. -func netpoll(block bool) (gp *g) { +func netpoll(block bool) gList { // Implementation for platforms that do not support // integrated network poller. - return + return gList{} } func netpollinited() bool { diff --git a/libgo/go/runtime/netpoll_windows.go b/libgo/go/runtime/netpoll_windows.go index 134071f5e3c..07ef15ce2f3 100644 --- a/libgo/go/runtime/netpoll_windows.go +++ b/libgo/go/runtime/netpoll_windows.go @@ -63,17 +63,17 @@ func netpollarm(pd *pollDesc, mode int) { // Polls for completed network IO. // Returns list of goroutines that become runnable. 
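
Note: the hunks above and below convert every netpoll backend (aix, epoll, fake, kqueue, solaris, stub, windows) from returning a single *g chain to returning a gList value, so callers test empty() instead of comparing against nil. A minimal standalone sketch of that contract, using a hypothetical runnableList type as a stand-in for the runtime's gList (names here are illustrative, not the runtime's API):

package main

import "fmt"

// runnableList stands in for gList: a value type whose zero value
// means "nothing became runnable", so poll never needs to return nil.
type runnableList struct {
	items []int // placeholder for the *g entries
}

func (l runnableList) empty() bool { return len(l.items) == 0 }

// poll mimics the new netpoll signature: always a list, never nil.
func poll(block bool) runnableList {
	if !block {
		return runnableList{} // non-blocking and nothing is ready
	}
	return runnableList{items: []int{1, 2}}
}

func main() {
	for {
		list := poll(true)
		if list.empty() {
			continue // blocking poll found no work: retry, like the goto retry above
		}
		fmt.Println("runnable:", list.items)
		break
	}
}
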
-func netpoll(block bool) *g { +func netpoll(block bool) gList { var entries [64]overlappedEntry var wait, qty, key, flags, n, i uint32 var errno int32 var op *net_op - var gp guintptr + var toRun gList mp := getg().m if iocphandle == _INVALID_HANDLE_VALUE { - return nil + return gList{} } wait = 0 if block { @@ -92,7 +92,7 @@ retry: mp.blocked = false errno = int32(getlasterror()) if !block && errno == _WAIT_TIMEOUT { - return nil + return gList{} } println("runtime: GetQueuedCompletionStatusEx failed (errno=", errno, ")") throw("runtime: netpoll failed") @@ -105,7 +105,7 @@ retry: if stdcall5(_WSAGetOverlappedResult, op.pd.fd, uintptr(unsafe.Pointer(op)), uintptr(unsafe.Pointer(&qty)), 0, uintptr(unsafe.Pointer(&flags))) == 0 { errno = int32(getlasterror()) } - handlecompletion(&gp, op, errno, qty) + handlecompletion(&toRun, op, errno, qty) } } else { op = nil @@ -118,7 +118,7 @@ retry: mp.blocked = false errno = int32(getlasterror()) if !block && errno == _WAIT_TIMEOUT { - return nil + return gList{} } if op == nil { println("runtime: GetQueuedCompletionStatus failed (errno=", errno, ")") @@ -127,15 +127,15 @@ retry: // dequeued failed IO packet, so report that } mp.blocked = false - handlecompletion(&gp, op, errno, qty) + handlecompletion(&toRun, op, errno, qty) } - if block && gp == 0 { + if block && toRun.empty() { goto retry } - return gp.ptr() + return toRun } -func handlecompletion(gpp *guintptr, op *net_op, errno int32, qty uint32) { +func handlecompletion(toRun *gList, op *net_op, errno int32, qty uint32) { if op == nil { println("runtime: GetQueuedCompletionStatus returned op == nil") throw("runtime: netpoll failed") @@ -147,5 +147,5 @@ func handlecompletion(gpp *guintptr, op *net_op, errno int32, qty uint32) { } op.errno = errno op.qty = qty - netpollready(gpp, op.pd, mode) + netpollready(toRun, op.pd, mode) } diff --git a/libgo/go/runtime/os_aix.go b/libgo/go/runtime/os_aix.go index 246b9c3c944..10036162c76 100644 --- a/libgo/go/runtime/os_aix.go +++ b/libgo/go/runtime/os_aix.go @@ -1,7 +1,9 @@ -// Copyright 2017 The Go Authors. All rights reserved. +// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+// +build aix + package runtime import "unsafe" @@ -55,17 +57,16 @@ func semacreate(mp *m) { func semasleep(ns int64) int32 { _m_ := getg().m if ns >= 0 { - const CLOCK_REALTIME int64 = 9 var ts timespec - if clock_gettime(CLOCK_REALTIME, &ts) != 0 { + if clock_gettime(_CLOCK_REALTIME, &ts) != 0 { throw("clock_gettime") } - ts.tv_sec += timespec_sec_t(ns / 1000000000) - ts.tv_nsec += timespec_nsec_t(ns % 1000000000) - if ts.tv_nsec >= 1000000000 { - ts.tv_sec += timespec_sec_t(1) - ts.tv_nsec -= timespec_nsec_t(1000000000) + ts.tv_sec += ns / 1e9 + ts.tv_nsec += ns % 1e9 + if ts.tv_nsec >= 1e9 { + ts.tv_sec++ + ts.tv_nsec -= 1e9 } if sem_timedwait((*semt)(unsafe.Pointer(_m_.mos.waitsema)), &ts) != 0 { @@ -73,6 +74,7 @@ func semasleep(ns int64) int32 { if err == _ETIMEDOUT || err == _EAGAIN || err == _EINTR { return -1 } + println("sem_timedwait err ", err, " ts.tv_sec ", ts.tv_sec, " ts.tv_nsec ", ts.tv_nsec, " ns ", ns, " id ", _m_.id) throw("sem_timedwait") } return 0 @@ -96,3 +98,8 @@ func semawakeup(mp *m) { throw("sem_post") } } + +const ( + _CLOCK_REALTIME = 9 + _CLOCK_MONOTONIC = 10 +) diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go index 34939c56f03..4cce6fdb7f4 100644 --- a/libgo/go/runtime/os_freebsd.go +++ b/libgo/go/runtime/os_freebsd.go @@ -63,3 +63,42 @@ func futexwakeup(addr *uint32, cnt uint32) { print("umtx_wake_addr=", addr, " ret=", ret, "\n") }) } + +func sysargs(argc int32, argv **byte) { + n := argc + 1 + + // skip over argv, envp to get to auxv + for argv_index(argv, n) != nil { + n++ + } + + // skip NULL separator + n++ + + // now argv+n is auxv + auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize)) + sysauxv(auxv[:]) +} + +const ( + _AT_NULL = 0 // Terminates the vector + _AT_PAGESZ = 6 // Page size in bytes + _AT_TIMEKEEP = 22 // Pointer to timehands. + _AT_HWCAP = 25 // CPU feature flags + _AT_HWCAP2 = 26 // CPU feature flags 2 +) + +func sysauxv(auxv []uintptr) { + for i := 0; auxv[i] != _AT_NULL; i += 2 { + tag, val := auxv[i], auxv[i+1] + switch tag { + // _AT_NCPUS from auxv shouldn't be used due to golang.org/issue/15206 + case _AT_PAGESZ: + physPageSize = val + case _AT_TIMEKEEP: + timekeepSharedPage = (*vdsoTimekeep)(unsafe.Pointer(val)) + } + + archauxv(tag, val) + } +} diff --git a/libgo/go/runtime/os_linux_arm64.go b/libgo/go/runtime/os_linux_arm64.go index 013e7ae17ac..30d63bfbdbc 100644 --- a/libgo/go/runtime/os_linux_arm64.go +++ b/libgo/go/runtime/os_linux_arm64.go @@ -22,7 +22,15 @@ func archauxv(tag, val uintptr) { case _AT_HWCAP: // arm64 doesn't have a 'cpuid' instruction equivalent and relies on // HWCAP/HWCAP2 bits for hardware capabilities. - cpu.HWCap = uint(val) + hwcap := uint(val) + if GOOS == "android" { + // The Samsung S9+ kernel reports support for atomics, but not all cores + // actually support them, resulting in SIGILL. See issue #28431. + // TODO(elias.naur): Only disable the optimization on bad chipsets. + const hwcap_ATOMICS = 1 << 8 + hwcap &= ^uint(hwcap_ATOMICS) + } + cpu.HWCap = hwcap case _AT_HWCAP2: cpu.HWCap2 = uint(val) } diff --git a/libgo/go/runtime/os_windows_arm.go b/libgo/go/runtime/os_windows_arm.go new file mode 100644 index 00000000000..10aff75e311 --- /dev/null +++ b/libgo/go/runtime/os_windows_arm.go @@ -0,0 +1,22 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
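
Note: the os_aix.go hunk above turns a relative timeout in nanoseconds into an absolute timespec deadline for sem_timedwait, carrying the nanosecond overflow into the seconds field. A small sketch of the same arithmetic, with the field types simplified to int64 for illustration:

package main

import "fmt"

type timespec struct {
	sec  int64
	nsec int64
}

// addNanos adds a relative timeout ns to an absolute timespec,
// normalizing so that nsec stays in [0, 1e9).
func addNanos(ts timespec, ns int64) timespec {
	ts.sec += ns / 1e9
	ts.nsec += ns % 1e9
	if ts.nsec >= 1e9 {
		ts.sec++
		ts.nsec -= 1e9
	}
	return ts
}

func main() {
	now := timespec{sec: 100, nsec: 900000000}
	fmt.Println(addNanos(now, 250000000)) // {101 150000000}
}
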
+ +package runtime + +import "unsafe" + +//go:nosplit +func cputicks() int64 { + var counter int64 + stdcall1(_QueryPerformanceCounter, uintptr(unsafe.Pointer(&counter))) + return counter +} + +func checkgoarm() { + if goarm < 7 { + print("Need atomic synchronization instructions, coprocessor ", + "access instructions. Recompile using GOARM=7.\n") + exit(1) + } +} diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index 752bf7185f7..bfd2f2d542b 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -54,7 +54,7 @@ var indexError = error(errorString("index out of range")) func panicindex() { name, _, _ := funcfileline(getcallerpc(), -1) - if hasprefix(name, "runtime.") { + if hasPrefix(name, "runtime.") { throw(string(indexError.(errorString))) } panicCheckMalloc(indexError) @@ -65,7 +65,7 @@ var sliceError = error(errorString("slice bounds out of range")) func panicslice() { name, _, _ := funcfileline(getcallerpc(), -1) - if hasprefix(name, "runtime.") { + if hasPrefix(name, "runtime.") { throw(string(sliceError.(errorString))) } panicCheckMalloc(sliceError) @@ -151,6 +151,14 @@ func newdefer() *_defer { systemstack(func() { d = new(_defer) }) + if debugCachedWork { + // Duplicate the tail below so if there's a + // crash in checkPut we can tell if d was just + // allocated or came from the pool. + d.link = gp._defer + gp._defer = d + return d + } } d.link = gp._defer gp._defer = d @@ -242,17 +250,22 @@ func deferreturn(frame *bool) { // code in jmpdefer. var fn func(unsafe.Pointer) *(*uintptr)(unsafe.Pointer(&fn)) = uintptr(noescape(unsafe.Pointer(&pfn))) + gp.deferring = true fn(d.arg) + gp.deferring = false } - // If we are returning from a Go function called by a - // C function running in a C thread, g may now be nil, - // in which case CgocallBackDone will have cleared _defer. - // In that case some other goroutine may already be using gp. + // If that was CgocallBackDone, it will have freed the + // defer for us, since we are no longer running as Go code. if getg() == nil { *frame = true return } + if gp.ranCgocallBackDone { + gp.ranCgocallBackDone = false + *frame = true + return + } gp._defer = d.link @@ -316,7 +329,9 @@ func checkdefer(frame *bool) { var fn func(unsafe.Pointer) *(*uintptr)(unsafe.Pointer(&fn)) = uintptr(noescape(unsafe.Pointer(&pfn))) + gp.deferring = true fn(d.arg) + gp.deferring = false freedefer(d) @@ -389,6 +404,7 @@ func Goexit() { // This code is similar to gopanic, see that implementation // for detailed comments. gp := getg() + gp.goexiting = true for { d := gp._defer if d == nil { @@ -409,7 +425,9 @@ func Goexit() { var fn func(unsafe.Pointer) *(*uintptr)(unsafe.Pointer(&fn)) = uintptr(noescape(unsafe.Pointer(&pfn))) + gp.deferring = true fn(d.arg) + gp.deferring = false if gp._defer != d { throw("bad defer entry in Goexit") @@ -419,6 +437,7 @@ func Goexit() { freedefer(d) // Note: we ignore recovers here because Goexit isn't a panic } + gp.goexiting = false goexit1() } @@ -532,7 +551,9 @@ func gopanic(e interface{}) { var fn func(unsafe.Pointer) *(*uintptr)(unsafe.Pointer(&fn)) = uintptr(noescape(unsafe.Pointer(&pfn))) + gp.deferring = true fn(d.arg) + gp.deferring = false if gp._defer != d { throw("bad defer entry in panic") @@ -649,7 +670,7 @@ func canrecover(retaddr uintptr) bool { } name := locs[1].function - if hasprefix(name, "runtime.") { + if hasPrefix(name, "runtime.") { return true } @@ -676,7 +697,7 @@ func canrecover(retaddr uintptr) bool { } // Ignore function in libffi. 
- if hasprefix(name, "ffi_") { + if hasPrefix(name, "ffi_") { continue } @@ -690,7 +711,7 @@ func canrecover(retaddr uintptr) bool { } // Ignore other functions in the reflect package. - if hasprefix(name, "reflect.") || hasprefix(name, ".1reflect.") { + if hasPrefix(name, "reflect.") || hasPrefix(name, ".1reflect.") { continue } @@ -700,7 +721,7 @@ func canrecover(retaddr uintptr) bool { if i < n { name = locs[i].function - if hasprefix(name, "runtime.") { + if hasPrefix(name, "runtime.") { return true } } @@ -734,7 +755,7 @@ func makefuncfficanrecover(loc []location) { } name := loc[1].function - if hasprefix(name, "runtime.") { + if hasPrefix(name, "runtime.") { d.makefunccanrecover = true } } @@ -935,10 +956,13 @@ func fatalpanic(msgs *_panic) { // It returns true if panic messages should be printed, or false if // the runtime is in bad shape and should just print stacks. // -// It can have write barriers because the write barrier explicitly -// ignores writes once dying > 0. +// It must not have write barriers even though the write barrier +// explicitly ignores writes once dying > 0. Write barriers still +// assume that g.m.p != nil, and this function may not have P +// in some contexts (e.g. a panic in a signal handler for a signal +// sent to an M with no P). // -//go:yeswritebarrierrec +//go:nowritebarrierrec func startpanic_m() bool { _g_ := getg() if mheap_.cachealloc.size == 0 { // very early @@ -958,8 +982,8 @@ func startpanic_m() bool { switch _g_.m.dying { case 0: + // Setting dying >0 has the side-effect of disabling this G's writebuf. _g_.m.dying = 1 - _g_.writebuf = nil atomic.Xadd(&panicking, 1) lock(&paniclk) if debug.schedtrace > 0 || debug.scheddetail > 0 { @@ -1061,7 +1085,7 @@ func canpanic(gp *g) bool { return true } -// isAbortPC returns true if pc is the program counter at which +// isAbortPC reports whether pc is the program counter at which // runtime.abort raises a signal. // // It is nosplit because it's part of the isgoexception diff --git a/libgo/go/runtime/pprof/internal/profile/filter.go b/libgo/go/runtime/pprof/internal/profile/filter.go index 1baa096a49c..9cad866df8c 100644 --- a/libgo/go/runtime/pprof/internal/profile/filter.go +++ b/libgo/go/runtime/pprof/internal/profile/filter.go @@ -55,7 +55,7 @@ func (p *Profile) FilterSamplesByName(focus, ignore, hide *regexp.Regexp) (fm, i return } -// matchesName returns whether the function name or file in the +// matchesName reports whether the function name or file in the // location matches the regular expression. func (loc *Location) matchesName(re *regexp.Regexp) bool { for _, ln := range loc.Line { diff --git a/libgo/go/runtime/pprof/internal/profile/profile.go b/libgo/go/runtime/pprof/internal/profile/profile.go index 64c3e3f054d..a6f8354b1e8 100644 --- a/libgo/go/runtime/pprof/internal/profile/profile.go +++ b/libgo/go/runtime/pprof/internal/profile/profile.go @@ -200,7 +200,7 @@ var libRx = regexp.MustCompile(`([.]so$|[.]so[._][0-9]+)`) // first. 
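
Note: several hunks below switch strings.Replace(s, old, new, -1) to strings.ReplaceAll(s, old, new). The two calls are equivalent; ReplaceAll is the Go 1.12 convenience wrapper for the unlimited-count form. For example:

package main

import (
	"fmt"
	"strings"
)

func main() {
	s := "/bin/cat (deleted) (deleted)"
	// Replace with n = -1 and ReplaceAll produce the same result.
	a := strings.Replace(s, " (deleted)", "", -1)
	b := strings.ReplaceAll(s, " (deleted)", "")
	fmt.Println(a == b, a) // true /bin/cat
}
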
func (p *Profile) setMain() { for i := 0; i < len(p.Mapping); i++ { - file := strings.TrimSpace(strings.Replace(p.Mapping[i].File, "(deleted)", "", -1)) + file := strings.TrimSpace(strings.ReplaceAll(p.Mapping[i].File, "(deleted)", "")) if len(file) == 0 { continue } @@ -415,16 +415,16 @@ func (p *Profile) String() string { for _, m := range p.Mapping { bits := "" if m.HasFunctions { - bits = bits + "[FN]" + bits += "[FN]" } if m.HasFilenames { - bits = bits + "[FL]" + bits += "[FL]" } if m.HasLineNumbers { - bits = bits + "[LN]" + bits += "[LN]" } if m.HasInlineFrames { - bits = bits + "[IN]" + bits += "[IN]" } ss = append(ss, fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s", m.ID, @@ -573,7 +573,7 @@ func (p *Profile) Demangle(d Demangler) error { return nil } -// Empty returns true if the profile contains no samples. +// Empty reports whether the profile contains no samples. func (p *Profile) Empty() bool { return len(p.Sample) == 0 } diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go index 5128c22b338..996b3cbf468 100644 --- a/libgo/go/runtime/pprof/pprof.go +++ b/libgo/go/runtime/pprof/pprof.go @@ -28,6 +28,7 @@ // if err != nil { // log.Fatal("could not create CPU profile: ", err) // } +// defer f.Close() // if err := pprof.StartCPUProfile(f); err != nil { // log.Fatal("could not start CPU profile: ", err) // } @@ -41,11 +42,11 @@ // if err != nil { // log.Fatal("could not create memory profile: ", err) // } +// defer f.Close() // runtime.GC() // get up-to-date statistics // if err := pprof.WriteHeapProfile(f); err != nil { // log.Fatal("could not write memory profile: ", err) // } -// f.Close() // } // } // diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go index fd05a049b5a..bf9f5266e29 100644 --- a/libgo/go/runtime/pprof/pprof_test.go +++ b/libgo/go/runtime/pprof/pprof_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !nacl,!js +// +build !aix,!nacl,!js package pprof @@ -82,14 +82,14 @@ func avoidFunctions() []string { } func TestCPUProfile(t *testing.T) { - testCPUProfile(t, []string{"pprof.cpuHog1"}, avoidFunctions(), func(dur time.Duration) { + testCPUProfile(t, stackContains, []string{"pprof.cpuHog1"}, avoidFunctions(), func(dur time.Duration) { cpuHogger(cpuHog1, &salt1, dur) }) } func TestCPUProfileMultithreaded(t *testing.T) { defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) - testCPUProfile(t, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, avoidFunctions(), func(dur time.Duration) { + testCPUProfile(t, stackContains, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, avoidFunctions(), func(dur time.Duration) { c := make(chan int) go func() { cpuHogger(cpuHog1, &salt1, dur) @@ -101,7 +101,7 @@ func TestCPUProfileMultithreaded(t *testing.T) { } func TestCPUProfileInlining(t *testing.T) { - testCPUProfile(t, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) { + testCPUProfile(t, stackContains, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) { cpuHogger(inlinedCaller, &salt1, dur) }) } @@ -139,7 +139,9 @@ func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Loca } } -func testCPUProfile(t *testing.T, need []string, avoid []string, f func(dur time.Duration)) { +// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need, +// as interpreted by matches. 
+func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) { switch runtime.GOOS { case "darwin": switch runtime.GOARCH { @@ -178,7 +180,7 @@ func testCPUProfile(t *testing.T, need []string, avoid []string, f func(dur time f(duration) StopCPUProfile() - if profileOk(t, need, avoid, prof, duration) { + if profileOk(t, matches, need, avoid, prof, duration) { return } @@ -191,6 +193,10 @@ func testCPUProfile(t *testing.T, need []string, avoid []string, f func(dur time switch runtime.GOOS { case "darwin", "dragonfly", "netbsd", "solaris": t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS) + case "openbsd": + if runtime.GOARCH == "arm" { + t.Skipf("ignoring failure on %s/%s; see golang.org/issue/13841", runtime.GOOS, runtime.GOARCH) + } } // Ignore the failure if the tests are running in a QEMU-based emulator, // QEMU is not perfect at emulating everything. @@ -211,7 +217,21 @@ func contains(slice []string, s string) bool { return false } -func profileOk(t *testing.T, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) { +// stackContains matches if a function named spec appears anywhere in the stack trace. +func stackContains(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, loc := range stk { + for _, line := range loc.Line { + if strings.Contains(line.Function.Name, spec) { + return true + } + } + } + return false +} + +type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool + +func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) { ok = true // Check that profile is well formed, contains 'need', and does not contain @@ -224,18 +244,16 @@ func profileOk(t *testing.T, need []string, avoid []string, prof bytes.Buffer, d fmt.Fprintf(&buf, "%d:", count) fprintStack(&buf, stk) samples += count - for i, name := range need { - if semi := strings.Index(name, ";"); semi > -1 { - kv := strings.SplitN(name[semi+1:], "=", 2) - if len(kv) != 2 || !contains(labels[kv[0]], kv[1]) { - continue - } - name = name[:semi] + for i, spec := range need { + if matches(spec, count, stk, labels) { + have[i] += count } + } + for i, name := range avoid { for _, loc := range stk { for _, line := range loc.Line { if strings.Contains(line.Function.Name, name) { - have[i] += count + avoidSamples[i] += count } } } @@ -316,6 +334,10 @@ func TestCPUProfileWithFork(t *testing.T) { // Use smaller size for Android to avoid crash. heap = 100 << 20 } + if runtime.GOOS == "windows" && runtime.GOARCH == "arm" { + // Use smaller heap for Windows/ARM to avoid crash. + heap = 100 << 20 + } if testing.Short() { heap = 100 << 20 } @@ -408,7 +430,7 @@ func fprintStack(w io.Writer, stk []*profile.Location) { // Test that profiling of division operations is okay, especially on ARM. See issue 6681. func TestMathBigDivide(t *testing.T) { - testCPUProfile(t, nil, nil, func(duration time.Duration) { + testCPUProfile(t, nil, nil, nil, func(duration time.Duration) { t := time.After(duration) pi := new(big.Int) for { @@ -426,6 +448,51 @@ func TestMathBigDivide(t *testing.T) { }) } +// stackContainsAll matches if all functions in spec (comma-separated) appear somewhere in the stack trace. 
+func stackContainsAll(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, f := range strings.Split(spec, ",") { + if !stackContains(f, count, stk, labels) { + return false + } + } + return true +} + +func TestMorestack(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("no runtime.newstack in gccgo") + } + testCPUProfile(t, stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, avoidFunctions(), func(duration time.Duration) { + t := time.After(duration) + c := make(chan bool) + for { + go func() { + growstack1() + c <- true + }() + select { + case <-t: + return + case <-c: + } + } + }) +} + +//go:noinline +func growstack1() { + growstack() +} + +//go:noinline +func growstack() { + var buf [8 << 10]byte + use(buf) +} + +//go:noinline +func use(x [8 << 10]byte) {} + func TestBlockProfile(t *testing.T) { t.Skip("lots of details are different for gccgo; FIXME") type TestCase struct { @@ -556,7 +623,7 @@ func TestBlockProfile(t *testing.T) { } for _, test := range tests { - if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) { + if !regexp.MustCompile(strings.ReplaceAll(test.re, "\t", "\t+")).MatchString(prof) { t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof) } } @@ -887,8 +954,25 @@ func TestEmptyCallStack(t *testing.T) { } } +// stackContainsLabeled takes a spec like funcname;key=value and matches if the stack has that key +// and value and has funcname somewhere in the stack. +func stackContainsLabeled(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + semi := strings.Index(spec, ";") + if semi == -1 { + panic("no semicolon in key/value spec") + } + kv := strings.SplitN(spec[semi+1:], "=", 2) + if len(kv) != 2 { + panic("missing = in key/value spec") + } + if !contains(labels[kv[0]], kv[1]) { + return false + } + return stackContains(spec[:semi], count, stk, labels) +} + func TestCPUProfileLabel(t *testing.T) { - testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, avoidFunctions(), func(dur time.Duration) { + testCPUProfile(t, stackContainsLabeled, []string{"pprof.cpuHogger;key=value"}, avoidFunctions(), func(dur time.Duration) { Do(context.Background(), Labels("key", "value"), func(context.Context) { cpuHogger(cpuHog1, &salt1, dur) }) @@ -899,7 +983,7 @@ func TestLabelRace(t *testing.T) { // Test the race detector annotations for synchronization // between settings labels and consuming them from the // profile. - testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, avoidFunctions(), func(dur time.Duration) { + testCPUProfile(t, stackContainsLabeled, []string{"pprof.cpuHogger;key=value"}, nil, func(dur time.Duration) { start := time.Now() var wg sync.WaitGroup for time.Since(start) < dur { diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go index d8456befd3d..b82e738f941 100644 --- a/libgo/go/runtime/pprof/proto.go +++ b/libgo/go/runtime/pprof/proto.go @@ -213,7 +213,7 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file } // locForPC returns the location ID for addr. -// addr must be a return PC. This returns the location of the call. +// addr must a return PC or 1 + the PC of an inline marker. This returns the location of the corresponding call. // It may emit to b.pb, so there must be no message encoding in progress. 
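
Note: the parseProcSelfMaps hunk below strips a trailing " (deleted)" marker from mapping paths with an explicit length check; outside that code, strings.TrimSuffix expresses the same operation directly. A tiny equivalent:

package main

import (
	"fmt"
	"strings"
)

func main() {
	file := "/bin/cat (deleted)"
	// Drop the marker if present, leave the path alone otherwise,
	// matching the manual slice in the hunk below.
	file = strings.TrimSuffix(file, " (deleted)")
	fmt.Println(file) // /bin/cat
}
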
func (b *profileBuilder) locForPC(addr uintptr) uint64 { id := uint64(b.locs[addr]) @@ -529,6 +529,14 @@ func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, continue } file := string(line) + + // Trim deleted file marker. + deletedStr := " (deleted)" + deletedLen := len(deletedStr) + if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr { + file = file[:len(file)-deletedLen] + } + if len(inode) == 1 && inode[0] == '0' && file == "" { // Huge-page text mappings list the initial fragment of // mapped but unpopulated memory as being inode 0. diff --git a/libgo/go/runtime/pprof/proto_test.go b/libgo/go/runtime/pprof/proto_test.go index 604628ce457..5a915fb4c3a 100644 --- a/libgo/go/runtime/pprof/proto_test.go +++ b/libgo/go/runtime/pprof/proto_test.go @@ -218,24 +218,89 @@ c000000000-c000036000 rw-p 00000000 00:00 0 07000000 07093000 06c00000 /path/to/gobench_server_main ` +var profSelfMapsTestsWithDeleted = ` +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat (deleted) +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat (deleted) +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat (deleted) +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] + +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat with space +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat with space +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat with space +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 
1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat with space +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] +` + func TestProcSelfMaps(t *testing.T) { - for tx, tt := range strings.Split(profSelfMapsTests, "\n\n") { - i := strings.Index(tt, "->\n") - if i < 0 { - t.Fatal("malformed test case") - } - in, out := tt[:i], tt[i+len("->\n"):] - if len(out) > 0 && out[len(out)-1] != '\n' { - out += "\n" - } - var buf bytes.Buffer - parseProcSelfMaps([]byte(in), func(lo, hi, offset uint64, file, buildID string) { - fmt.Fprintf(&buf, "%08x %08x %08x %s\n", lo, hi, offset, file) - }) - if buf.String() != out { - t.Errorf("#%d: have:\n%s\nwant:\n%s\n%q\n%q", tx, buf.String(), out, buf.String(), out) + + f := func(t *testing.T, input string) { + for tx, tt := range strings.Split(input, "\n\n") { + i := strings.Index(tt, "->\n") + if i < 0 { + t.Fatal("malformed test case") + } + in, out := tt[:i], tt[i+len("->\n"):] + if len(out) > 0 && out[len(out)-1] != '\n' { + out += "\n" + } + var buf bytes.Buffer + parseProcSelfMaps([]byte(in), func(lo, hi, offset uint64, file, buildID string) { + fmt.Fprintf(&buf, "%08x %08x %08x %s\n", lo, hi, offset, file) + }) + if buf.String() != out { + t.Errorf("#%d: have:\n%s\nwant:\n%s\n%q\n%q", tx, buf.String(), out, buf.String(), out) + } } } + + t.Run("Normal", func(t *testing.T) { + f(t, profSelfMapsTests) + }) + + t.Run("WithDeletedFile", func(t *testing.T) { + f(t, profSelfMapsTestsWithDeleted) + }) } // TestMapping checkes the mapping section of CPU profiles diff --git a/libgo/go/runtime/pprof/protomem.go b/libgo/go/runtime/pprof/protomem.go index 82565d5245b..1c88aae43a0 100644 --- a/libgo/go/runtime/pprof/protomem.go +++ b/libgo/go/runtime/pprof/protomem.go @@ -56,8 +56,8 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defau values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate) values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate) var blockSize int64 - if values[0] > 0 { - blockSize = values[1] / values[0] + if r.AllocObjects > 0 { + blockSize = r.AllocBytes / r.AllocObjects } b.pbSample(values, locs, func() { if blockSize != 0 { diff --git a/libgo/go/runtime/pprof/protomem_test.go b/libgo/go/runtime/pprof/protomem_test.go index 315d5f0b4d8..471b1ae9c32 100644 --- a/libgo/go/runtime/pprof/protomem_test.go +++ b/libgo/go/runtime/pprof/protomem_test.go @@ -48,7 +48,7 @@ func TestConvertMemProfile(t *testing.T) { {ID: 3, Mapping: map2, Address: addr2 + 1}, {ID: 4, Mapping: map2, Address: addr2 + 2}, }, - NumLabel: map[string][]int64{"bytes": {829411}}, + NumLabel: map[string][]int64{"bytes": {512 * 1024}}, }, { Value: []int64{1, 829411, 0, 0}, @@ -57,7 +57,7 @@ func TestConvertMemProfile(t *testing.T) { {ID: 6, Mapping: map1, Address: addr1 + 2}, {ID: 7, Mapping: map2, Address: addr2 + 3}, }, - NumLabel: map[string][]int64{"bytes": {829411}}, + NumLabel: map[string][]int64{"bytes": {512 * 1024}}, }, } for _, tc := range []struct { diff --git 
a/libgo/go/runtime/print.go b/libgo/go/runtime/print.go index 3da05ad5f9e..8c396442862 100644 --- a/libgo/go/runtime/print.go +++ b/libgo/go/runtime/print.go @@ -110,7 +110,12 @@ func gwrite(b []byte) { } recordForPanic(b) gp := getg() - if gp == nil || gp.writebuf == nil { + // Don't use the writebuf if gp.m is dying. We want anything + // written through gwrite to appear in the terminal rather + // than be written to in some buffer, if we're in a panicking state. + // Note that we can't just clear writebuf in the gp.m.dying case + // because a panic isn't allowed to have any write barriers. + if gp == nil || gp.writebuf == nil || gp.m.dying > 0 { writeErr(b) return } diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 80b04ab4a5d..05dd53d886f 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -20,7 +20,6 @@ import ( //go:linkname incidlelocked runtime.incidlelocked //go:linkname schedinit runtime.schedinit //go:linkname ready runtime.ready -//go:linkname gcprocs runtime.gcprocs //go:linkname stopm runtime.stopm //go:linkname handoffp runtime.handoffp //go:linkname wakep runtime.wakep @@ -32,7 +31,6 @@ import ( //go:linkname reentersyscallblock runtime.reentersyscallblock //go:linkname exitsyscall runtime.exitsyscall //go:linkname gfget runtime.gfget -//go:linkname helpgc runtime.helpgc //go:linkname kickoff runtime.kickoff //go:linkname mstart1 runtime.mstart1 //go:linkname mexit runtime.mexit @@ -196,8 +194,7 @@ func main() { } }() - // Record when the world started. Must be after runtime_init - // because nanotime on some platforms depends on startNano. + // Record when the world started. runtimeInitTime = nanotime() main_init_done = make(chan bool) @@ -285,7 +282,7 @@ func forcegchelper() { println("GC forced") } // Time-triggered, fully concurrent. - gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerTime, now: nanotime()}) + gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()}) } } @@ -479,17 +476,18 @@ const ( _GoidCacheBatch = 16 ) -// cpuinit extracts the environment variable GODEBUGCPU from the environment on -// Linux and Darwin if the GOEXPERIMENT debugcpu was set and calls internal/cpu.Initialize. +// cpuinit extracts the environment variable GODEBUG from the environment on +// Unix-like operating systems and calls internal/cpu.Initialize. func cpuinit() { - const prefix = "GODEBUGCPU=" + const prefix = "GODEBUG=" var env string - if haveexperiment("debugcpu") && (GOOS == "linux" || GOOS == "darwin") { + switch GOOS { + case "aix", "darwin", "dragonfly", "freebsd", "netbsd", "openbsd", "solaris", "linux": cpu.DebugOptions = true // Similar to goenv_unix but extracts the environment value for - // GODEBUGCPU directly. + // GODEBUG directly. // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() n := int32(0) for argv_index(argv, argc+1+n) != nil { @@ -500,7 +498,7 @@ func cpuinit() { p := argv_index(argv, argc+1+i) s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)})) - if hasprefix(s, prefix) { + if hasPrefix(s, prefix) { env = gostring(p)[len(prefix):] break } @@ -643,59 +641,6 @@ func ready(gp *g, traceskip int, next bool) { _g_.m.locks-- } -func gcprocs() int32 { - // Figure out how many CPUs to use during GC. - // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. 
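
Note: the cpuinit hunk above scans the raw environment for a "GODEBUG=" prefix before the normal goenvs() machinery is available. Outside the runtime the same scan can be written against a plain []string environment; a sketch with a hypothetical lookupPrefix helper (not a runtime function):

package main

import (
	"fmt"
	"strings"
)

// lookupPrefix returns the value of the first environment entry that
// starts with prefix (e.g. "GODEBUG="), and whether one was found.
func lookupPrefix(env []string, prefix string) (string, bool) {
	for _, kv := range env {
		if strings.HasPrefix(kv, prefix) {
			return kv[len(prefix):], true
		}
	}
	return "", false
}

func main() {
	env := []string{"HOME=/root", "GODEBUG=madvdontneed=1,gctrace=1"}
	if v, ok := lookupPrefix(env, "GODEBUG="); ok {
		fmt.Println(v) // madvdontneed=1,gctrace=1
	}
}
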
- lock(&sched.lock) - n := gomaxprocs - if n > ncpu { - n = ncpu - } - if n > _MaxGcproc { - n = _MaxGcproc - } - if n > sched.nmidle+1 { // one M is currently running - n = sched.nmidle + 1 - } - unlock(&sched.lock) - return n -} - -func needaddgcproc() bool { - lock(&sched.lock) - n := gomaxprocs - if n > ncpu { - n = ncpu - } - if n > _MaxGcproc { - n = _MaxGcproc - } - n -= sched.nmidle + 1 // one M is currently running - unlock(&sched.lock) - return n > 0 -} - -func helpgc(nproc int32) { - _g_ := getg() - lock(&sched.lock) - pos := 0 - for n := int32(1); n < nproc; n++ { // one M is currently running - if allp[pos].mcache == _g_.m.mcache { - pos++ - } - mp := mget() - if mp == nil { - throw("gcprocs inconsistency") - } - mp.helpgc = n - mp.p.set(allp[pos]) - mp.mcache = allp[pos].mcache - pos++ - notewakeup(&mp.park) - } - unlock(&sched.lock) -} - // freezeStopWait is a large value that freezetheworld sets // sched.stopwait to in order to request that all Gs permanently stop. const freezeStopWait = 0x7fffffff @@ -1154,20 +1099,14 @@ func stopTheWorldWithSema() { } } -func mhelpgc() { - _g_ := getg() - _g_.m.helpgc = -1 -} - func startTheWorldWithSema(emitTraceEvent bool) int64 { _g_ := getg() _g_.m.locks++ // disable preemption because it can be holding p in a local var if netpollinited() { - gp := netpoll(false) // non-blocking - injectglist(gp) + list := netpoll(false) // non-blocking + injectglist(&list) } - add := needaddgcproc() lock(&sched.lock) procs := gomaxprocs @@ -1197,7 +1136,6 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { } else { // Start M to run P. Do not start another M below. newm(nil, p) - add = false } } @@ -1214,16 +1152,6 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { wakep() } - if add { - // If GC could have used another helper proc, start one now, - // in the hope that it will be available next time. - // It would have been even better to start it before the collection, - // but doing so requires allocating memory, so it's tricky to - // coordinate. This lazy approach works out in practice: - // we don't mind if the first couple gc rounds don't have quite - // the maximum number of procs. - newm(mhelpgc, nil) - } _g_.m.locks-- return startTime @@ -1288,10 +1216,7 @@ func mstart1() { fn() } - if _g_.m.helpgc != 0 { - _g_.m.helpgc = 0 - stopm() - } else if _g_.m != &m0 { + if _g_.m != &m0 { acquirep(_g_.m.nextp.ptr()) _g_.m.nextp = 0 } @@ -1591,7 +1516,7 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe // the following strategy: there is a stack of available m's // that can be stolen. Using compare-and-swap // to pop from the stack has ABA races, so we simulate -// a lock by doing an exchange (via casp) to steal the stack +// a lock by doing an exchange (via Casuintptr) to steal the stack // head and replace the top pointer with MLOCKED (1). // This serves as a simple spin lock that we can use even // without an m. The thread that locks the stack in this way @@ -1925,7 +1850,7 @@ func startTemplateThread() { // templateThread is a thread in a known-good state that exists solely // to start new threads in known-good states when the calling thread -// may not be a a good state. +// may not be in a good state. 
// // Many programs never need this, so templateThread is started lazily // when we first enter a state that might lead to running on a thread @@ -1977,21 +1902,11 @@ func stopm() { throw("stopm spinning") } -retry: lock(&sched.lock) mput(_g_.m) unlock(&sched.lock) notesleep(&_g_.m.park) noteclear(&_g_.m.park) - if _g_.m.helpgc != 0 { - // helpgc() set _g_.m.p and _g_.m.mcache, so we have a P. - gchelper() - // Undo the effects of helpgc(). - _g_.m.helpgc = 0 - _g_.m.mcache = nil - _g_.m.p = 0 - goto retry - } acquirep(_g_.m.nextp.ptr()) _g_.m.nextp = 0 } @@ -2279,9 +2194,9 @@ top: // not set lastpoll yet), this thread will do blocking netpoll below // anyway. if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 { - if gp := netpoll(false); gp != nil { // non-blocking - // netpoll returns list of goroutines linked by schedlink. - injectglist(gp.schedlink.ptr()) + if list := netpoll(false); !list.empty() { // non-blocking + gp := list.pop() + injectglist(&list) casgstatus(gp, _Gwaiting, _Grunnable) if trace.enabled { traceGoUnpark(gp, 0) @@ -2336,10 +2251,10 @@ stop: } // wasm only: - // Check if a goroutine is waiting for a callback from the WebAssembly host. - // If yes, pause the execution until a callback was triggered. - if pauseSchedulerUntilCallback() { - // A callback was triggered and caused at least one goroutine to wake up. + // If a callback returned and no other goroutine is awake, + // then pause execution until a callback was triggered. + if beforeIdle() { + // At least one goroutine got woken. goto top } @@ -2433,29 +2348,30 @@ stop: if _g_.m.spinning { throw("findrunnable: netpoll with spinning") } - gp := netpoll(true) // block until new work is available + list := netpoll(true) // block until new work is available atomic.Store64(&sched.lastpoll, uint64(nanotime())) - if gp != nil { + if !list.empty() { lock(&sched.lock) _p_ = pidleget() unlock(&sched.lock) if _p_ != nil { acquirep(_p_) - injectglist(gp.schedlink.ptr()) + gp := list.pop() + injectglist(&list) casgstatus(gp, _Gwaiting, _Grunnable) if trace.enabled { traceGoUnpark(gp, 0) } return gp, false } - injectglist(gp) + injectglist(&list) } } stopm() goto top } -// pollWork returns true if there is non-background work this P could +// pollWork reports whether there is non-background work this P could // be doing. This is a fairly lightweight check to be used for // background work loops, like idle GC. It checks a subset of the // conditions checked by the actual scheduler. @@ -2468,8 +2384,8 @@ func pollWork() bool { return true } if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 { - if gp := netpoll(false); gp != nil { - injectglist(gp) + if list := netpoll(false); !list.empty() { + injectglist(&list) return true } } @@ -2494,22 +2410,21 @@ func resetspinning() { } } -// Injects the list of runnable G's into the scheduler. +// Injects the list of runnable G's into the scheduler and clears glist. // Can run concurrently with GC. 
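
Note: in the findrunnable hunks above, a non-empty list from netpoll is consumed by popping one goroutine to run immediately and handing the remainder to injectglist. A minimal sketch of that split, using a plain slice in place of gList:

package main

import "fmt"

// takeOne pops the first ready item to run now and returns the rest,
// mirroring "gp := list.pop(); injectglist(&list)" in the hunks above.
func takeOne(ready []string) (run string, rest []string) {
	if len(ready) == 0 {
		return "", nil
	}
	return ready[0], ready[1:]
}

func main() {
	run, rest := takeOne([]string{"g1", "g2", "g3"})
	fmt.Println("run now:", run)          // run now: g1
	fmt.Println("queue for later:", rest) // queue for later: [g2 g3]
}
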
-func injectglist(glist *g) { - if glist == nil { +func injectglist(glist *gList) { + if glist.empty() { return } if trace.enabled { - for gp := glist; gp != nil; gp = gp.schedlink.ptr() { + for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() { traceGoUnpark(gp, 0) } } lock(&sched.lock) var n int - for n = 0; glist != nil; n++ { - gp := glist - glist = gp.schedlink.ptr() + for n = 0; !glist.empty(); n++ { + gp := glist.pop() casgstatus(gp, _Gwaiting, _Grunnable) globrunqput(gp) } @@ -2517,6 +2432,7 @@ func injectglist(glist *g) { for ; n != 0 && sched.npidle != 0; n-- { startm(nil, false) } + *glist = gList{} } // One round of scheduler: find a runnable goroutine and execute it. @@ -2602,6 +2518,23 @@ top: resetspinning() } + if sched.disable.user && !schedEnabled(gp) { + // Scheduling of this goroutine is disabled. Put it on + // the list of pending runnable goroutines for when we + // re-enable user scheduling and look again. + lock(&sched.lock) + if schedEnabled(gp) { + // Something re-enabled scheduling while we + // were acquiring the lock. + unlock(&sched.lock) + } else { + sched.disable.runnable.pushBack(gp) + sched.disable.n++ + unlock(&sched.lock) + goto top + } + } + if gp.lockedm != 0 { // Hands off own p to the locked m, // then blocks waiting for a new p. @@ -2714,7 +2647,7 @@ func goexit0(gp *g) { _g_ := getg() casgstatus(gp, _Grunning, _Gdead) - if isSystemGoroutine(gp) { + if isSystemGoroutine(gp, false) { atomic.Xadd(&sched.ngsys, -1) gp.isSystemGoroutine = false } @@ -2755,7 +2688,6 @@ func goexit0(gp *g) { print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n") throw("internal lockOSThread error") } - _g_.m.lockedExt = 0 gfput(_g_.m.p.ptr(), gp) if locked { // The goroutine may have locked this thread because @@ -2767,6 +2699,10 @@ func goexit0(gp *g) { if GOOS != "plan9" { // See golang.org/issue/22227. _g_.m.exiting = true gogo(_g_.m.g0) + } else { + // Clear lockedExt on plan9 since we may end up re-using + // this thread. + _g_.m.lockedExt = 0 } } schedule() @@ -2823,8 +2759,11 @@ func reentersyscall(pc, sp uintptr) { _g_.m.syscalltick = _g_.m.p.ptr().syscalltick _g_.sysblocktraced = true _g_.m.mcache = nil - _g_.m.p.ptr().m = 0 - atomic.Store(&_g_.m.p.ptr().status, _Psyscall) + pp := _g_.m.p.ptr() + pp.m = 0 + _g_.m.oldp.set(pp) + _g_.m.p = 0 + atomic.Store(&pp.status, _Psyscall) if sched.gcwaiting != 0 { systemstack(entersyscall_gcwait) } @@ -2843,7 +2782,7 @@ func entersyscall_sysmon() { func entersyscall_gcwait() { _g_ := getg() - _p_ := _g_.m.p.ptr() + _p_ := _g_.m.oldp.ptr() lock(&sched.lock) if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) { @@ -2900,8 +2839,9 @@ func exitsyscall() { _g_.m.locks++ // see comment in entersyscall _g_.waitsince = 0 - oldp := _g_.m.p.ptr() - if exitsyscallfast() { + oldp := _g_.m.oldp.ptr() + _g_.m.oldp = 0 + if exitsyscallfast(oldp) { if _g_.m.mcache == nil { throw("lost mcache") } @@ -2924,6 +2864,12 @@ func exitsyscall() { if getg().preempt { checkPreempt() } + _g_.throwsplit = false + + if sched.disable.user && !schedEnabled(_g_) { + // Scheduling of this goroutine is disabled. + Gosched() + } return } @@ -2964,27 +2910,23 @@ func exitsyscall() { } //go:nosplit -func exitsyscallfast() bool { +func exitsyscallfast(oldp *p) bool { _g_ := getg() // Freezetheworld sets stopwait but does not retake P's. if sched.stopwait == freezeStopWait { - _g_.m.mcache = nil - _g_.m.p = 0 return false } // Try to re-acquire the last P. 
- if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && atomic.Cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) { + if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) { // There's a cpu for us, so we can run. + wirep(oldp) exitsyscallfast_reacquired() return true } // Try to get any other idle P. - oldp := _g_.m.p.ptr() - _g_.m.mcache = nil - _g_.m.p = 0 if sched.pidle != 0 { var ok bool systemstack(func() { @@ -3011,15 +2953,9 @@ func exitsyscallfast() bool { // has successfully reacquired the P it was running on before the // syscall. // -// This function is allowed to have write barriers because exitsyscall -// has acquired a P at this point. -// -//go:yeswritebarrierrec //go:nosplit func exitsyscallfast_reacquired() { _g_ := getg() - _g_.m.mcache = _g_.m.p.ptr().mcache - _g_.m.p.ptr().m.set(_g_.m) if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { if trace.enabled { // The p was retaken and then enter into syscall again (since _g_.m.syscalltick has changed). @@ -3062,7 +2998,10 @@ func exitsyscall0(gp *g) { dropg() casgstatus(gp, _Gexitingsyscall, _Grunnable) lock(&sched.lock) - _p_ := pidleget() + var _p_ *p + if schedEnabled(_g_) { + _p_ = pidleget() + } if _p_ == nil { globrunqput(gp) } else if atomic.Load(&sched.sysmonwait) != 0 { @@ -3229,11 +3168,12 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g { newg.param = arg newg.gopc = getcallerpc() + newg.ancestors = saveAncestors(_g_) newg.startpc = fn if _g_.m.curg != nil { newg.labels = _g_.m.curg.labels } - if isSystemGoroutine(newg) { + if isSystemGoroutine(newg, false) { atomic.Xadd(&sched.ngsys, +1) } newg.gcscanvalid = false @@ -3342,20 +3282,17 @@ func gfput(_p_ *p, gp *g) { throw("gfput: bad status (not Gdead)") } - gp.schedlink.set(_p_.gfree) - _p_.gfree = gp - _p_.gfreecnt++ - if _p_.gfreecnt >= 64 { - lock(&sched.gflock) - for _p_.gfreecnt >= 32 { - _p_.gfreecnt-- - gp = _p_.gfree - _p_.gfree = gp.schedlink.ptr() - gp.schedlink.set(sched.gfree) - sched.gfree = gp - sched.ngfree++ + _p_.gFree.push(gp) + _p_.gFree.n++ + if _p_.gFree.n >= 64 { + lock(&sched.gFree.lock) + for _p_.gFree.n >= 32 { + _p_.gFree.n-- + gp = _p_.gFree.pop() + sched.gFree.list.push(gp) + sched.gFree.n++ } - unlock(&sched.gflock) + unlock(&sched.gFree.lock) } } @@ -3363,43 +3300,39 @@ func gfput(_p_ *p, gp *g) { // If local list is empty, grab a batch from global list. func gfget(_p_ *p) *g { retry: - gp := _p_.gfree - if gp == nil && sched.gfree != nil { - lock(&sched.gflock) - for _p_.gfreecnt < 32 { - if sched.gfree != nil { - gp = sched.gfree - sched.gfree = gp.schedlink.ptr() - } else { + if _p_.gFree.empty() && !sched.gFree.list.empty() { + lock(&sched.gFree.lock) + // Move a batch of free Gs to the P. + for _p_.gFree.n < 32 { + gp := sched.gFree.list.pop() + if gp == nil { break } - _p_.gfreecnt++ - sched.ngfree-- - gp.schedlink.set(_p_.gfree) - _p_.gfree = gp + sched.gFree.n-- + _p_.gFree.push(gp) + _p_.gFree.n++ } - unlock(&sched.gflock) + unlock(&sched.gFree.lock) goto retry } - if gp != nil { - _p_.gfree = gp.schedlink.ptr() - _p_.gfreecnt-- + gp := _p_.gFree.pop() + if gp == nil { + return nil } + _p_.gFree.n-- return gp } // Purge all cached G's from gfree list to the global list. 
func gfpurge(_p_ *p) { - lock(&sched.gflock) - for _p_.gfreecnt != 0 { - _p_.gfreecnt-- - gp := _p_.gfree - _p_.gfree = gp.schedlink.ptr() - gp.schedlink.set(sched.gfree) - sched.gfree = gp - sched.ngfree++ + lock(&sched.gFree.lock) + for !_p_.gFree.empty() { + gp := _p_.gFree.pop() + _p_.gFree.n-- + sched.gFree.list.push(gp) + sched.gFree.n++ } - unlock(&sched.gflock) + unlock(&sched.gFree.lock) } // Breakpoint executes a breakpoint trap. @@ -3512,9 +3445,9 @@ func badunlockosthread() { } func gcount() int32 { - n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys)) + n := int32(allglen) - sched.gFree.n - int32(atomic.Load(&sched.ngsys)) for _, _p_ := range allp { - n -= _p_.gfreecnt + n -= _p_.gFree.n } // All these variables can be changed concurrently, so the result can be inconsistent. @@ -3627,7 +3560,7 @@ func sigprof(pc uintptr, gp *g, mp *m) { // Account it against abstract "System" or "GC". n = 2 stk[0] = pc - if mp.preemptoff != "" || mp.helpgc != 0 { + if mp.preemptoff != "" { stk[1] = _GCPC + sys.PCQuantum } else { stk[1] = _SystemPC + sys.PCQuantum @@ -3854,6 +3787,7 @@ func procresize(nprocs int32) *p { if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs { // continue to use the current P _g_.m.p.ptr().status = _Prunning + _g_.m.p.ptr().mcache.prepareForSweep() } else { // release the current P and acquire allp[0] if _g_.m.p != 0 { @@ -3898,36 +3832,40 @@ func procresize(nprocs int32) *p { //go:yeswritebarrierrec func acquirep(_p_ *p) { // Do the part that isn't allowed to have write barriers. - acquirep1(_p_) + wirep(_p_) - // have p; write barriers now allowed - _g_ := getg() - _g_.m.mcache = _p_.mcache + // Have p; write barriers now allowed. + + // Perform deferred mcache flush before this P can allocate + // from a potentially stale mcache. + _p_.mcache.prepareForSweep() if trace.enabled { traceProcStart() } } -// acquirep1 is the first step of acquirep, which actually acquires -// _p_. This is broken out so we can disallow write barriers for this -// part, since we don't yet have a P. +// wirep is the first step of acquirep, which actually associates the +// current M to _p_. This is broken out so we can disallow write +// barriers for this part, since we don't yet have a P. // //go:nowritebarrierrec -func acquirep1(_p_ *p) { +//go:nosplit +func wirep(_p_ *p) { _g_ := getg() if _g_.m.p != 0 || _g_.m.mcache != nil { - throw("acquirep: already in go") + throw("wirep: already in go") } if _p_.m != 0 || _p_.status != _Pidle { id := int64(0) if _p_.m != 0 { id = _p_.m.ptr().id } - print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n") - throw("acquirep: invalid p state") + print("wirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n") + throw("wirep: invalid p state") } + _g_.m.mcache = _p_.mcache _g_.m.p.set(_p_) _p_.m.set(_g_.m) _p_.status = _Prunning @@ -4005,7 +3943,7 @@ func checkdead() { lock(&allglock) for i := 0; i < len(allgs); i++ { gp := allgs[i] - if isSystemGoroutine(gp) { + if isSystemGoroutine(gp, false) { continue } s := readgstatus(gp) @@ -4134,8 +4072,8 @@ func sysmon() { now := nanotime() if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now { atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now)) - gp := netpoll(false) // non-blocking - returns list of goroutines - if gp != nil { + list := netpoll(false) // non-blocking - returns list of goroutines + if !list.empty() { // Need to decrement number of idle locked M's // (pretending that one more is running) before injectglist. 
// Otherwise it can lead to the following situation: @@ -4144,7 +4082,7 @@ func sysmon() { // observes that there is no work to do and no other running M's // and reports deadlock. incidlelocked(-1) - injectglist(gp) + injectglist(&list) incidlelocked(1) } } @@ -4159,8 +4097,9 @@ func sysmon() { if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && atomic.Load(&forcegc.idle) != 0 { lock(&forcegc.lock) forcegc.idle = 0 - forcegc.g.schedlink = 0 - injectglist(forcegc.g) + var list gList + list.push(forcegc.g) + injectglist(&list) unlock(&forcegc.lock) } // scavenge heap once in a while @@ -4337,7 +4276,7 @@ func schedtrace(detailed bool) { if mp != nil { id = mp.id } - print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n") + print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, "\n") } else { // In non-detailed mode format lengths of per-P run queues as: // [len1 len2 len3 len4] @@ -4373,7 +4312,7 @@ func schedtrace(detailed bool) { if lockedg != nil { id3 = lockedg.goid } - print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n") + print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n") } lock(&allglock) @@ -4395,6 +4334,40 @@ func schedtrace(detailed bool) { unlock(&sched.lock) } +// schedEnableUser enables or disables the scheduling of user +// goroutines. +// +// This does not stop already running user goroutines, so the caller +// should first stop the world when disabling user goroutines. +func schedEnableUser(enable bool) { + lock(&sched.lock) + if sched.disable.user == !enable { + unlock(&sched.lock) + return + } + sched.disable.user = !enable + if enable { + n := sched.disable.n + sched.disable.n = 0 + globrunqputbatch(&sched.disable.runnable, n) + unlock(&sched.lock) + for ; n != 0 && sched.npidle != 0; n-- { + startm(nil, false) + } + } else { + unlock(&sched.lock) + } +} + +// schedEnabled reports whether gp should be scheduled. It returns +// false is scheduling of gp is disabled. +func schedEnabled(gp *g) bool { + if sched.disable.user { + return isSystemGoroutine(gp, true) + } + return true +} + // Put mp on midle list. // Sched must be locked. // May run during STW, so write barriers are not allowed. @@ -4424,13 +4397,7 @@ func mget() *m { // May run during STW, so write barriers are not allowed. //go:nowritebarrierrec func globrunqput(gp *g) { - gp.schedlink = 0 - if sched.runqtail != 0 { - sched.runqtail.ptr().schedlink.set(gp) - } else { - sched.runqhead.set(gp) - } - sched.runqtail.set(gp) + sched.runq.pushBack(gp) sched.runqsize++ } @@ -4439,25 +4406,17 @@ func globrunqput(gp *g) { // May run during STW, so write barriers are not allowed. //go:nowritebarrierrec func globrunqputhead(gp *g) { - gp.schedlink = sched.runqhead - sched.runqhead.set(gp) - if sched.runqtail == 0 { - sched.runqtail.set(gp) - } + sched.runq.push(gp) sched.runqsize++ } // Put a batch of runnable goroutines on the global runnable queue. +// This clears *batch. 
// Sched must be locked. -func globrunqputbatch(ghead *g, gtail *g, n int32) { - gtail.schedlink = 0 - if sched.runqtail != 0 { - sched.runqtail.ptr().schedlink.set(ghead) - } else { - sched.runqhead.set(ghead) - } - sched.runqtail.set(gtail) +func globrunqputbatch(batch *gQueue, n int32) { + sched.runq.pushBackAll(*batch) sched.runqsize += n + *batch = gQueue{} } // Try get a batch of G's from the global runnable queue. @@ -4479,16 +4438,11 @@ func globrunqget(_p_ *p, max int32) *g { } sched.runqsize -= n - if sched.runqsize == 0 { - sched.runqtail = 0 - } - gp := sched.runqhead.ptr() - sched.runqhead = gp.schedlink + gp := sched.runq.pop() n-- for ; n > 0; n-- { - gp1 := sched.runqhead.ptr() - sched.runqhead = gp1.schedlink + gp1 := sched.runq.pop() runqput(_p_, gp1, false) } return gp @@ -4520,7 +4474,7 @@ func pidleget() *p { return _p_ } -// runqempty returns true if _p_ has no Gs on its local run queue. +// runqempty reports whether _p_ has no Gs on its local run queue. // It never returns true spuriously. func runqempty(_p_ *p) bool { // Defend against a race where 1) _p_ has G1 in runqnext but runqhead == runqtail, @@ -4572,11 +4526,11 @@ func runqput(_p_ *p, gp *g, next bool) { } retry: - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers t := _p_.runqtail if t-h < uint32(len(_p_.runq)) { _p_.runq[t%uint32(len(_p_.runq))].set(gp) - atomic.Store(&_p_.runqtail, t+1) // store-release, makes the item available for consumption + atomic.StoreRel(&_p_.runqtail, t+1) // store-release, makes the item available for consumption return } if runqputslow(_p_, gp, h, t) { @@ -4600,7 +4554,7 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool { for i := uint32(0); i < n; i++ { batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr() } - if !atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + if !atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume return false } batch[n] = gp @@ -4616,10 +4570,13 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool { for i := uint32(0); i < n; i++ { batch[i].schedlink.set(batch[i+1]) } + var q gQueue + q.head.set(batch[0]) + q.tail.set(batch[n]) // Now put the batch on global queue. lock(&sched.lock) - globrunqputbatch(batch[0], batch[n], int32(n+1)) + globrunqputbatch(&q, int32(n+1)) unlock(&sched.lock) return true } @@ -4641,13 +4598,13 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) { } for { - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers t := _p_.runqtail if t == h { return nil, false } gp := _p_.runq[h%uint32(len(_p_.runq))].ptr() - if atomic.Cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume + if atomic.CasRel(&_p_.runqhead, h, h+1) { // cas-release, commits consume return gp, false } } @@ -4659,8 +4616,8 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) { // Can be executed by any P. 
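
Note: the runqput/runqget hunks above move from plain atomic.Load/Store/Cas to the runtime-internal LoadAcq/StoreRel/CasRel, making the acquire/release intent of the per-P run-queue ring explicit. Outside the runtime, sync/atomic's operations are at least that strong, so the same publish/consume pattern can be sketched as below, assuming a single producer per ring just as runqput assumes one owning P:

package main

import (
	"fmt"
	"sync/atomic"
)

const ringSize = 8

type ring struct {
	head uint32 // advanced by consumers (atomic)
	tail uint32 // advanced only by the owning producer (atomic store)
	buf  [ringSize]int
}

// push publishes v if there is room: write the slot first, then make it
// visible with an atomic store of tail (the "store-release" in runqput).
func (r *ring) push(v int) bool {
	h := atomic.LoadUint32(&r.head) // synchronize with consumers
	t := r.tail                     // only the producer writes tail
	if t-h >= ringSize {
		return false // full
	}
	r.buf[t%ringSize] = v
	atomic.StoreUint32(&r.tail, t+1)
	return true
}

// pop takes one item; a CAS on head commits the consume, as in runqget.
func (r *ring) pop() (int, bool) {
	for {
		h := atomic.LoadUint32(&r.head)
		t := atomic.LoadUint32(&r.tail)
		if t == h {
			return 0, false
		}
		v := r.buf[h%ringSize]
		if atomic.CompareAndSwapUint32(&r.head, h, h+1) {
			return v, true
		}
	}
}

func main() {
	var r ring
	r.push(1)
	r.push(2)
	fmt.Println(r.pop()) // 1 true
	fmt.Println(r.pop()) // 2 true
	fmt.Println(r.pop()) // 0 false
}
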
func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 { for { - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers - t := atomic.Load(&_p_.runqtail) // load-acquire, synchronize with the producer + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers + t := atomic.LoadAcq(&_p_.runqtail) // load-acquire, synchronize with the producer n := t - h n = n - n/2 if n == 0 { @@ -4703,7 +4660,7 @@ func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool g := _p_.runq[(h+i)%uint32(len(_p_.runq))] batch[(batchHead+i)%uint32(len(batch))] = g } - if atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + if atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume return n } } @@ -4723,11 +4680,112 @@ func runqsteal(_p_, p2 *p, stealRunNextG bool) *g { if n == 0 { return gp } - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers if t-h+n >= uint32(len(_p_.runq)) { throw("runqsteal: runq overflow") } - atomic.Store(&_p_.runqtail, t+n) // store-release, makes the item available for consumption + atomic.StoreRel(&_p_.runqtail, t+n) // store-release, makes the item available for consumption + return gp +} + +// A gQueue is a dequeue of Gs linked through g.schedlink. A G can only +// be on one gQueue or gList at a time. +type gQueue struct { + head guintptr + tail guintptr +} + +// empty reports whether q is empty. +func (q *gQueue) empty() bool { + return q.head == 0 +} + +// push adds gp to the head of q. +func (q *gQueue) push(gp *g) { + gp.schedlink = q.head + q.head.set(gp) + if q.tail == 0 { + q.tail.set(gp) + } +} + +// pushBack adds gp to the tail of q. +func (q *gQueue) pushBack(gp *g) { + gp.schedlink = 0 + if q.tail != 0 { + q.tail.ptr().schedlink.set(gp) + } else { + q.head.set(gp) + } + q.tail.set(gp) +} + +// pushBackAll adds all Gs in l2 to the tail of q. After this q2 must +// not be used. +func (q *gQueue) pushBackAll(q2 gQueue) { + if q2.tail == 0 { + return + } + q2.tail.ptr().schedlink = 0 + if q.tail != 0 { + q.tail.ptr().schedlink = q2.head + } else { + q.head = q2.head + } + q.tail = q2.tail +} + +// pop removes and returns the head of queue q. It returns nil if +// q is empty. +func (q *gQueue) pop() *g { + gp := q.head.ptr() + if gp != nil { + q.head = gp.schedlink + if q.head == 0 { + q.tail = 0 + } + } + return gp +} + +// popList takes all Gs in q and returns them as a gList. +func (q *gQueue) popList() gList { + stack := gList{q.head} + *q = gQueue{} + return stack +} + +// A gList is a list of Gs linked through g.schedlink. A G can only be +// on one gQueue or gList at a time. +type gList struct { + head guintptr +} + +// empty reports whether l is empty. +func (l *gList) empty() bool { + return l.head == 0 +} + +// push adds gp to the head of l. +func (l *gList) push(gp *g) { + gp.schedlink = l.head + l.head.set(gp) +} + +// pushAll prepends all Gs in q to l. +func (l *gList) pushAll(q gQueue) { + if !q.empty() { + q.tail.ptr().schedlink = l.head + l.head = q.head + } +} + +// pop removes and returns the head of l. If l is empty, it returns nil. 
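The gQueue and gList types introduced above are intrusive lists: the link lives in the G itself (g.schedlink), so moving a G between queues allocates nothing and splicing one whole queue onto another is O(1), which is exactly what globrunqputbatch now exploits. Here is a small sketch of the same idea with an ordinary struct; node and queue are hypothetical names, and the real types carry extra invariants (a G may sit on only one gQueue or gList at a time).

package main

import "fmt"

// node plays the role of a g: the link is embedded in the element itself,
// like g.schedlink, so no per-element allocation is needed to queue it.
type node struct {
	id   int
	next *node
}

// queue mirrors gQueue: head and tail pointers over the intrusive link.
type queue struct {
	head, tail *node
}

func (q *queue) pushBack(n *node) {
	n.next = nil
	if q.tail != nil {
		q.tail.next = n
	} else {
		q.head = n
	}
	q.tail = n
}

// pushBackAll splices q2 onto q in O(1), the way globrunqputbatch now hands
// a whole batch to sched.runq; q2 must not be used afterwards.
func (q *queue) pushBackAll(q2 queue) {
	if q2.tail == nil {
		return
	}
	if q.tail != nil {
		q.tail.next = q2.head
	} else {
		q.head = q2.head
	}
	q.tail = q2.tail
}

func (q *queue) pop() *node {
	n := q.head
	if n != nil {
		q.head = n.next
		if q.head == nil {
			q.tail = nil
		}
	}
	return n
}

func main() {
	var a, b queue
	a.pushBack(&node{id: 1})
	b.pushBack(&node{id: 2})
	b.pushBack(&node{id: 3})
	a.pushBackAll(b)
	for n := a.pop(); n != nil; n = a.pop() {
		fmt.Println(n.id) // 1, 2, 3
	}
}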
+func (l *gList) pop() *g { + gp := l.head.ptr() + if gp != nil { + l.head = gp.schedlink + } return gp } diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go index 82a2fe486a9..da5ffbbdee2 100644 --- a/libgo/go/runtime/proc_test.go +++ b/libgo/go/runtime/proc_test.go @@ -898,11 +898,22 @@ func testLockOSThreadExit(t *testing.T, prog string) { output := runTestProg(t, prog, "LockOSThreadMain", "GOMAXPROCS=1") want := "OK\n" if output != want { - t.Errorf("want %s, got %s\n", want, output) + t.Errorf("want %q, got %q", want, output) } output = runTestProg(t, prog, "LockOSThreadAlt") if output != want { - t.Errorf("want %s, got %s\n", want, output) + t.Errorf("want %q, got %q", want, output) + } +} + +func TestLockOSThreadAvoidsStatePropagation(t *testing.T) { + want := "OK\n" + skip := "unshare not permitted\n" + output := runTestProg(t, "testprog", "LockOSThreadAvoidsStatePropagation", "GOMAXPROCS=1") + if output == skip { + t.Skip("unshare syscall not permitted on this system") + } else if output != want { + t.Errorf("want %q, got %q", want, output) } } diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index 050f180c407..4e771629b0e 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -157,7 +157,7 @@ func check() { h uint64 i, i1 float32 j, j1 float64 - k, k1 unsafe.Pointer + k unsafe.Pointer l *uint16 m [4]byte ) @@ -246,21 +246,6 @@ func check() { throw("cas6") } - k = unsafe.Pointer(uintptr(0xfedcb123)) - if sys.PtrSize == 8 { - k = unsafe.Pointer(uintptr(k) << 10) - } - if casp(&k, nil, nil) { - throw("casp1") - } - k1 = add(k, 1) - if !casp(&k, k, k1) { - throw("casp2") - } - if k != k1 { - throw("casp3") - } - m = [4]byte{1, 1, 1, 1} atomic.Or8(&m[1], 0xf0) if m[0] != 1 || m[1] != 0xf1 || m[2] != 1 || m[3] != 1 { @@ -332,10 +317,10 @@ var debug struct { gccheckmark int32 gcpacertrace int32 gcshrinkstackoff int32 - gcrescanstacks int32 gcstoptheworld int32 gctrace int32 invalidptr int32 + madvdontneed int32 // for Linux; issue 28466 sbrk int32 scavenge int32 scheddetail int32 @@ -350,10 +335,10 @@ var dbgvars = []dbgVar{ {"gccheckmark", &debug.gccheckmark}, {"gcpacertrace", &debug.gcpacertrace}, {"gcshrinkstackoff", &debug.gcshrinkstackoff}, - {"gcrescanstacks", &debug.gcrescanstacks}, {"gcstoptheworld", &debug.gcstoptheworld}, {"gctrace", &debug.gctrace}, {"invalidptr", &debug.invalidptr}, + {"madvdontneed", &debug.madvdontneed}, {"sbrk", &debug.sbrk}, {"scavenge", &debug.scavenge}, {"scheddetail", &debug.scheddetail}, @@ -454,7 +439,9 @@ func timediv(v int64, div int32, rem *int32) int32 { for bit := 30; bit >= 0; bit-- { if v >= int64(div)<<uint(bit) { v = v - (int64(div) << uint(bit)) - res += 1 << uint(bit) + // Before this for loop, res was 0, thus all these + // power of 2 increments are now just bitsets. 
+ res |= 1 << uint(bit) } } if v >= int64(div) { diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 0e9cf63c10d..4cd68da2ea2 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -439,7 +440,12 @@ type g struct { scang uintptr // the g that wants to scan this g's stack (uintptr to avoid write barrier) scangcw uintptr // gc worker for scanning stack (uintptr to avoid write barrier) - isSystemGoroutine bool // whether goroutine is a "system" goroutine + isSystemGoroutine bool // whether goroutine is a "system" goroutine + isFinalizerGoroutine bool // whether goroutine is the finalizer goroutine + + deferring bool // whether we are running a deferred function + goexiting bool // whether we are running Goexit + ranCgocallBackDone bool // whether we deferred CgocallBackDone traceback uintptr // stack traceback buffer @@ -463,6 +469,7 @@ type m struct { caughtsig guintptr // goroutine running during fatal signal p puintptr // attached p for executing go code (nil if not executing go code) nextp puintptr + oldp puintptr // the p that was attached before executing a syscall id int64 mallocing int32 throwing int32 @@ -471,7 +478,6 @@ type m struct { softfloat int32 dying int32 profilehz int32 - helpgc int32 spinning bool // m is out of work and is actively looking for work blocked bool // m is blocked on a note inwb bool // m is executing a write barrier @@ -564,8 +570,10 @@ type p struct { runnext guintptr // Available G's (status == Gdead) - gfree *g - gfreecnt int32 + gFree struct { + gList + n int32 + } sudogcache []*sudog sudogbuf [128]*sudog @@ -604,7 +612,7 @@ type p struct { runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point - pad [sys.CacheLineSize]byte + pad cpu.CacheLinePad } type schedt struct { @@ -632,14 +640,27 @@ type schedt struct { nmspinning uint32 // See "Worker thread parking/unparking" comment in proc.go. // Global runnable queue. - runqhead guintptr - runqtail guintptr + runq gQueue runqsize int32 + // disable controls selective disabling of the scheduler. + // + // Use schedEnableUser to control this. + // + // disable is protected by sched.lock. + disable struct { + // user disables scheduling of user goroutines. + user bool + runnable gQueue // pending runnable Gs + n int32 // length of runnable + } + // Global cache of dead G's. - gflock mutex - gfree *g - ngfree int32 + gFree struct { + lock mutex + list gList // Gs + n int32 + } // Central cache of sudog structs. sudoglock mutex diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go index fb8373f53b5..d658a349ed2 100644 --- a/libgo/go/runtime/select.go +++ b/libgo/go/runtime/select.go @@ -105,7 +105,7 @@ func block() { // // selectgo returns the index of the chosen scase, which matches the // ordinal position of its respective select{recv,send,default} call. -// Also, if the chosen scase was a receive operation, it returns whether +// Also, if the chosen scase was a receive operation, it reports whether // a value was received. 
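A note on the padding change above: replacing p's pad [sys.CacheLineSize]byte with cpu.CacheLinePad (and, in sema.go just below, sizing the semtable pad from cpu.CacheLinePadSize) keeps the same purpose, which is to stop two Ps or two semaRoot buckets from sharing a cache line and invalidating each other's hot fields on every write (false sharing). internal/cpu is not importable outside the standard library, so the sketch below assumes a 64-byte line; counter is a hypothetical type and the program only illustrates the layout, it does not measure the effect.

package main

import (
	"fmt"
	"sync"
	"unsafe"
)

// counter holds one hot, independently updated field per worker. The pad
// pushes each element onto its own cache line so concurrent increments do
// not keep stealing the same line from each other. 64 bytes is an assumed
// common line size; the runtime gets the real value via internal/cpu.
type counter struct {
	n   uint64
	pad [64 - unsafe.Sizeof(uint64(0))]byte
}

func main() {
	counters := make([]counter, 4)
	var wg sync.WaitGroup
	for i := range counters {
		wg.Add(1)
		go func(c *counter) {
			defer wg.Done()
			for j := 0; j < 1000000; j++ {
				c.n++ // each goroutine owns its own counter, so no atomics needed
			}
		}(&counters[i])
	}
	wg.Wait()
	var total uint64
	for i := range counters {
		total += counters[i].n
	}
	fmt.Println("total:", total, "sizeof(counter):", unsafe.Sizeof(counter{}))
}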
func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { if debugSelect { diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index 273e8aa54d9..2c7ad329662 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -20,8 +20,8 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" - "runtime/internal/sys" "unsafe" ) @@ -48,7 +48,7 @@ const semTabSize = 251 var semtable [semTabSize]struct { root semaRoot - pad [sys.CacheLineSize - unsafe.Sizeof(semaRoot{})]byte + pad [cpu.CacheLinePadSize - unsafe.Sizeof(semaRoot{})]byte } //go:linkname sync_runtime_Semacquire sync.runtime_Semacquire diff --git a/libgo/go/runtime/semasleep_test.go b/libgo/go/runtime/semasleep_test.go new file mode 100644 index 00000000000..5b2cc64483f --- /dev/null +++ b/libgo/go/runtime/semasleep_test.go @@ -0,0 +1,88 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//+build !nacl,!plan9,!windows,!js + +package runtime_test + +import ( + "internal/testenv" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "syscall" + "testing" + "time" +) + +// Issue #27250. Spurious wakeups to pthread_cond_timedwait_relative_np +// shouldn't cause semasleep to retry with the same timeout which would +// cause indefinite spinning. +func TestSpuriousWakeupsNeverHangSemasleep(t *testing.T) { + testenv.MustHaveGoBuild(t) + tempDir, err := ioutil.TempDir("", "issue-27250") + if err != nil { + t.Fatalf("Failed to create the temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + repro := ` + package main + + import "time" + + func main() { + <-time.After(1 * time.Second) + } + ` + mainPath := filepath.Join(tempDir, "main.go") + if err := ioutil.WriteFile(mainPath, []byte(repro), 0644); err != nil { + t.Fatalf("Failed to create temp file for repro.go: %v", err) + } + binaryPath := filepath.Join(tempDir, "binary") + + // Build the binary so that we can send the signal to its PID. + out, err := exec.Command(testenv.GoToolPath(t), "build", "-o", binaryPath, mainPath).CombinedOutput() + if err != nil { + t.Fatalf("Failed to compile the binary: err: %v\nOutput: %s\n", err, out) + } + if err := os.Chmod(binaryPath, 0755); err != nil { + t.Fatalf("Failed to chmod binary: %v", err) + } + + // Now run the binary. + cmd := exec.Command(binaryPath) + if err := cmd.Start(); err != nil { + t.Fatalf("Failed to start command: %v", err) + } + doneCh := make(chan error, 1) + go func() { + doneCh <- cmd.Wait() + }() + + // With the repro running, we can continuously send to it + // a non-terminal signal such as SIGIO, to spuriously + // wakeup pthread_cond_timedwait_relative_np. + unfixedTimer := time.NewTimer(2 * time.Second) + for { + select { + case <-time.After(200 * time.Millisecond): + // Send the pesky signal that toggles spinning + // indefinitely if #27520 is not fixed. 
+ cmd.Process.Signal(syscall.SIGIO) + + case <-unfixedTimer.C: + t.Error("Program failed to return on time and has to be killed, issue #27520 still exists") + cmd.Process.Signal(syscall.SIGKILL) + return + + case err := <-doneCh: + if err != nil { + t.Fatalf("The program returned but unfortunately with an error: %v", err) + } + return + } + } +} diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index 84623d33c66..0a2cf724c0d 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -725,7 +725,7 @@ func unminitSignals() { } } -// blockableSig returns whether sig may be blocked by the signal mask. +// blockableSig reports whether sig may be blocked by the signal mask. // We never want to block the signals marked _SigUnblock; // these are the synchronous signals that turn into a Go panic. // In a Go program--not a c-archive/c-shared--we never want to block diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index 7f9db4efa9e..335532d126b 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/math" "runtime/internal/sys" "unsafe" ) @@ -31,28 +32,6 @@ type notInHeapSlice struct { cap int } -// maxElems is a lookup table containing the maximum capacity for a slice. -// The index is the size of the slice element. -var maxElems = [...]uintptr{ - ^uintptr(0), - maxAlloc / 1, maxAlloc / 2, maxAlloc / 3, maxAlloc / 4, - maxAlloc / 5, maxAlloc / 6, maxAlloc / 7, maxAlloc / 8, - maxAlloc / 9, maxAlloc / 10, maxAlloc / 11, maxAlloc / 12, - maxAlloc / 13, maxAlloc / 14, maxAlloc / 15, maxAlloc / 16, - maxAlloc / 17, maxAlloc / 18, maxAlloc / 19, maxAlloc / 20, - maxAlloc / 21, maxAlloc / 22, maxAlloc / 23, maxAlloc / 24, - maxAlloc / 25, maxAlloc / 26, maxAlloc / 27, maxAlloc / 28, - maxAlloc / 29, maxAlloc / 30, maxAlloc / 31, maxAlloc / 32, -} - -// maxSliceCap returns the maximum capacity for a slice. -func maxSliceCap(elemsize uintptr) uintptr { - if elemsize < uintptr(len(maxElems)) { - return maxElems[elemsize] - } - return maxAlloc / elemsize -} - func panicmakeslicelen() { panic(errorString("makeslice: len out of range")) } @@ -62,21 +41,21 @@ func panicmakeslicecap() { } func makeslice(et *_type, len, cap int) unsafe.Pointer { - // NOTE: The len > maxElements check here is not strictly necessary, - // but it produces a 'len out of range' error instead of a 'cap out of range' error - // when someone does make([]T, bignumber). 'cap out of range' is true too, - // but since the cap is only being supplied implicitly, saying len is clearer. - // See issue 4085. - maxElements := maxSliceCap(et.size) - if len < 0 || uintptr(len) > maxElements { - panicmakeslicelen() - } - - if cap < len || uintptr(cap) > maxElements { + mem, overflow := math.MulUintptr(et.size, uintptr(cap)) + if overflow || mem > maxAlloc || len < 0 || len > cap { + // NOTE: Produce a 'len out of range' error instead of a + // 'cap out of range' error when someone does make([]T, bignumber). + // 'cap out of range' is true too, but since the cap is only being + // supplied implicitly, saying len is clearer. + // See golang.org/issue/4085. 
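The makeslice rewrite above replaces the maxElems lookup table with a single overflow-aware multiply, mem, overflow := math.MulUintptr(et.size, uintptr(cap)), rejecting the allocation if the product overflows or exceeds maxAlloc. runtime/internal/math is runtime-only, but math/bits exposes the same kind of full-width multiply to user code. A hedged sketch follows; checkedSize, errTooBig and the maxAlloc value are made up for illustration.

package main

import (
	"errors"
	"fmt"
	"math/bits"
)

// maxAlloc here is only an illustrative cap, not the runtime's constant.
const maxAlloc = 1 << 30

var errTooBig = errors.New("allocation size out of range")

// checkedSize mirrors the shape of the new makeslice check: one full-width
// multiply detects overflow directly, replacing the old per-element-size
// lookup table (maxElems).
func checkedSize(elemSize, n uint) (uint, error) {
	hi, lo := bits.Mul(elemSize, n)
	if hi != 0 || lo > maxAlloc {
		return 0, errTooBig
	}
	return lo, nil
}

func main() {
	if sz, err := checkedSize(8, 1<<20); err == nil {
		fmt.Println("ok, bytes needed:", sz)
	}
	if _, err := checkedSize(1<<16, 1<<16); err != nil {
		fmt.Println("rejected:", err) // overflows on 32-bit, exceeds the cap on 64-bit
	}
}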
+ mem, overflow := math.MulUintptr(et.size, uintptr(len)) + if overflow || mem > maxAlloc || len < 0 { + panicmakeslicelen() + } panicmakeslicecap() } - return mallocgc(et.size*uintptr(cap), et, true) + return mallocgc(mem, et, true) } func makeslice64(et *_type, len64, cap64 int64) unsafe.Pointer { @@ -107,10 +86,11 @@ func growslice(et *_type, old slice, cap int) slice { msanread(old.array, uintptr(old.len*int(et.size))) } + if cap < old.cap { + panic(errorString("growslice: cap out of range")) + } + if et.size == 0 { - if cap < old.cap { - panic(errorString("growslice: cap out of range")) - } // append should not create a slice with nil pointer but non-zero len. // We assume that append doesn't need to preserve old.array in this case. return slice{unsafe.Pointer(&zerobase), cap, cap} @@ -172,15 +152,14 @@ func growslice(et *_type, old slice, cap int) slice { default: lenmem = uintptr(old.len) * et.size newlenmem = uintptr(cap) * et.size - capmem = roundupsize(uintptr(newcap) * et.size) - overflow = uintptr(newcap) > maxSliceCap(et.size) + capmem, overflow = math.MulUintptr(et.size, uintptr(newcap)) + capmem = roundupsize(capmem) newcap = int(capmem / et.size) } - // The check of overflow (uintptr(newcap) > maxSliceCap(et.size)) - // in addition to capmem > _MaxMem is needed to prevent an overflow - // which can be used to trigger a segfault on 32bit architectures - // with this example program: + // The check of overflow in addition to capmem > maxAlloc is needed + // to prevent an overflow which can be used to trigger a segfault + // on 32bit architectures with this example program: // // type T [1<<27 + 1]int64 // @@ -191,28 +170,26 @@ func growslice(et *_type, old slice, cap int) slice { // s = append(s, d, d, d, d) // print(len(s), "\n") // } - if cap < old.cap || overflow || capmem > maxAlloc { + if overflow || capmem > maxAlloc { panic(errorString("growslice: cap out of range")) } var p unsafe.Pointer if et.kind&kindNoPointers != 0 { p = mallocgc(capmem, nil, false) - memmove(p, old.array, lenmem) // The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length). // Only clear the part that will not be overwritten. memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem) } else { // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory. p = mallocgc(capmem, et, true) - if !writeBarrier.enabled { - memmove(p, old.array, lenmem) - } else { - for i := uintptr(0); i < lenmem; i += et.size { - typedmemmove(et, add(p, i), add(old.array, i)) - } + if writeBarrier.enabled { + // Only shade the pointers in old.array since we know the destination slice p + // only contains nil pointers because it has been cleared during alloc. 
+ bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem) } } + memmove(p, old.array, lenmem) return slice{p, cap, newcap} } diff --git a/libgo/go/runtime/slice_test.go b/libgo/go/runtime/slice_test.go index c2dfb7afd19..0463fc70a76 100644 --- a/libgo/go/runtime/slice_test.go +++ b/libgo/go/runtime/slice_test.go @@ -10,20 +10,68 @@ import ( const N = 20 -func BenchmarkMakeSlice(b *testing.B) { - var x []byte - for i := 0; i < b.N; i++ { - x = make([]byte, 32) - _ = x - } -} - type ( struct24 struct{ a, b, c int64 } struct32 struct{ a, b, c, d int64 } struct40 struct{ a, b, c, d, e int64 } ) +func BenchmarkMakeSlice(b *testing.B) { + const length = 2 + b.Run("Byte", func(b *testing.B) { + var x []byte + for i := 0; i < b.N; i++ { + x = make([]byte, length, 2*length) + _ = x + } + }) + b.Run("Int16", func(b *testing.B) { + var x []int16 + for i := 0; i < b.N; i++ { + x = make([]int16, length, 2*length) + _ = x + } + }) + b.Run("Int", func(b *testing.B) { + var x []int + for i := 0; i < b.N; i++ { + x = make([]int, length, 2*length) + _ = x + } + }) + b.Run("Ptr", func(b *testing.B) { + var x []*byte + for i := 0; i < b.N; i++ { + x = make([]*byte, length, 2*length) + _ = x + } + }) + b.Run("Struct", func(b *testing.B) { + b.Run("24", func(b *testing.B) { + var x []struct24 + for i := 0; i < b.N; i++ { + x = make([]struct24, length, 2*length) + _ = x + } + }) + b.Run("32", func(b *testing.B) { + var x []struct32 + for i := 0; i < b.N; i++ { + x = make([]struct32, length, 2*length) + _ = x + } + }) + b.Run("40", func(b *testing.B) { + var x []struct40 + for i := 0; i < b.N; i++ { + x = make([]struct40, length, 2*length) + _ = x + } + }) + + }) +} + func BenchmarkGrowSlice(b *testing.B) { b.Run("Byte", func(b *testing.B) { x := make([]byte, 9) diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go index 5296ebddf39..025ea7a3bd7 100644 --- a/libgo/go/runtime/string.go +++ b/libgo/go/runtime/string.go @@ -146,7 +146,8 @@ func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) { // and otherwise intrinsified by the compiler. // // Some internal compiler optimizations use this function. -// - Used for m[string(k)] lookup where m is a string-keyed map and k is a []byte. +// - Used for m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)] +// where k is []byte, T1 to Tn is a nesting of struct and array literals. // - Used for "<"+string(b)+">" concatenation where b is []byte. // - Used for string(b)=="foo" comparison where b is []byte. func slicebytetostringtmp(b []byte) string { @@ -344,7 +345,7 @@ func index(s, t string) int { return 0 } for i := 0; i < len(s); i++ { - if s[i] == t[0] && hasprefix(s[i:], t) { + if s[i] == t[0] && hasPrefix(s[i:], t) { return i } } @@ -355,8 +356,8 @@ func contains(s, t string) bool { return index(s, t) >= 0 } -func hasprefix(s, t string) bool { - return len(s) >= len(t) && s[:len(t)] == t +func hasPrefix(s, prefix string) bool { + return len(s) >= len(prefix) && s[:len(prefix)] == prefix } const ( diff --git a/libgo/go/runtime/string_test.go b/libgo/go/runtime/string_test.go index 03327bbb1e2..ec83bb4d458 100644 --- a/libgo/go/runtime/string_test.go +++ b/libgo/go/runtime/string_test.go @@ -243,6 +243,36 @@ func TestCompareTempString(t *testing.T) { } } +func TestStringIndexHaystack(t *testing.T) { + // See issue 25864. + haystack := []byte("hello") + needle := "ll" + n := testing.AllocsPerRun(1000, func() { + if strings.Index(string(haystack), needle) != 2 { + t.Fatalf("needle not found") + } + }) + // was n != 0, changed for gccgo. 
+ if n > 1 { + t.Fatalf("want 0 allocs, got %v", n) + } +} + +func TestStringIndexNeedle(t *testing.T) { + // See issue 25864. + haystack := "hello" + needle := []byte("ll") + n := testing.AllocsPerRun(1000, func() { + if strings.Index(haystack, string(needle)) != 2 { + t.Fatalf("needle not found") + } + }) + // was n != 0, changed for gccgo + if n > 1 { + t.Fatalf("want 0 allocs, got %v", n) + } +} + func TestStringOnStack(t *testing.T) { s := "" for i := 0; i < 3; i++ { diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index 4caa39d5e94..f2b0ee8abaa 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -84,12 +84,12 @@ func badsystemstack() { // used only when the caller knows that *ptr contains no heap pointers // because either: // -// 1. *ptr is initialized memory and its type is pointer-free. +// *ptr is initialized memory and its type is pointer-free, or // -// 2. *ptr is uninitialized memory (e.g., memory that's being reused -// for a new allocation) and hence contains only "junk". +// *ptr is uninitialized memory (e.g., memory that's being reused +// for a new allocation) and hence contains only "junk". // -// in memclr_*.s +// The (CPU-specific) implementations of this function are in memclr_*.s. //go:noescape func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) @@ -164,7 +164,7 @@ func breakpoint() func asminit() {} -//go:linkname reflectcall reflect.call +//go:linkname reflectcall runtime.reflectcall //go:noescape func reflectcall(fntype *functype, fn *funcval, isInterface, isMethod bool, params, results *unsafe.Pointer) @@ -454,3 +454,15 @@ var usestackmaps bool // probestackmaps detects whether there are stack maps. //go:linkname probestackmaps runtime.probestackmaps func probestackmaps() bool + +// For the math/bits packages for gccgo. +//go:linkname getDivideError runtime.getDivideError +func getDivideError() error { + return divideError +} + +// For the math/bits packages for gccgo. +//go:linkname getOverflowError runtime.getOverflowError +func getOverflowError() error { + return overflowError +} diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go index 1cb910cd111..304c8e4da37 100644 --- a/libgo/go/runtime/stubs2.go +++ b/libgo/go/runtime/stubs2.go @@ -7,6 +7,7 @@ // +build !nacl // +build !js // +build !darwin +// +build !aix package runtime diff --git a/libgo/go/runtime/stubs3.go b/libgo/go/runtime/stubs3.go index 1af693be2e6..d3e331ab9da 100644 --- a/libgo/go/runtime/stubs3.go +++ b/libgo/go/runtime/stubs3.go @@ -7,6 +7,7 @@ // +build !nacl // +build !freebsd // +build !darwin +// +build !aix package runtime diff --git a/libgo/go/runtime/symtab.go b/libgo/go/runtime/symtab.go index d137122137c..d7e8c18b517 100644 --- a/libgo/go/runtime/symtab.go +++ b/libgo/go/runtime/symtab.go @@ -7,7 +7,7 @@ package runtime // Frames may be used to get function/file/line information for a // slice of PC values returned by Callers. type Frames struct { - // callers is a slice of PCs that have not yet been expanded. + // callers is a slice of PCs that have not yet been expanded to frames. callers []uintptr // The last PC we saw. @@ -125,7 +125,7 @@ type Func struct { // Note that in some situations involving plugins, there may be multiple // copies of a particular special runtime function. // Note: this list must match the list in cmd/internal/objabi/funcid.go. 
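The Frames comment tweak above is cosmetic, but it is worth recalling the intended use of that API: capture raw PCs with runtime.Callers, then expand them with runtime.CallersFrames, which (unlike per-PC FuncForPC lookups) yields one frame per inlined call. A short, self-contained usage sketch; printCallers and middle are just example names.

package main

import (
	"fmt"
	"runtime"
)

// printCallers captures raw PCs with runtime.Callers and expands them with
// CallersFrames; the iterator splits inlined calls into separate frames,
// which looking up each PC with FuncForPC would not.
func printCallers() {
	pcs := make([]uintptr, 16)
	n := runtime.Callers(2, pcs) // skip runtime.Callers and printCallers itself
	frames := runtime.CallersFrames(pcs[:n])
	for {
		frame, more := frames.Next()
		fmt.Printf("%s\n\t%s:%d\n", frame.Function, frame.File, frame.Line)
		if !more {
			break
		}
	}
}

func middle() { printCallers() }

func main() { middle() }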
-type funcID uint32 +type funcID uint8 const ( funcID_normal funcID = iota // not a special function @@ -146,13 +146,17 @@ const ( funcID_gogo funcID_externalthreadhandler funcID_debugCallV1 + funcID_gopanic + funcID_panicwrap + funcID_wrapper // any autogenerated code (hash/eq algorithms, method wrappers, etc.) ) // FuncForPC returns a *Func describing the function that contains the // given program counter address, or else nil. // // If pc represents multiple functions because of inlining, it returns -// the *Func describing the outermost function. +// the a *Func describing the innermost function, but with an entry +// of the outermost function. func FuncForPC(pc uintptr) *Func { name, _, _ := funcfileline(pc, -1) if name == "" { diff --git a/libgo/go/runtime/sys_darwin.go b/libgo/go/runtime/sys_darwin.go index 7efbef746cd..f34ac883524 100644 --- a/libgo/go/runtime/sys_darwin.go +++ b/libgo/go/runtime/sys_darwin.go @@ -50,6 +50,61 @@ func libcCall(fn, arg unsafe.Pointer) int32 { return res } +// The X versions of syscall expect the libc call to return a 64-bit result. +// Otherwise (the non-X version) expects a 32-bit result. +// This distinction is required because an error is indicated by returning -1, +// and we need to know whether to check 32 or 64 bits of the result. +// (Some libc functions that return 32 bits put junk in the upper 32 bits of AX.) + +//go:linkname syscall_syscall syscall.syscall +//go:nosplit +//go:cgo_unsafe_args +func syscall_syscall(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) { + entersyscallblock() + libcCall(unsafe.Pointer(funcPC(syscall)), unsafe.Pointer(&fn)) + exitsyscall() + return +} +func syscall() + +//go:linkname syscall_syscall6 syscall.syscall6 +//go:nosplit +//go:cgo_unsafe_args +func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) { + entersyscallblock() + libcCall(unsafe.Pointer(funcPC(syscall6)), unsafe.Pointer(&fn)) + exitsyscall() + return +} +func syscall6() + +//go:linkname syscall_syscall6X syscall.syscall6X +//go:nosplit +//go:cgo_unsafe_args +func syscall_syscall6X(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) { + entersyscallblock() + libcCall(unsafe.Pointer(funcPC(syscall6X)), unsafe.Pointer(&fn)) + exitsyscall() + return +} +func syscall6X() + +//go:linkname syscall_rawSyscall syscall.rawSyscall +//go:nosplit +//go:cgo_unsafe_args +func syscall_rawSyscall(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) { + libcCall(unsafe.Pointer(funcPC(syscall)), unsafe.Pointer(&fn)) + return +} + +//go:linkname syscall_rawSyscall6 syscall.rawSyscall6 +//go:nosplit +//go:cgo_unsafe_args +func syscall_rawSyscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) { + libcCall(unsafe.Pointer(funcPC(syscall6)), unsafe.Pointer(&fn)) + return +} + // The *_trampoline functions convert from the Go calling convention to the C calling convention // and then call the underlying libc function. They are defined in sys_darwin_$ARCH.s. @@ -370,5 +425,5 @@ func closeonexec(fd int32) { //go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib" // Magic incantation to get libSystem actually dynamically linked. -// TODO: Why does the code require this? See cmd/compile/internal/ld/go.go:210 +// TODO: Why does the code require this? 
See cmd/link/internal/ld/go.go //go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib" diff --git a/libgo/go/runtime/sys_darwin_32.go b/libgo/go/runtime/sys_darwin_32.go new file mode 100644 index 00000000000..2f17091327d --- /dev/null +++ b/libgo/go/runtime/sys_darwin_32.go @@ -0,0 +1,32 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin +// +build 386 arm + +package runtime + +import "unsafe" + +//go:linkname syscall_syscall9 syscall.syscall9 +//go:nosplit +//go:cgo_unsafe_args +func syscall_syscall9(fn, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2, err uintptr) { + entersyscallblock() + libcCall(unsafe.Pointer(funcPC(syscall9)), unsafe.Pointer(&fn)) + exitsyscall() + return +} +func syscall9() + +//go:linkname syscall_syscallPtr syscall.syscallPtr +//go:nosplit +//go:cgo_unsafe_args +func syscall_syscallPtr(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) { + entersyscallblock() + libcCall(unsafe.Pointer(funcPC(syscallPtr)), unsafe.Pointer(&fn)) + exitsyscall() + return +} +func syscallPtr() diff --git a/libgo/go/runtime/sys_darwin_64.go b/libgo/go/runtime/sys_darwin_64.go new file mode 100644 index 00000000000..8c128811b9f --- /dev/null +++ b/libgo/go/runtime/sys_darwin_64.go @@ -0,0 +1,32 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin +// +build amd64 arm64 + +package runtime + +import "unsafe" + +//go:linkname syscall_syscallX syscall.syscallX +//go:nosplit +//go:cgo_unsafe_args +func syscall_syscallX(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) { + entersyscallblock() + libcCall(unsafe.Pointer(funcPC(syscallX)), unsafe.Pointer(&fn)) + exitsyscall() + return +} +func syscallX() + +//go:linkname syscall_syscallXPtr syscall.syscallXPtr +//go:nosplit +//go:cgo_unsafe_args +func syscall_syscallXPtr(fn, a1, a2, a3 uintptr) (r1, r2, err uintptr) { + entersyscallblock() + libcCall(unsafe.Pointer(funcPC(syscallXPtr)), unsafe.Pointer(&fn)) + exitsyscall() + return +} +func syscallXPtr() diff --git a/libgo/go/runtime/testdata/testprog/gc.go b/libgo/go/runtime/testdata/testprog/gc.go index 542451753b7..6b308e073b9 100644 --- a/libgo/go/runtime/testdata/testprog/gc.go +++ b/libgo/go/runtime/testdata/testprog/gc.go @@ -17,6 +17,7 @@ func init() { register("GCFairness", GCFairness) register("GCFairness2", GCFairness2) register("GCSys", GCSys) + register("GCPhys", GCPhys) } func GCSys() { @@ -51,8 +52,11 @@ func GCSys() { fmt.Printf("OK\n") } +var sink []byte + func workthegc() []byte { - return make([]byte, 1029) + sink = make([]byte, 1029) + return sink } func GCFairness() { @@ -124,3 +128,85 @@ func GCFairness2() { } fmt.Println("OK") } + +var maybeSaved []byte + +func GCPhys() { + // In this test, we construct a very specific scenario. We first + // allocate N objects and drop half of their pointers on the floor, + // effectively creating N/2 'holes' in our allocated arenas. We then + // try to allocate objects twice as big. At the end, we measure the + // physical memory overhead of large objects. + // + // The purpose of this test is to ensure that the GC scavenges free + // spans eagerly to ensure high physical memory utilization even + // during fragmentation. 
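The measurement the GCPhys test builds up to further down is HeapSys minus HeapReleased, used as a rough proxy for how much heap address space is still backed by physical pages, compared against HeapAlloc to estimate fragmentation overhead. Here is a tiny standalone sketch of reading those fields; the 64 MiB figure is arbitrary and nothing below reproduces the test's thresholds.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Hold ~64 MiB live; the size is arbitrary, just enough to make the
	// numbers below non-trivial.
	big := make([]byte, 64<<20)
	runtime.GC()

	var s runtime.MemStats
	runtime.ReadMemStats(&s)
	// HeapSys is heap address space obtained from the OS; HeapReleased is
	// the part of it handed back. Their difference is the rough
	// "physically backed" figure the GCPhys test compares against HeapAlloc.
	backed := s.HeapSys - s.HeapReleased
	fmt.Printf("HeapAlloc=%d MiB, HeapSys-HeapReleased=%d MiB\n",
		s.HeapAlloc>>20, backed>>20)
	runtime.KeepAlive(big)
}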
+ const ( + // Unfortunately, measuring actual used physical pages is + // difficult because HeapReleased doesn't include the parts + // of an arena that haven't yet been touched. So, we just + // make objects and size sufficiently large such that even + // 64 MB overhead is relatively small in the final + // calculation. + // + // Currently, we target 480MiB worth of memory for our test, + // computed as size * objects + (size*2) * (objects/2) + // = 2 * size * objects + // + // Size must be also large enough to be considered a large + // object (not in any size-segregated span). + size = 1 << 20 + objects = 240 + ) + // Save objects which we want to survive, and condemn objects which we don't. + // Note that we condemn objects in this way and release them all at once in + // order to avoid having the GC start freeing up these objects while the loop + // is still running and filling in the holes we intend to make. + saved := make([][]byte, 0, objects) + condemned := make([][]byte, 0, objects/2+1) + for i := 0; i < objects; i++ { + // Write into a global, to prevent this from being optimized away by + // the compiler in the future. + maybeSaved = make([]byte, size) + if i%2 == 0 { + saved = append(saved, maybeSaved) + } else { + condemned = append(condemned, maybeSaved) + } + } + condemned = nil + // Clean up the heap. This will free up every other object created above + // (i.e. everything in condemned) creating holes in the heap. + runtime.GC() + // Allocate many new objects of 2x size. + for i := 0; i < objects/2; i++ { + saved = append(saved, make([]byte, size*2)) + } + // Clean up the heap again just to put it in a known state. + runtime.GC() + // heapBacked is an estimate of the amount of physical memory used by + // this test. HeapSys is an estimate of the size of the mapped virtual + // address space (which may or may not be backed by physical pages) + // whereas HeapReleased is an estimate of the amount of bytes returned + // to the OS. Their difference then roughly corresponds to the amount + // of virtual address space that is backed by physical pages. + var stats runtime.MemStats + runtime.ReadMemStats(&stats) + heapBacked := stats.HeapSys - stats.HeapReleased + // If heapBacked exceeds the amount of memory actually used for heap + // allocated objects by 10% (post-GC HeapAlloc should be quite close to + // the size of the working set), then fail. + // + // In the context of this test, that indicates a large amount of + // fragmentation with physical pages that are otherwise unused but not + // returned to the OS. + overuse := (float64(heapBacked) - float64(stats.HeapAlloc)) / float64(stats.HeapAlloc) + if overuse > 0.1 { + fmt.Printf("exceeded physical memory overuse threshold of 10%%: %3.2f%%\n"+ + "(alloc: %d, sys: %d, rel: %d, objs: %d)\n", overuse*100, stats.HeapAlloc, + stats.HeapSys, stats.HeapReleased, len(saved)) + return + } + fmt.Println("OK") + runtime.KeepAlive(saved) +} diff --git a/libgo/go/runtime/testdata/testprog/gettid.go b/libgo/go/runtime/testdata/testprog/gettid.go deleted file mode 100644 index 1b3e29ab08e..00000000000 --- a/libgo/go/runtime/testdata/testprog/gettid.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// +build linux - -package main - -import ( - "bytes" - "fmt" - "io/ioutil" - "os" - "syscall" -) - -func gettid() int { - return syscall.Gettid() -} - -func tidExists(tid int) (exists, supported bool) { - stat, err := ioutil.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid)) - if os.IsNotExist(err) { - return false, true - } - // Check if it's a zombie thread. - state := bytes.Fields(stat)[2] - return !(len(state) == 1 && state[0] == 'Z'), true -} diff --git a/libgo/go/runtime/testdata/testprog/lockosthread.go b/libgo/go/runtime/testdata/testprog/lockosthread.go index 88c0d12e4c1..fd3123e6474 100644 --- a/libgo/go/runtime/testdata/testprog/lockosthread.go +++ b/libgo/go/runtime/testdata/testprog/lockosthread.go @@ -24,6 +24,12 @@ func init() { runtime.LockOSThread() }) register("LockOSThreadAlt", LockOSThreadAlt) + + registerInit("LockOSThreadAvoidsStatePropagation", func() { + // Lock the OS thread now so main runs on the main thread. + runtime.LockOSThread() + }) + register("LockOSThreadAvoidsStatePropagation", LockOSThreadAvoidsStatePropagation) } func LockOSThreadMain() { @@ -92,3 +98,100 @@ func LockOSThreadAlt() { ok: println("OK") } + +func LockOSThreadAvoidsStatePropagation() { + // This test is similar to LockOSThreadAlt in that it will detect if a thread + // which should have died is still running. However, rather than do this with + // thread IDs, it does this by unsharing state on that thread. This way, it + // also detects whether new threads were cloned from the dead thread, and not + // from a clean thread. Cloning from a locked thread is undesirable since + // cloned threads will inherit potentially unwanted OS state. + // + // unshareFs, getcwd, and chdir("/tmp") are only guaranteed to work on + // Linux, so on other platforms this just checks that the runtime doesn't + // do anything terrible. + // + // This is running locked to the main OS thread. + + // GOMAXPROCS=1 makes this fail much more reliably if a tainted thread is + // cloned from. + if runtime.GOMAXPROCS(-1) != 1 { + println("requires GOMAXPROCS=1") + os.Exit(1) + } + + if err := chdir("/"); err != nil { + println("failed to chdir:", err.Error()) + os.Exit(1) + } + // On systems other than Linux, cwd == "". + cwd, err := getcwd() + if err != nil { + println("failed to get cwd:", err.Error()) + os.Exit(1) + } + if cwd != "" && cwd != "/" { + println("unexpected cwd", cwd, " wanted /") + os.Exit(1) + } + + ready := make(chan bool, 1) + go func() { + // This goroutine must be running on a new thread. + runtime.LockOSThread() + + // Unshare details about the FS, like the CWD, with + // the rest of the process on this thread. + // On systems other than Linux, this is a no-op. + if err := unshareFs(); err != nil { + if err == errNotPermitted { + println("unshare not permitted") + os.Exit(0) + } + println("failed to unshare fs:", err.Error()) + os.Exit(1) + } + // Chdir to somewhere else on this thread. + // On systems other than Linux, this is a no-op. + if err := chdir("/tmp"); err != nil { + println("failed to chdir:", err.Error()) + os.Exit(1) + } + + // The state on this thread is now considered "tainted", but it + // should no longer be observable in any other context. + + ready <- true + // Exit with the thread locked. + }() + <-ready + + // Spawn yet another goroutine and lock it. 
Since GOMAXPROCS=1, if + // for some reason state from the (hopefully dead) locked thread above + // propagated into a newly created thread (via clone), or that thread + // is actually being re-used, then we should get scheduled on such a + // thread with high likelihood. + done := make(chan bool) + go func() { + runtime.LockOSThread() + + // Get the CWD and check if this is the same as the main thread's + // CWD. Every thread should share the same CWD. + // On systems other than Linux, wd == "". + wd, err := getcwd() + if err != nil { + println("failed to get cwd:", err.Error()) + os.Exit(1) + } + if wd != cwd { + println("bad state from old thread propagated after it should have died") + os.Exit(1) + } + <-done + + runtime.UnlockOSThread() + }() + done <- true + runtime.UnlockOSThread() + println("OK") +} diff --git a/libgo/go/runtime/testdata/testprog/syscalls.go b/libgo/go/runtime/testdata/testprog/syscalls.go new file mode 100644 index 00000000000..098d5cadf8a --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/syscalls.go @@ -0,0 +1,11 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "errors" +) + +var errNotPermitted = errors.New("operation not permitted") diff --git a/libgo/go/runtime/testdata/testprog/syscalls_linux.go b/libgo/go/runtime/testdata/testprog/syscalls_linux.go new file mode 100644 index 00000000000..b8ac0876269 --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/syscalls_linux.go @@ -0,0 +1,59 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "syscall" +) + +func gettid() int { + return syscall.Gettid() +} + +func tidExists(tid int) (exists, supported bool) { + stat, err := ioutil.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid)) + if os.IsNotExist(err) { + return false, true + } + // Check if it's a zombie thread. + state := bytes.Fields(stat)[2] + return !(len(state) == 1 && state[0] == 'Z'), true +} + +func getcwd() (string, error) { + if !syscall.ImplementsGetwd { + return "", nil + } + // Use the syscall to get the current working directory. + // This is imperative for checking for OS thread state + // after an unshare since os.Getwd might just check the + // environment, or use some other mechanism. + var buf [4096]byte + n, err := syscall.Getcwd(buf[:]) + if err != nil { + return "", err + } + // Subtract one for null terminator. 
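The LockOSThreadAvoidsStatePropagation test above leans on a documented LockOSThread property: if a goroutine exits while still locked to its thread, the runtime terminates that thread instead of returning it to the pool, so thread-level OS state (such as the unshared working directory) cannot leak into later goroutines. A minimal sketch of relying on that guarantee; dirtyThreadWork is a hypothetical name and the actual thread-tainting work is elided.

package main

import (
	"fmt"
	"runtime"
)

// dirtyThreadWork locks itself to its OS thread, would taint per-thread OS
// state (elided), and then returns without calling UnlockOSThread. Because
// the goroutine exits while still locked, the runtime terminates the thread
// rather than reusing it, so the tainted state cannot be inherited elsewhere.
func dirtyThreadWork(done chan<- struct{}) {
	runtime.LockOSThread()
	// ... change per-thread OS state here, e.g. unshare(CLONE_FS) + chdir ...
	done <- struct{}{}
	// Deliberately no UnlockOSThread.
}

func main() {
	done := make(chan struct{})
	go dirtyThreadWork(done)
	<-done
	fmt.Println("the locked thread is discarded when its goroutine exits")
}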
+ return string(buf[:n-1]), nil +} + +func unshareFs() error { + err := syscall.Unshare(syscall.CLONE_FS) + if err != nil { + errno, ok := err.(syscall.Errno) + if ok && errno == syscall.EPERM { + return errNotPermitted + } + } + return err +} + +func chdir(path string) error { + return syscall.Chdir(path) +} diff --git a/libgo/go/runtime/testdata/testprog/gettid_none.go b/libgo/go/runtime/testdata/testprog/syscalls_none.go index 036db87e10e..7f8ded3994f 100644 --- a/libgo/go/runtime/testdata/testprog/gettid_none.go +++ b/libgo/go/runtime/testdata/testprog/syscalls_none.go @@ -13,3 +13,15 @@ func gettid() int { func tidExists(tid int) (exists, supported bool) { return false, false } + +func getcwd() (string, error) { + return "", nil +} + +func unshareFs() error { + return nil +} + +func chdir(path string) error { + return nil +} diff --git a/libgo/go/runtime/testdata/testprog/traceback_ancestors.go b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go index fe57c1c157e..0ee402c4bdc 100644 --- a/libgo/go/runtime/testdata/testprog/traceback_ancestors.go +++ b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go @@ -5,8 +5,10 @@ package main import ( + "bytes" "fmt" "runtime" + "strings" ) func init() { @@ -18,25 +20,50 @@ const numFrames = 2 func TracebackAncestors() { w := make(chan struct{}) - recurseThenCallGo(w, numGoroutines, numFrames) + recurseThenCallGo(w, numGoroutines, numFrames, true) <-w printStack() close(w) } +var ignoreGoroutines = make(map[string]bool) + func printStack() { buf := make([]byte, 1024) for { n := runtime.Stack(buf, true) if n < len(buf) { - fmt.Print(string(buf[:n])) + tb := string(buf[:n]) + + // Delete any ignored goroutines, if present. + pos := 0 + for pos < len(tb) { + next := pos + strings.Index(tb[pos:], "\n\n") + if next < pos { + next = len(tb) + } else { + next += len("\n\n") + } + + if strings.HasPrefix(tb[pos:], "goroutine ") { + id := tb[pos+len("goroutine "):] + id = id[:strings.IndexByte(id, ' ')] + if ignoreGoroutines[id] { + tb = tb[:pos] + tb[next:] + next = pos + } + } + pos = next + } + + fmt.Print(tb) return } buf = make([]byte, 2*len(buf)) } } -func recurseThenCallGo(w chan struct{}, frames int, goroutines int) { +func recurseThenCallGo(w chan struct{}, frames int, goroutines int, main bool) { if frames == 0 { // Signal to TracebackAncestors that we are done recursing and starting goroutines. w <- struct{}{} @@ -44,10 +71,29 @@ func recurseThenCallGo(w chan struct{}, frames int, goroutines int) { return } if goroutines == 0 { + // Record which goroutine this is so we can ignore it + // in the traceback if it hasn't finished exiting by + // the time we printStack. + if !main { + ignoreGoroutines[goroutineID()] = true + } + // Start the next goroutine now that there are no more recursions left // for this current goroutine. 
- go recurseThenCallGo(w, frames-1, numFrames) + go recurseThenCallGo(w, frames-1, numFrames, false) return } - recurseThenCallGo(w, frames, goroutines-1) + recurseThenCallGo(w, frames, goroutines-1, main) +} + +func goroutineID() string { + buf := make([]byte, 128) + runtime.Stack(buf, false) + const prefix = "goroutine " + if !bytes.HasPrefix(buf, []byte(prefix)) { + panic(fmt.Sprintf("expected %q at beginning of traceback:\n%s", prefix, buf)) + } + buf = buf[len(prefix):] + n := bytes.IndexByte(buf, ' ') + return string(buf[:n]) } diff --git a/libgo/go/runtime/testdata/testprogcgo/exec.go b/libgo/go/runtime/testdata/testprogcgo/exec.go index 2e948401c87..94da5dc526b 100644 --- a/libgo/go/runtime/testdata/testprogcgo/exec.go +++ b/libgo/go/runtime/testdata/testprogcgo/exec.go @@ -75,6 +75,14 @@ func CgoExecSignalMask() { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { + // An overloaded system + // may fail with EAGAIN. + // This doesn't tell us + // anything useful; ignore it. + // Issue #27731. + if isEAGAIN(err) { + return + } fmt.Printf("iteration %d: %v\n", j, err) os.Exit(1) } @@ -87,3 +95,11 @@ func CgoExecSignalMask() { fmt.Println("OK") } + +// isEAGAIN reports whether err is an EAGAIN error from a process execution. +func isEAGAIN(err error) bool { + if p, ok := err.(*os.PathError); ok { + err = p.Err + } + return err == syscall.EAGAIN +} diff --git a/libgo/go/runtime/testdata/testprogcgo/pprof.go b/libgo/go/runtime/testdata/testprogcgo/pprof.go index c4fde0251ae..9fa5c1b518a 100644 --- a/libgo/go/runtime/testdata/testprogcgo/pprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/pprof.go @@ -28,6 +28,9 @@ void cpuHog() { salt2 = foo; } +void cpuHog2() { +} + static int cpuHogCount; struct cgoTracebackArg { @@ -39,10 +42,13 @@ struct cgoTracebackArg { // pprofCgoTraceback is passed to runtime.SetCgoTraceback. // For testing purposes it pretends that all CPU hits in C code are in cpuHog. +// Issue #29034: At least 2 frames are required to verify all frames are captured +// since runtime/pprof ignores the runtime.goexit base frame if it exists. void pprofCgoTraceback(void* parg) { struct cgoTracebackArg* arg = (struct cgoTracebackArg*)(parg); arg->buf[0] = (uintptr_t)(cpuHog) + 0x10; - arg->buf[1] = 0; + arg->buf[1] = (uintptr_t)(cpuHog2) + 0x4; + arg->buf[2] = 0; ++cpuHogCount; } diff --git a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go index bec4ef50d76..f803511c526 100644 --- a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go @@ -31,6 +31,9 @@ void cpuHogThread() { threadSalt2 = foo; } +void cpuHogThread2() { +} + static int cpuHogThreadCount; struct cgoTracebackArg { @@ -45,7 +48,8 @@ struct cgoTracebackArg { void pprofCgoThreadTraceback(void* parg) { struct cgoTracebackArg* arg = (struct cgoTracebackArg*)(parg); arg->buf[0] = (uintptr_t)(cpuHogThread) + 0x10; - arg->buf[1] = 0; + arg->buf[1] = (uintptr_t)(cpuHogThread2) + 0x4; + arg->buf[2] = 0; __sync_add_and_fetch(&cpuHogThreadCount, 1); } diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go index ea61baa9195..71e7556776e 100644 --- a/libgo/go/runtime/time.go +++ b/libgo/go/runtime/time.go @@ -7,7 +7,7 @@ package runtime import ( - "runtime/internal/sys" + "internal/cpu" "unsafe" ) @@ -50,7 +50,7 @@ var timers [timersLen]struct { // The padding should eliminate false sharing // between timersBucket values. 
- pad [sys.CacheLineSize - unsafe.Sizeof(timersBucket{})%sys.CacheLineSize]byte + pad [cpu.CacheLinePadSize - unsafe.Sizeof(timersBucket{})%cpu.CacheLinePadSize]byte } func (t *timer) assignBucket() *timersBucket { @@ -156,7 +156,7 @@ func (tb *timersBucket) addtimerLocked(t *timer) bool { } if t.i == 0 { // siftup moved to top: new earliest deadline. - if tb.sleeping { + if tb.sleeping && tb.sleepUntil > t.when { tb.sleeping = false notewakeup(&tb.waitnote) } @@ -164,11 +164,11 @@ func (tb *timersBucket) addtimerLocked(t *timer) bool { tb.rescheduling = false goready(tb.gp, 0) } - } - if !tb.created { - tb.created = true - expectSystemGoroutine() - go timerproc(tb) + if !tb.created { + tb.created = true + expectSystemGoroutine() + go timerproc(tb) + } } return true } @@ -188,14 +188,22 @@ func deltimer(t *timer) bool { tb := t.tb lock(&tb.lock) + removed, ok := tb.deltimerLocked(t) + unlock(&tb.lock) + if !ok { + badTimer() + } + return removed +} + +func (tb *timersBucket) deltimerLocked(t *timer) (removed, ok bool) { // t may not be registered anymore and may have // a bogus i (typically 0, if generated by Go). // Verify it before proceeding. i := t.i last := len(tb.t) - 1 if i < 0 || i > last || tb.t[i] != t { - unlock(&tb.lock) - return false + return false, true } if i != last { tb.t[i] = tb.t[last] @@ -203,7 +211,7 @@ func deltimer(t *timer) bool { } tb.t[last] = nil tb.t = tb.t[:last] - ok := true + ok = true if i != last { if !siftupTimer(tb.t, i) { ok = false @@ -212,11 +220,26 @@ func deltimer(t *timer) bool { ok = false } } + return true, ok +} + +func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) { + tb := t.tb + + lock(&tb.lock) + _, ok := tb.deltimerLocked(t) + if ok { + t.when = when + t.period = period + t.f = f + t.arg = arg + t.seq = seq + ok = tb.addtimerLocked(t) + } unlock(&tb.lock) if !ok { badTimer() } - return true } // Timerproc runs the time-driven events. @@ -438,23 +461,3 @@ func siftdownTimer(t []*timer, i int) bool { func badTimer() { panic(errorString("racy use of timers")) } - -// Entry points for net, time to call nanotime. - -//go:linkname poll_runtimeNano internal..z2fpoll.runtimeNano -func poll_runtimeNano() int64 { - return nanotime() -} - -//go:linkname time_runtimeNano time.runtimeNano -func time_runtimeNano() int64 { - return nanotime() -} - -// Monotonic times are reported as offsets from startNano. -// We initialize startNano to nanotime() - 1 so that on systems where -// monotonic time resolution is fairly low (e.g. Windows 2008 -// which appears to have a default resolution of 15ms), -// we avoid ever reporting a nanotime of 0. -// (Callers may want to use 0 as "time not set".) -var startNano int64 = nanotime() - 1 diff --git a/libgo/go/runtime/timeasm.go b/libgo/go/runtime/timeasm.go index 55b0d0729fb..6bfaa8a8f0e 100644 --- a/libgo/go/runtime/timeasm.go +++ b/libgo/go/runtime/timeasm.go @@ -3,8 +3,6 @@ // license that can be found in the LICENSE file. // Declarations for operating systems implementing time.now directly in assembly. -// Those systems are also expected to have nanotime subtract startNano, -// so that time.now and nanotime return the same monotonic clock readings. 
// +build ignore // +build windows diff --git a/libgo/go/runtime/timestub.go b/libgo/go/runtime/timestub.go index 9f1d111f4fd..14165cee974 100644 --- a/libgo/go/runtime/timestub.go +++ b/libgo/go/runtime/timestub.go @@ -14,5 +14,5 @@ import _ "unsafe" // for go:linkname //go:linkname time_now time.now func time_now() (sec int64, nsec int32, mono int64) { sec, nsec = walltime() - return sec, nsec, nanotime() - startNano + return sec, nsec, nanotime() } diff --git a/libgo/go/runtime/timestub2.go b/libgo/go/runtime/timestub2.go index 9ddc6fed918..00c2c55f461 100644 --- a/libgo/go/runtime/timestub2.go +++ b/libgo/go/runtime/timestub2.go @@ -5,6 +5,7 @@ // +build !darwin // +build !windows // +build !freebsd +// +build !aix package runtime diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index 530d5e4c86e..6db5b62558e 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -533,12 +533,12 @@ func traceEvent(ev byte, skip int, args ...uint64) { } func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, skip int, args ...uint64) { - buf := (*bufp).ptr() + buf := bufp.ptr() // TODO: test on non-zero extraBytes param. maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params if buf == nil || len(buf.arr)-buf.pos < maxSize { buf = traceFlush(traceBufPtrOf(buf), pid).ptr() - (*bufp).set(buf) + bufp.set(buf) } ticks := uint64(cputicks()) / traceTickDiv @@ -585,7 +585,7 @@ func traceStackID(mp *m, buf []location, skip int) uint64 { gp := mp.curg var nstk int if gp == _g_ { - nstk = callers(skip+1, buf[:]) + nstk = callers(skip+1, buf) } else if gp != nil { // FIXME: get stack trace of different goroutine. } @@ -689,11 +689,11 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) // so there must be no memory allocation or any activities // that causes tracing after this point. - buf := (*bufp).ptr() + buf := bufp.ptr() size := 1 + 2*traceBytesPerNumber + len(s) if buf == nil || len(buf.arr)-buf.pos < size { buf = traceFlush(traceBufPtrOf(buf), pid).ptr() - (*bufp).set(buf) + bufp.set(buf) } buf.byte(traceEvString) buf.varint(id) @@ -708,7 +708,7 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) buf.varint(uint64(slen)) buf.pos += copy(buf.arr[buf.pos:], s[:slen]) - (*bufp).set(buf) + bufp.set(buf) return id, bufp } @@ -1201,7 +1201,7 @@ func trace_userLog(id uint64, category, message string) { traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 3, id, categoryID) // traceEventLocked reserved extra space for val and len(val) // in buf, so buf now has room for the following. - buf := (*bufp).ptr() + buf := bufp.ptr() // double-check the message and its length can fit. // Otherwise, truncate the message. diff --git a/libgo/go/runtime/trace/annotation.go b/libgo/go/runtime/trace/annotation.go index d5a7d003fe2..82cb232dba9 100644 --- a/libgo/go/runtime/trace/annotation.go +++ b/libgo/go/runtime/trace/annotation.go @@ -171,7 +171,7 @@ func (r *Region) End() { userRegion(r.id, regionEndCode, r.regionType) } -// IsEnabled returns whether tracing is enabled. +// IsEnabled reports whether tracing is enabled. // The information is advisory only. The tracing status // may have changed by the time this function returns. 
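For context on the annotation.go comment fix above: trace.IsEnabled is the advisory guard user code can use to skip building expensive annotations when no trace is being collected, alongside the task/region/log APIs in runtime/trace. A small usage sketch; the output file name and the task and region names are arbitrary.

package main

import (
	"context"
	"fmt"
	"os"
	"runtime/trace"
)

func main() {
	f, err := os.Create("trace.out")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	if err := trace.Start(f); err != nil {
		panic(err)
	}
	defer trace.Stop()

	ctx, task := trace.NewTask(context.Background(), "work")
	defer task.End()

	region := trace.StartRegion(ctx, "step")
	// IsEnabled is advisory: it lets callers skip building expensive
	// annotations, but tracing may start or stop between the check and the use.
	if trace.IsEnabled() {
		trace.Log(ctx, "detail", "only computed while tracing")
	}
	fmt.Println("doing traced work")
	region.End()
}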
func IsEnabled() bool { diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go index 72a83a5ab2d..7581798e852 100644 --- a/libgo/go/runtime/traceback_gccgo.go +++ b/libgo/go/runtime/traceback_gccgo.go @@ -21,16 +21,20 @@ func printcreatedby(gp *g) { tracepc -= sys.PCQuantum } function, file, line := funcfileline(tracepc, -1) - if function != "" && showframe(function, gp) && gp.goid != 1 { - print("created by ", function, "\n") - print("\t", file, ":", line) - if entry != 0 && pc > entry { - print(" +", hex(pc-entry)) - } - print("\n") + if function != "" && showframe(function, gp, false) && gp.goid != 1 { + printcreatedby1(function, file, line, entry, pc) } } +func printcreatedby1(function, file string, line int, entry, pc uintptr) { + print("created by ", function, "\n") + print("\t", file, ":", line) + if entry != 0 && pc > entry { + print(" +", hex(pc-entry)) + } + print("\n") +} + // tracebackg is used to collect stack traces from other goroutines. type tracebackg struct { gp *g @@ -63,50 +67,108 @@ func callers(skip int, locbuf []location) int { func traceback(skip int32) { var locbuf [100]location c := c_callers(skip+1, &locbuf[0], int32(len(locbuf)), false) - printtrace(locbuf[:c], getg()) - printcreatedby(getg()) + gp := getg() + printtrace(locbuf[:c], gp) + printcreatedby(gp) + + if gp.ancestors == nil { + return + } + for _, ancestor := range *gp.ancestors { + printAncestorTraceback(ancestor) + } +} + +// printAncestorTraceback prints the traceback of the given ancestor. +func printAncestorTraceback(ancestor ancestorInfo) { + print("[originating from goroutine ", ancestor.goid, "]:\n") + for fidx, pc := range ancestor.pcs { + function, file, line := funcfileline(pc, -1) + if showfuncinfo(function, fidx == 0) { + printAncestorTracebackFuncInfo(function, file, line, pc) + } + } + if len(ancestor.pcs) == _TracebackMaxFrames { + print("...additional frames elided...\n") + } + // Show what created goroutine, except main goroutine (goid 1). + function, file, line := funcfileline(ancestor.gopc, -1) + if function != "" && showfuncinfo(function, false) && ancestor.goid != 1 { + printcreatedby1(function, file, line, funcentry(ancestor.gopc), ancestor.gopc) + } +} + +// printAncestorTraceback prints the given function info at a given pc +// within an ancestor traceback. The precision of this info is reduced +// due to only have access to the pcs at the time of the caller +// goroutine being created. +func printAncestorTracebackFuncInfo(name, file string, line int, pc uintptr) { + if name == "runtime.gopanic" { + name = "panic" + } + print(name, "(...)\n") + print("\t", file, ":", line) + entry := funcentry(pc) + if pc > entry { + print(" +", hex(pc-entry)) + } + print("\n") } // printtrace prints a traceback from locbuf. func printtrace(locbuf []location, gp *g) { + nprint := 0 for i := range locbuf { - if showframe(locbuf[i].function, gp) { + if showframe(locbuf[i].function, gp, nprint == 0) { name := locbuf[i].function if name == "runtime.gopanic" { name = "panic" } print(name, "\n\t", locbuf[i].filename, ":", locbuf[i].lineno, "\n") + nprint++ } } } // showframe returns whether to print a frame in a traceback. // name is the function name. 
-func showframe(name string, gp *g) bool { +func showframe(name string, gp *g, firstFrame bool) bool { g := getg() if g.m.throwing > 0 && gp != nil && (gp == g.m.curg || gp == g.m.caughtsig.ptr()) { return true } + return showfuncinfo(name, firstFrame) +} +func showfuncinfo(name string, firstFrame bool) bool { // Gccgo can trace back through C functions called via cgo. // We want to print those in the traceback. // But unless GOTRACEBACK > 1 (checked below), still skip // internal C functions and cgo-generated functions. - if name != "" && !contains(name, ".") && !hasprefix(name, "__go_") && !hasprefix(name, "_cgo_") { + if name != "" && !contains(name, ".") && !hasPrefix(name, "__go_") && !hasPrefix(name, "_cgo_") { return true } level, _, _ := gotraceback() + if level > 1 { + // Show all frames. + return true + } + + if name == "" { + return false + } - // Special case: always show runtime.gopanic frame, so that we can - // see where a panic started in the middle of a stack trace. + // Special case: always show runtime.gopanic frame + // in the middle of a stack trace, so that we can + // see the boundary between ordinary code and + // panic-induced deferred code. // See golang.org/issue/5832. - // __go_panic is the current gccgo name. - if name == "runtime.gopanic" || name == "__go_panic" { + if name == "runtime.gopanic" && !firstFrame { return true } - return level > 1 || contains(name, ".") && (!hasprefix(name, "runtime.") || isExportedRuntime(name)) + return contains(name, ".") && (!hasPrefix(name, "runtime.") || isExportedRuntime(name)) } // isExportedRuntime reports whether name is an exported runtime function. @@ -115,7 +177,7 @@ func showframe(name string, gp *g) bool { // "runtime..z2f". func isExportedRuntime(name string) bool { const n = len("runtime.") - if hasprefix(name, "runtime..z2f") { + if hasPrefix(name, "runtime..z2f") { return true } return len(name) > n && name[:n] == "runtime." && 'A' <= name[n] && name[n] <= 'Z' @@ -169,10 +231,24 @@ func goroutineheader(gp *g) { print("]:\n") } -// isSystemGoroutine reports whether the goroutine g must be omitted in -// stack dumps and deadlock detector. -func isSystemGoroutine(gp *g) bool { - return gp.isSystemGoroutine +// isSystemGoroutine reports whether the goroutine g must be omitted +// in stack dumps and deadlock detector. This is any goroutine that +// starts at a runtime.* entry point, except for runtime.main and +// sometimes runtime.runfinq. +// +// If fixed is true, any goroutine that can vary between user and +// system (that is, the finalizer goroutine) is considered a user +// goroutine. +func isSystemGoroutine(gp *g, fixed bool) bool { + if !gp.isSystemGoroutine { + return false + } + if fixed && gp.isFinalizerGoroutine { + // This goroutine can vary. In fixed mode, + // always consider it a user goroutine. + return false + } + return true } func tracebackothers(me *g) { @@ -200,7 +276,7 @@ func tracebackothers(me *g) { lock(&allglock) for _, gp := range allgs { - if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || isSystemGoroutine(gp) && level < 2 { + if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 { continue } print("\n") |
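The ancestor-printing code added above only fires when goroutine creation stacks are being recorded, which is controlled by the tracebackancestors GODEBUG setting; a crashing program run with it set then gets "[originating from goroutine N]" sections like the ones printAncestorTraceback emits. A minimal way to try it, assuming GODEBUG=tracebackancestors=5 is set in the environment:

package main

import "time"

func main() {
	// With GODEBUG=tracebackancestors=5 set, the runtime records the stack at
	// each go statement, and the panic traceback below gains an
	// "[originating from goroutine N]" section pointing back at this one.
	go func() {
		panic("boom")
	}()
	time.Sleep(time.Second) // give the child goroutine time to crash the program
}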