| author | Russ Cox <rsc@golang.org> | 2014-12-05 11:40:41 -0500 |
| --- | --- | --- |
| committer | Russ Cox <rsc@golang.org> | 2014-12-05 11:40:41 -0500 |
| commit | 60e02bbbea5d3bf4a6b06551f09c984285f8139c (patch) | |
| tree | 69e27f09db888f62c2aa9db29ab0c7233c1b286b | |
| parent | 6d3ba1914e289ed223f7bb69f34604c0e2ae5384 (diff) | |
| parent | e5fc9ffb729e31c4eb0a6518e819e9fc70f14818 (diff) | |
| download | go-60e02bbbea5d3bf4a6b06551f09c984285f8139c.tar.gz | |
[dev.garbage] all: merge dev.cc (81884b89bd88) into dev.garbage
TBR=rlh
CC=golang-codereviews
https://codereview.appspot.com/181100044
43 files changed, 1874 insertions, 552 deletions
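The runtime half of this merge rewrites gogc (src/runtime/malloc.go, below) as a phased sequence of stop/start-the-world steps around a concurrent scan and a concurrent mark. The sketch below is orientation only: the ordering of steps is taken from the malloc.go hunk, while systemstack and the gc* helpers are reduced to prints purely for illustration.

```go
package main

import "fmt"

// systemstack and the step helpers are stand-ins; only the ordering of the
// calls in gogcSketch is taken from the gogc hunk in this commit.
func systemstack(f func()) { f() }

func step(name string) func() { return func() { fmt.Println(name) } }

var (
	stoptheworld      = step("stop the world")
	starttheworld     = step("start the world")
	finishsweep_m     = step("finish the previous sweep")
	gcscan_m          = step("scan stacks and globals concurrently")
	gcinstallmarkwb_m = step("install the mark write barrier")
	gcmark_m          = step("mark the heap concurrently")
	gcinstalloffwb_m  = step("turn the write barrier back off")
)

func gogcSketch() {
	systemstack(stoptheworld)
	systemstack(finishsweep_m) // finish sweep before starting a concurrent scan
	systemstack(starttheworld)
	systemstack(gcscan_m)
	systemstack(stoptheworld)
	systemstack(gcinstallmarkwb_m)
	systemstack(starttheworld)
	systemstack(gcmark_m)
	systemstack(stoptheworld)
	systemstack(gcinstalloffwb_m)
	// Mark termination (gc_m) and the optional checkmark pass (gccheckmark_m)
	// then run with the world still stopped.
}

func main() { gogcSketch() }
```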
diff --git a/lib/codereview/codereview.py b/lib/codereview/codereview.py index 9181f1df3..0c9b27a31 100644 --- a/lib/codereview/codereview.py +++ b/lib/codereview/codereview.py @@ -2024,13 +2024,13 @@ def submit(ui, repo, *pats, **opts): # push to remote; if it fails for any reason, roll back try: new_heads = len(hg_heads(ui, repo).split()) - if old_heads != new_heads and not (old_heads == 0 and new_heads == 1): + if cl.desc.find("create new branch") < 0 and old_heads != new_heads and not (old_heads == 0 and new_heads == 1): # Created new head, so we weren't up to date. need_sync() # Push changes to remote. If it works, we're committed. If not, roll back. try: - if hg_push(ui, repo): + if hg_push(ui, repo, new_branch=cl.desc.find("create new branch")>=0): raise hg_util.Abort("push error") except hg_error.Abort, e: if e.message.find("push creates new heads") >= 0: diff --git a/src/cmd/api/goapi.go b/src/cmd/api/goapi.go index 5a8c87603..e49ba33bb 100644 --- a/src/cmd/api/goapi.go +++ b/src/cmd/api/goapi.go @@ -405,6 +405,7 @@ func (w *Walker) parseFile(dir, file string) (*ast.File, error) { " note struct{};" + " p struct{};" + " parfor struct{};" + + " slice struct{};" + " slicetype struct{};" + " stkframe struct{};" + " sudog struct{};" + diff --git a/src/cmd/gc/builtin.c b/src/cmd/gc/builtin.c index fbca4ee5f..aeeadedca 100644 --- a/src/cmd/gc/builtin.c +++ b/src/cmd/gc/builtin.c @@ -24,6 +24,8 @@ char *runtimeimport = "func @\"\".printslice (? any)\n" "func @\"\".printnl ()\n" "func @\"\".printsp ()\n" + "func @\"\".printlock ()\n" + "func @\"\".printunlock ()\n" "func @\"\".concatstring2 (? string, ? string) (? string)\n" "func @\"\".concatstring3 (? string, ? string, ? string) (? string)\n" "func @\"\".concatstring4 (? string, ? string, ? string, ? string) (? 
string)\n" @@ -86,10 +88,33 @@ char *runtimeimport = "func @\"\".writebarrierstring (@\"\".dst·1 *any, @\"\".src·2 any)\n" "func @\"\".writebarrierslice (@\"\".dst·1 *any, @\"\".src·2 any)\n" "func @\"\".writebarrieriface (@\"\".dst·1 *any, @\"\".src·2 any)\n" - "func @\"\".writebarrierfat2 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" - "func @\"\".writebarrierfat3 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" - "func @\"\".writebarrierfat4 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat01 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat10 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat11 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat001 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat010 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat011 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat100 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat101 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat110 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat111 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat0001 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat0010 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat0011 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat0100 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat0101 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat0110 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat0111 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1000 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1001 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1010 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1011 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1100 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1101 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1110 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" + "func @\"\".writebarrierfat1111 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n" "func @\"\".writebarrierfat (@\"\".typ·1 *byte, @\"\".dst·2 *any, @\"\".src·3 *any)\n" + "func @\"\".writebarriercopy (@\"\".typ·2 *byte, @\"\".dst·3 any, @\"\".src·4 any) (? int)\n" "func @\"\".selectnbsend (@\"\".chanType·2 *byte, @\"\".hchan·3 chan<- any, @\"\".elem·4 *any) (? bool)\n" "func @\"\".selectnbrecv (@\"\".chanType·2 *byte, @\"\".elem·3 *any, @\"\".hchan·4 <-chan any) (? bool)\n" "func @\"\".selectnbrecv2 (@\"\".chanType·2 *byte, @\"\".elem·3 *any, @\"\".received·4 *bool, @\"\".hchan·5 <-chan any) (? 
bool)\n" diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 6e326961d..5236305f8 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -1473,6 +1473,7 @@ void walk(Node *fn); void walkexpr(Node **np, NodeList **init); void walkexprlist(NodeList *l, NodeList **init); void walkexprlistsafe(NodeList *l, NodeList **init); +void walkexprlistcheap(NodeList *l, NodeList **init); void walkstmt(Node **np); void walkstmtlist(NodeList *l); Node* conv(Node*, Type*); diff --git a/src/cmd/gc/popt.c b/src/cmd/gc/popt.c index 993bb2482..6e6db88ef 100644 --- a/src/cmd/gc/popt.c +++ b/src/cmd/gc/popt.c @@ -847,6 +847,10 @@ nilopt(Prog *firstp) Graph *g; int ncheck, nkill; + // TODO(minux): nilopt on power64 throw away seemly random segment of code. + if(thechar == '9') + return; + g = flowstart(firstp, sizeof(NilFlow)); if(g == nil) return; diff --git a/src/cmd/gc/runtime.go b/src/cmd/gc/runtime.go index 0fb15c265..c6007714c 100644 --- a/src/cmd/gc/runtime.go +++ b/src/cmd/gc/runtime.go @@ -36,6 +36,8 @@ func printeface(any) func printslice(any) func printnl() func printsp() +func printlock() +func printunlock() func concatstring2(string, string) string func concatstring3(string, string, string) string @@ -115,10 +117,35 @@ func writebarrieriface(dst *any, src any) // The unused *byte argument makes sure that src is 2-pointer-aligned, // which is the maximum alignment on NaCl amd64p32 // (and possibly on 32-bit systems if we start 64-bit aligning uint64s). -func writebarrierfat2(dst *any, _ *byte, src any) -func writebarrierfat3(dst *any, _ *byte, src any) -func writebarrierfat4(dst *any, _ *byte, src any) +// The bitmap in the name tells which words being copied are pointers. +func writebarrierfat01(dst *any, _ *byte, src any) +func writebarrierfat10(dst *any, _ *byte, src any) +func writebarrierfat11(dst *any, _ *byte, src any) +func writebarrierfat001(dst *any, _ *byte, src any) +func writebarrierfat010(dst *any, _ *byte, src any) +func writebarrierfat011(dst *any, _ *byte, src any) +func writebarrierfat100(dst *any, _ *byte, src any) +func writebarrierfat101(dst *any, _ *byte, src any) +func writebarrierfat110(dst *any, _ *byte, src any) +func writebarrierfat111(dst *any, _ *byte, src any) +func writebarrierfat0001(dst *any, _ *byte, src any) +func writebarrierfat0010(dst *any, _ *byte, src any) +func writebarrierfat0011(dst *any, _ *byte, src any) +func writebarrierfat0100(dst *any, _ *byte, src any) +func writebarrierfat0101(dst *any, _ *byte, src any) +func writebarrierfat0110(dst *any, _ *byte, src any) +func writebarrierfat0111(dst *any, _ *byte, src any) +func writebarrierfat1000(dst *any, _ *byte, src any) +func writebarrierfat1001(dst *any, _ *byte, src any) +func writebarrierfat1010(dst *any, _ *byte, src any) +func writebarrierfat1011(dst *any, _ *byte, src any) +func writebarrierfat1100(dst *any, _ *byte, src any) +func writebarrierfat1101(dst *any, _ *byte, src any) +func writebarrierfat1110(dst *any, _ *byte, src any) +func writebarrierfat1111(dst *any, _ *byte, src any) + func writebarrierfat(typ *byte, dst *any, src *any) +func writebarriercopy(typ *byte, dst any, src any) int func selectnbsend(chanType *byte, hchan chan<- any, elem *any) bool func selectnbrecv(chanType *byte, elem *any, hchan <-chan any) bool diff --git a/src/cmd/gc/typecheck.c b/src/cmd/gc/typecheck.c index 714c66268..f05d8022d 100644 --- a/src/cmd/gc/typecheck.c +++ b/src/cmd/gc/typecheck.c @@ -2891,7 +2891,8 @@ typecheckas(Node *n) case OSLICE3: case OSLICESTR: // For x = x[0:y], x can be updated in place, without 
touching pointer. - if(samesafeexpr(n->left, n->right->left) && (n->right->right->left == N || iszero(n->right->right->left))) + // TODO(rsc): Reenable once it is actually updated in place without touching the pointer. + if(0 && samesafeexpr(n->left, n->right->left) && (n->right->right->left == N || iszero(n->right->right->left))) n->right->reslice = 1; break; @@ -2899,7 +2900,8 @@ typecheckas(Node *n) // For x = append(x, ...), x can be updated in place when there is capacity, // without touching the pointer; otherwise the emitted code to growslice // can take care of updating the pointer, and only in that case. - if(n->right->list != nil && samesafeexpr(n->left, n->right->list->n)) + // TODO(rsc): Reenable once the emitted code does update the pointer. + if(0 && n->right->list != nil && samesafeexpr(n->left, n->right->list->n)) n->right->reslice = 1; break; } diff --git a/src/cmd/gc/walk.c b/src/cmd/gc/walk.c index 77f9c80f9..061089349 100644 --- a/src/cmd/gc/walk.c +++ b/src/cmd/gc/walk.c @@ -6,6 +6,7 @@ #include <libc.h> #include "go.h" #include "../ld/textflag.h" +#include "../../runtime/mgc0.h" static Node* walkprint(Node*, NodeList**); static Node* writebarrierfn(char*, Type*, Type*); @@ -363,6 +364,15 @@ walkexprlistsafe(NodeList *l, NodeList **init) } void +walkexprlistcheap(NodeList *l, NodeList **init) +{ + for(; l; l=l->next) { + l->n = cheapexpr(l->n, init); + walkexpr(&l->n, init); + } +} + +void walkexpr(Node **np, NodeList **init) { Node *r, *l, *var, *a; @@ -1771,6 +1781,11 @@ walkprint(Node *nn, NodeList **init) calls = nil; notfirst = 0; + // Hoist all the argument evaluation up before the lock. + walkexprlistcheap(all, init); + + calls = list(calls, mkcall("printlock", T, init)); + for(l=all; l; l=l->next) { if(notfirst) { calls = list(calls, mkcall("printsp", T, init)); @@ -1851,6 +1866,9 @@ walkprint(Node *nn, NodeList **init) if(op == OPRINTN) calls = list(calls, mkcall("printnl", T, nil)); + + calls = list(calls, mkcall("printunlock", T, init)); + typechecklist(calls, Etop); walkexprlist(calls, init); @@ -1987,6 +2005,9 @@ applywritebarrier(Node *n, NodeList **init) { Node *l, *r; Type *t; + vlong x; + static Bvec *bv; + char name[32]; if(n->left && n->right && needwritebarrier(n->left, n->right)) { t = n->left->type; @@ -2004,14 +2025,35 @@ applywritebarrier(Node *n, NodeList **init) } else if(isinter(t)) { n = mkcall1(writebarrierfn("writebarrieriface", t, n->right->type), T, init, l, n->right); - } else if(t->width == 2*widthptr) { - n = mkcall1(writebarrierfn("writebarrierfat2", t, n->right->type), T, init, - l, nodnil(), n->right); - } else if(t->width == 3*widthptr) { - n = mkcall1(writebarrierfn("writebarrierfat3", t, n->right->type), T, init, - l, nodnil(), n->right); - } else if(t->width == 4*widthptr) { - n = mkcall1(writebarrierfn("writebarrierfat4", t, n->right->type), T, init, + } else if(t->width <= 4*widthptr) { + x = 0; + if(bv == nil) + bv = bvalloc(BitsPerPointer*4); + bvresetall(bv); + twobitwalktype1(t, &x, bv); + // The bvgets are looking for BitsPointer in successive slots. 
+ enum { + PtrBit = 1, + }; + if(BitsPointer != (1<<PtrBit)) + fatal("wrong PtrBit"); + switch(t->width/widthptr) { + default: + fatal("found writebarrierfat for %d-byte object of type %T", (int)t->width, t); + case 2: + snprint(name, sizeof name, "writebarrierfat%d%d", + bvget(bv, PtrBit), bvget(bv, BitsPerPointer+PtrBit)); + break; + case 3: + snprint(name, sizeof name, "writebarrierfat%d%d%d", + bvget(bv, PtrBit), bvget(bv, BitsPerPointer+PtrBit), bvget(bv, 2*BitsPerPointer+PtrBit)); + break; + case 4: + snprint(name, sizeof name, "writebarrierfat%d%d%d%d", + bvget(bv, PtrBit), bvget(bv, BitsPerPointer+PtrBit), bvget(bv, 2*BitsPerPointer+PtrBit), bvget(bv, 3*BitsPerPointer+PtrBit)); + break; + } + n = mkcall1(writebarrierfn(name, t, n->right->type), T, init, l, nodnil(), n->right); } else { r = n->right; @@ -2873,6 +2915,11 @@ copyany(Node *n, NodeList **init, int runtimecall) { Node *nl, *nr, *nfrm, *nto, *nif, *nlen, *nwid, *fn; NodeList *l; + + if(haspointers(n->left->type->type)) { + fn = writebarrierfn("writebarriercopy", n->left->type, n->right->type); + return mkcall1(fn, n->type, init, typename(n->left->type->type), n->left, n->right); + } if(runtimecall) { if(n->right->type->etype == TSTRING) diff --git a/src/run.bash b/src/run.bash index 6b9ecc33c..b8ce417a0 100755 --- a/src/run.bash +++ b/src/run.bash @@ -162,7 +162,8 @@ esac # Race detector only supported on Linux, FreeBSD and OS X, # and only on amd64, and only when cgo is enabled. # Delayed until here so we know whether to try external linking. -case "$GOHOSTOS-$GOOS-$GOARCH-$CGO_ENABLED" in +# DISABLED until we get garbage collection working. +case "$GOHOSTOS-$GOOS-$GOARCH-$CGO_ENABLED-XXX-DISABLED" in linux-linux-amd64-1 | freebsd-freebsd-amd64-1 | darwin-darwin-amd64-1) echo echo '# Testing race detector.' 
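The builtin.c, runtime.go, and walk.c hunks above replace the size-based helpers writebarrierfat{2,3,4} with variants whose name suffix is a per-word pointer bitmap (writebarrierfat101 copies a three-word value whose first and third words are pointers), selected in applywritebarrier from the type's pointer map. Below is a minimal sketch of that name selection using a plain []bool mask; it is illustrative only, not the compiler's code, which reads the bits out of a Bvec built by twobitwalktype1.

```go
package main

import "fmt"

// writebarrierFatName mimics how applywritebarrier (src/cmd/gc/walk.c above)
// picks a runtime helper for copying a 2- to 4-word value: one digit per word,
// 1 if that word holds a pointer, 0 if it is a scalar.
func writebarrierFatName(ptrmask []bool) (string, error) {
	if len(ptrmask) < 2 || len(ptrmask) > 4 {
		return "", fmt.Errorf("no writebarrierfat helper for %d-word objects", len(ptrmask))
	}
	name := "writebarrierfat"
	for _, isPtr := range ptrmask {
		if isPtr {
			name += "1"
		} else {
			name += "0"
		}
	}
	return name, nil
}

func main() {
	// A three-word value whose first and third words are pointers,
	// e.g. a string header followed by an unsafe.Pointer.
	name, err := writebarrierFatName([]bool{true, false, true})
	fmt.Println(name, err) // writebarrierfat101 <nil>
}
```

All-zero masks never arise here, since applywritebarrier only runs when needwritebarrier sees a type containing pointers, which is presumably why the runtime list above has no all-zeros variants.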
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index a02bb5556..7cc64a3a4 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -2285,3 +2285,23 @@ TEXT runtime·getg(SB),NOSPLIT,$0-4 MOVL AX, ret+0(FP) RET +TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHT0 (AX) + RET + +TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHT1 (AX) + RET + + +TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHT2 (AX) + RET + +TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHNTA (AX) + RET diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 6e3f5ff6c..14be2fe92 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -2228,3 +2228,23 @@ TEXT runtime·getg(SB),NOSPLIT,$0-8 MOVQ g(CX), AX MOVQ AX, ret+0(FP) RET + +TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 + MOVQ addr+0(FP), AX + PREFETCHT0 (AX) + RET + +TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 + MOVQ addr+0(FP), AX + PREFETCHT1 (AX) + RET + +TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 + MOVQ addr+0(FP), AX + PREFETCHT2 (AX) + RET + +TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 + MOVQ addr+0(FP), AX + PREFETCHNTA (AX) + RET diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s index b8370efd3..c87d848fe 100644 --- a/src/runtime/asm_amd64p32.s +++ b/src/runtime/asm_amd64p32.s @@ -1079,3 +1079,24 @@ TEXT runtime·getg(SB),NOSPLIT,$0-4 MOVL g(CX), AX MOVL AX, ret+0(FP) RET + +TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHT0 (AX) + RET + +TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHT1 (AX) + RET + + +TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHT2 (AX) + RET + +TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 + MOVL addr+0(FP), AX + PREFETCHNTA (AX) + RET diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s index 583c7ba50..c6c98b443 100644 --- a/src/runtime/asm_arm.s +++ b/src/runtime/asm_arm.s @@ -1320,3 +1320,15 @@ TEXT runtime·goexit(SB),NOSPLIT,$-4-0 TEXT runtime·getg(SB),NOSPLIT,$-4-4 MOVW g, ret+0(FP) RET + +TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 + RET + +TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 + RET + +TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 + RET + +TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 + RET diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index 3f2ab6d0e..548c88e47 100644 --- a/src/runtime/asm_power64x.s +++ b/src/runtime/asm_power64x.s @@ -977,3 +977,15 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0 TEXT runtime·getg(SB),NOSPLIT,$-8-8 MOVD g, ret+0(FP) RETURN + +TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 + RETURN + +TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 + RETURN + +TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 + RETURN + +TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 + RETURN diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index e871236e4..5ed255026 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -26,7 +26,7 @@ var Exitsyscall = exitsyscall var LockedOSThread = lockedOSThread type LFNode struct { - Next *LFNode + Next uint64 Pushcnt uintptr } diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 0c1a60c8b..e1693d40f 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -464,8 +464,8 @@ func dumpobjs() { if n > uintptr(len(freemark)) { gothrow("freemark array doesn't have enough entries") } - for l := s.freelist; l != nil; l = l.next { - freemark[(uintptr(unsafe.Pointer(l))-p)/size] = true + for l := s.freelist; l.ptr() != nil; l = 
l.ptr().next { + freemark[(uintptr(l)-p)/size] = true } for j := uintptr(0); j < n; j, p = j+1, p+size { if freemark[j] { diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go index 8a36a67b3..fd3325972 100644 --- a/src/runtime/lfstack.go +++ b/src/runtime/lfstack.go @@ -18,7 +18,7 @@ func lfstackpush(head *uint64, node *lfnode) { } for { old := atomicload64(head) - node.next, _ = lfstackUnpack(old) + node.next = old if cas64(head, old, new) { break } @@ -32,12 +32,8 @@ func lfstackpop(head *uint64) unsafe.Pointer { return nil } node, _ := lfstackUnpack(old) - node2 := (*lfnode)(atomicloadp(unsafe.Pointer(&node.next))) - new := uint64(0) - if node2 != nil { - new = lfstackPack(node2, node2.pushcnt) - } - if cas64(head, old, new) { + next := atomicload64(&node.next) + if cas64(head, old, next) { return unsafe.Pointer(node) } } diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go index e51877704..68f221d6e 100644 --- a/src/runtime/lfstack_test.go +++ b/src/runtime/lfstack_test.go @@ -121,7 +121,7 @@ func TestLFStackStress(t *testing.T) { } cnt++ sum2 += node.data - node.Next = nil + node.Next = 0 } } if cnt != K { diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index d73d1ba6a..e9fec7bb1 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -140,14 +140,14 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { // Allocate a new maxTinySize block. s = c.alloc[tinySizeClass] v := s.freelist - if v == nil { + if v.ptr() == nil { systemstack(func() { mCache_Refill(c, tinySizeClass) }) s = c.alloc[tinySizeClass] v = s.freelist } - s.freelist = v.next + s.freelist = v.ptr().next s.ref++ //TODO: prefetch v.next x = unsafe.Pointer(v) @@ -170,19 +170,19 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { size = uintptr(class_to_size[sizeclass]) s = c.alloc[sizeclass] v := s.freelist - if v == nil { + if v.ptr() == nil { systemstack(func() { mCache_Refill(c, int32(sizeclass)) }) s = c.alloc[sizeclass] v = s.freelist } - s.freelist = v.next + s.freelist = v.ptr().next s.ref++ //TODO: prefetch x = unsafe.Pointer(v) if flags&flagNoZero == 0 { - v.next = nil + v.ptr().next = 0 if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 { memclr(unsafe.Pointer(v), size) } @@ -241,6 +241,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { masksize = masksize * pointersPerByte / 8 // 4 bits per word masksize++ // unroll flag in the beginning if masksize > maxGCMask && typ.gc[1] != 0 { + // write barriers have not been updated to deal with this case yet. + gothrow("maxGCMask too small for now") // If the mask is too large, unroll the program directly // into the GC bitmap. It's 7 times slower than copying // from the pre-unrolled mask, but saves 1/16 of type size @@ -295,6 +297,17 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { } } marked: + + // GCmarkterminate allocates black + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. 
+ if gcphase == _GCmarktermination { + systemstack(func() { + gcmarknewobject_m(uintptr(x)) + }) + } + if raceenabled { racemalloc(x, size) } @@ -328,13 +341,43 @@ marked: } } - if memstats.heap_alloc >= memstats.next_gc { + if memstats.heap_alloc >= memstats.next_gc/2 { gogc(0) } return x } +func loadPtrMask(typ *_type) []uint8 { + var ptrmask *uint8 + nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize + if typ.kind&kindGCProg != 0 { + masksize := nptr + if masksize%2 != 0 { + masksize *= 2 // repeated + } + masksize = masksize * pointersPerByte / 8 // 4 bits per word + masksize++ // unroll flag in the beginning + if masksize > maxGCMask && typ.gc[1] != 0 { + // write barriers have not been updated to deal with this case yet. + gothrow("maxGCMask too small for now") + } + ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0]))) + // Check whether the program is already unrolled + // by checking if the unroll flag byte is set + maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask))) + if *(*uint8)(unsafe.Pointer(&maskword)) == 0 { + systemstack(func() { + unrollgcprog_m(typ) + }) + } + ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte + } else { + ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask + } + return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2] +} + // implementation of new builtin func newobject(typ *_type) unsafe.Pointer { flags := uint32(0) @@ -429,7 +472,21 @@ func gogc(force int32) { mp = acquirem() mp.gcing = 1 releasem(mp) + systemstack(stoptheworld) + systemstack(finishsweep_m) // finish sweep before we start concurrent scan. + if true { // To turn on concurrent scan and mark set to true... + systemstack(starttheworld) + // Do a concurrent heap scan before we stop the world. + systemstack(gcscan_m) + systemstack(stoptheworld) + systemstack(gcinstallmarkwb_m) + systemstack(starttheworld) + systemstack(gcmark_m) + systemstack(stoptheworld) + systemstack(gcinstalloffwb_m) + } + if mp != acquirem() { gothrow("gogc: rescheduled") } @@ -445,17 +502,21 @@ func gogc(force int32) { if debug.gctrace > 1 { n = 2 } + eagersweep := force >= 2 for i := 0; i < n; i++ { if i > 0 { startTime = nanotime() } // switch to g0, call gc, then switch back - eagersweep := force >= 2 systemstack(func() { gc_m(startTime, eagersweep) }) } + systemstack(func() { + gccheckmark_m(startTime, eagersweep) + }) + // all done mp.gcing = 0 semrelease(&worldsema) @@ -470,6 +531,14 @@ func gogc(force int32) { } } +func GCcheckmarkenable() { + systemstack(gccheckmarkenable_m) +} + +func GCcheckmarkdisable() { + systemstack(gccheckmarkdisable_m) +} + // GC runs a garbage collection. func GC() { gogc(2) diff --git a/src/runtime/malloc2.go b/src/runtime/malloc2.go index c175c2aec..a9d40de30 100644 --- a/src/runtime/malloc2.go +++ b/src/runtime/malloc2.go @@ -139,10 +139,35 @@ const ( ) // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) +// Since assignments to mlink.next will result in a write barrier being preformed +// this can not be used by some of the internal GC structures. For example when +// the sweeper is placing an unmarked object on the free list it does not want the +// write barrier to be called since that could result in the object being reachable. type mlink struct { next *mlink } +// A gclink is a node in a linked list of blocks, like mlink, +// but it is opaque to the garbage collector. 
+// The GC does not trace the pointers during collection, +// and the compiler does not emit write barriers for assignments +// of gclinkptr values. Code should store references to gclinks +// as gclinkptr, not as *gclink. +type gclink struct { + next gclinkptr +} + +// A gclinkptr is a pointer to a gclink, but it is opaque +// to the garbage collector. +type gclinkptr uintptr + +// ptr returns the *gclink form of p. +// The result should be used for accessing fields, not stored +// in other data structures. +func (p gclinkptr) ptr() *gclink { + return (*gclink)(unsafe.Pointer(p)) +} + // sysAlloc obtains a large chunk of zeroed memory from the // operating system, typically on the order of a hundred kilobytes // or a megabyte. @@ -275,8 +300,8 @@ type mcachelist struct { } type stackfreelist struct { - list *mlink // linked list of free stacks - size uintptr // total size of stacks in list + list gclinkptr // linked list of free stacks + size uintptr // total size of stacks in list } // Per-thread (in Go, per-P) cache for small objects. @@ -299,8 +324,6 @@ type mcache struct { sudogcache *sudog - gcworkbuf unsafe.Pointer - // Local allocator stats, flushed during GC. local_nlookup uintptr // number of pointer lookups local_largefree uintptr // bytes freed for large objects (>maxsmallsize) @@ -348,11 +371,11 @@ const ( ) type mspan struct { - next *mspan // in a span linked list - prev *mspan // in a span linked list - start pageID // starting page number - npages uintptr // number of pages in span - freelist *mlink // list of free objects + next *mspan // in a span linked list + prev *mspan // in a span linked list + start pageID // starting page number + npages uintptr // number of pages in span + freelist gclinkptr // list of free objects // sweep generation: // if sweepgen == h->sweepgen - 2, the span needs sweeping // if sweepgen == h->sweepgen - 1, the span is currently being swept diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index d3afef6be..f8389c5cb 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -38,7 +38,12 @@ func freemcache(c *mcache) { systemstack(func() { mCache_ReleaseAll(c) stackcache_clear(c) - gcworkbuffree(c.gcworkbuf) + + // NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate + // with the stealing of gcworkbufs during garbage collection to avoid + // a race where the workbuf is double-freed. + // gcworkbuffree(c.gcworkbuf) + lock(&mheap_.lock) purgecachedstats(c) fixAlloc_Free(&mheap_.cachealloc, unsafe.Pointer(c)) @@ -54,7 +59,7 @@ func mCache_Refill(c *mcache, sizeclass int32) *mspan { _g_.m.locks++ // Return the current cached span to the central lists. 
s := c.alloc[sizeclass] - if s.freelist != nil { + if s.freelist.ptr() != nil { gothrow("refill on a nonempty span") } if s != &emptymspan { @@ -66,7 +71,7 @@ func mCache_Refill(c *mcache, sizeclass int32) *mspan { if s == nil { gothrow("out of memory") } - if s.freelist == nil { + if s.freelist.ptr() == nil { println(s.ref, (s.npages<<_PageShift)/s.elemsize) gothrow("empty span") } diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go index 0d172a08b..ae5c6f1d5 100644 --- a/src/runtime/mcentral.go +++ b/src/runtime/mcentral.go @@ -55,7 +55,7 @@ retry: mSpanList_InsertBack(&c.empty, s) unlock(&c.lock) mSpan_Sweep(s, true) - if s.freelist != nil { + if s.freelist.ptr() != nil { goto havespan } lock(&c.lock) @@ -90,7 +90,7 @@ havespan: if n == 0 { gothrow("empty span") } - if s.freelist == nil { + if s.freelist.ptr() == nil { gothrow("freelist empty") } s.incache = true @@ -122,14 +122,14 @@ func mCentral_UncacheSpan(c *mcentral, s *mspan) { // the latest generation. // If preserve=true, don't return the span to heap nor relink in MCentral lists; // caller takes care of it. -func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start *mlink, end *mlink, preserve bool) bool { +func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool { if s.incache { gothrow("freespan into cached span") } // Add the objects back to s's free list. - wasempty := s.freelist == nil - end.next = s.freelist + wasempty := s.freelist.ptr() == nil + end.ptr().next = s.freelist s.freelist = start s.ref -= uint16(n) @@ -165,7 +165,7 @@ func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start *mlink, end *mlink, // s is completely freed, return it to the heap. mSpanList_Remove(s) s.needzero = 1 - s.freelist = nil + s.freelist = 0 unlock(&c.lock) unmarkspan(uintptr(s.start)<<_PageShift, s.npages<<_PageShift) mHeap_Free(&mheap_, s, 0) @@ -183,17 +183,21 @@ func mCentral_Grow(c *mcentral) *mspan { return nil } - // Carve span into sequence of blocks. - tailp := &s.freelist p := uintptr(s.start << _PageShift) s.limit = p + size*n - for i := uintptr(0); i < n; i++ { - v := (*mlink)(unsafe.Pointer(p)) - *tailp = v - tailp = &v.next + head := gclinkptr(p) + tail := gclinkptr(p) + // i==0 iteration already done + for i := uintptr(1); i < n; i++ { p += size + tail.ptr().next = gclinkptr(p) + tail = gclinkptr(p) } - *tailp = nil + if s.freelist.ptr() != nil { + gothrow("freelist not empty") + } + tail.ptr().next = 0 + s.freelist = head markspan(unsafe.Pointer(uintptr(s.start)<<_PageShift), size, n, size*n < s.npages<<_PageShift) return s } diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f44d7ddbc..a13de0488 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -7,22 +7,72 @@ // Garbage collector (GC). // -// GC is: -// - mark&sweep -// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc) -// - parallel (up to MaxGcproc threads) -// - partially concurrent (mark is stop-the-world, while sweep is concurrent) -// - non-moving/non-compacting -// - full (non-partial) +// The GC runs concurrently with mutator threads, is type accurate (aka precise), allows multiple GC +// thread to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is +// non-generational and non-compacting. Allocation is done using size segregated per P allocation +// areas to minimize fragmentation while eliminating locks in the common case. // -// GC rate. 
-// Next GC is after we've allocated an extra amount of memory proportional to -// the amount already in use. The proportion is controlled by GOGC environment variable -// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M -// (this mark is tracked in next_gc variable). This keeps the GC cost in linear -// proportion to the allocation cost. Adjusting GOGC just changes the linear constant -// (and also the amount of extra memory used). +// The algorithm decomposes into several steps. +// This is a high level description of the algorithm being used. For an overview of GC a good +// place to start is Richard Jones' gchandbook.org. +// +// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see +// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978. +// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 966-975. +// For journal quality proofs that these steps are complete, correct, and terminate see +// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world. +// Concurrency and Computation: Practice and Experience 15(3-5), 2003. // +// 0. Set phase = GCscan from GCoff. +// 1. Wait for all P's to acknowledge phase change. +// At this point all goroutines have passed through a GC safepoint and +// know we are in the GCscan phase. +// 2. GC scans all goroutine stacks, mark and enqueues all encountered pointers +// (marking avoids most duplicate enqueuing but races may produce duplication which is benign). +// Preempted goroutines are scanned before P schedules next goroutine. +// 3. Set phase = GCmark. +// 4. Wait for all P's to acknowledge phase change. +// 5. Now write barrier marks and enqueues black, grey, or white to white pointers. +// Malloc still allocates white (non-marked) objects. +// 6. Meanwhile GC transitively walks the heap marking reachable objects. +// 7. When GC finishes marking heap, it preempts P's one-by-one and +// retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine +// currently scheduled on the P). +// 8. Once the GC has exhausted all available marking work it sets phase = marktermination. +// 9. Wait for all P's to acknowledge phase change. +// 10. Malloc now allocates black objects, so number of unmarked reachable objects +// monotonically decreases. +// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet reachable objects. +// 12. When GC completes a full cycle over P's and discovers no new grey +// objects, (which means all reachable objects are marked) set phase = GCsweep. +// 13. Wait for all P's to acknowledge phase change. +// 14. Now malloc allocates white (but sweeps spans before use). +// Write barrier becomes nop. +// 15. GC does background sweeping, see description below. +// 16. When sweeping is complete set phase to GCoff. +// 17. When sufficient allocation has taken place replay the sequence starting at 0 above, +// see discussion of GC rate below. + +// Changing phases. +// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase. +// All phase action must be benign in the presence of a change. +// Starting with GCoff +// GCoff to GCscan +// GSscan scans stacks and globals greying them and never marks an object black. +// Once all the P's are aware of the new phase they will scan gs on preemption. 
+// This means that the scanning of preempted gs can't start until all the Ps +// have acknowledged. +// GCscan to GCmark +// GCMark turns on the write barrier which also only greys objects. No scanning +// of objects (making them black) can happen until all the Ps have acknowledged +// the phase change. +// GCmark to GCmarktermination +// The only change here is that we start allocating black so the Ps must acknowledge +// the change before we begin the termination algorithm +// GCmarktermination to GSsweep +// Object currently on the freelist must be marked black for this to work. +// Are things on the free lists black or white? How does the sweep phase work? + // Concurrent sweep. // The sweep phase proceeds concurrently with normal program execution. // The heap is swept span-by-span both lazily (when a goroutine needs another span) @@ -53,6 +103,14 @@ // The finalizer goroutine is kicked off only when all spans are swept. // When the next GC starts, it sweeps all not-yet-swept spans (if any). +// GC rate. +// Next GC is after we've allocated an extra amount of memory proportional to +// the amount already in use. The proportion is controlled by GOGC environment variable +// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M +// (this mark is tracked in next_gc variable). This keeps the GC cost in linear +// proportion to the allocation cost. Adjusting GOGC just changes the linear constant +// (and also the amount of extra memory used). + package runtime import "unsafe" @@ -75,7 +133,7 @@ const ( // ptrmask for an allocation containing a single pointer. var oneptr = [...]uint8{bitsPointer} -// Initialized from $GOGC. GOGC=off means no gc. +// Initialized from $GOGC. GOGC=off means no GC. var gcpercent int32 // Holding worldsema grants an M the right to try to stop the world. @@ -93,6 +151,17 @@ var gcpercent int32 // var worldsema uint32 = 1 +// It is a bug if bits does not have bitBoundary set but +// there are still some cases where this happens related +// to stack spans. +type markbits struct { + bitp *byte // pointer to the byte holding xbits + shift uintptr // bits xbits needs to be shifted to get bits + xbits byte // byte holding all the bits from *bitp + bits byte // mark and boundary bits relevant to corresponding slot. + tbits byte // pointer||scalar bits relevant to corresponding slot. +} + type workbuf struct { node lfnode // must be first nobj uintptr @@ -121,6 +190,7 @@ var nbadblock int32 type workdata struct { full uint64 // lock-free list of full blocks empty uint64 // lock-free list of empty blocks + partial uint64 // lock-free list of partially filled blocks pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait nproc uint32 tstart int64 @@ -143,293 +213,405 @@ func have_cgo_allocate() bool { return &weak_cgo_allocate != nil } -// scanblock scans a block of n bytes starting at pointer b for references -// to other objects, scanning any it finds recursively until there are no -// unscanned objects left. Instead of using an explicit recursion, it keeps -// a work list in the Workbuf* structures and loops in the main function -// body. Keeping an explicit work list is easier on the stack allocator and -// more efficient. -func scanblock(b, n uintptr, ptrmask *uint8) { - // Cache memory arena parameters in local vars. 
- arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - - wbuf := getempty(nil) - nobj := wbuf.nobj - wp := &wbuf.obj[nobj] - keepworking := b == 0 +// To help debug the concurrent GC we remark with the world +// stopped ensuring that any object encountered has their normal +// mark bit set. To do this we use an orthogonal bit +// pattern to indicate the object is marked. The following pattern +// uses the upper two bits in the object's bounday nibble. +// 01: scalar not marked +// 10: pointer not marked +// 11: pointer marked +// 00: scalar marked +// Xoring with 01 will flip the pattern from marked to unmarked and vica versa. +// The higher bit is 1 for pointers and 0 for scalars, whether the object +// is marked or not. +// The first nibble no longer holds the bitsDead pattern indicating that the +// there are no more pointers in the object. This information is held +// in the second nibble. + +// When marking an object if the bool checkmark is true one uses the above +// encoding, otherwise one uses the bitMarked bit in the lower two bits +// of the nibble. +var ( + checkmark = false + gccheckmarkenable = true +) - var ptrbitp unsafe.Pointer +// Is address b in the known heap. If it doesn't have a valid gcmap +// returns false. For example pointers into stacks will return false. +func inheap(b uintptr) bool { + if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { + return false + } + // Not a beginning of a block, consult span table to find the block beginning. + k := b >> _PageShift + x := k + x -= mheap_.arena_start >> _PageShift + s := h_spans[x] + if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse { + return false + } + return true +} - // ptrmask can have 2 possible values: - // 1. nil - obtain pointer mask from GC bitmap. - // 2. pointer to a compact mask (for stacks and data). - goto_scanobj := b != 0 +// Given an address in the heap return the relevant byte from the gcmap. This routine +// can be used on addresses to the start of an object or to the interior of the an object. +func slottombits(obj uintptr, mbits *markbits) { + off := (obj&^(ptrSize-1) - mheap_.arena_start) / ptrSize + mbits.bitp = (*byte)(unsafe.Pointer(mheap_.arena_start - off/wordsPerBitmapByte - 1)) + mbits.shift = off % wordsPerBitmapByte * gcBits + mbits.xbits = *mbits.bitp + mbits.bits = (mbits.xbits >> mbits.shift) & bitMask + mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2 +} +// b is a pointer into the heap. +// Find the start of the object refered to by b. +// Set mbits to the associated bits from the bit map. +// If b is not a valid heap object return nil and +// undefined values in mbits. +func objectstart(b uintptr, mbits *markbits) uintptr { + obj := b &^ (ptrSize - 1) for { - if goto_scanobj { - goto_scanobj = false - } else { - if nobj == 0 { - // Out of work in workbuf. - if !keepworking { - putempty(wbuf) - return - } + slottombits(obj, mbits) + if mbits.bits&bitBoundary == bitBoundary { + break + } - // Refill workbuf from global queue. - wbuf = getfull(wbuf) - if wbuf == nil { - return - } - nobj = wbuf.nobj - if nobj < uintptr(len(wbuf.obj)) { - wp = &wbuf.obj[nobj] - } else { - wp = nil - } + // Not a beginning of a block, consult span table to find the block beginning. + k := b >> _PageShift + x := k + x -= mheap_.arena_start >> _PageShift + s := h_spans[x] + if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse { + if s != nil && s.state == _MSpanStack { + return 0 // This is legit. 
} - // If another proc wants a pointer, give it some. - if work.nwait > 0 && nobj > 4 && work.full == 0 { - wbuf.nobj = nobj - wbuf = handoff(wbuf) - nobj = wbuf.nobj - if nobj < uintptr(len(wbuf.obj)) { - wp = &wbuf.obj[nobj] + // The following ensures that we are rigorous about what data + // structures hold valid pointers + if false { + // Still happens sometimes. We don't know why. + printlock() + print("runtime:objectstart Span weird: obj=", hex(obj), " k=", hex(k)) + if s == nil { + print(" s=nil\n") } else { - wp = nil + print(" s.start=", hex(s.start<<_PageShift), " s.limit=", hex(s.limit), " s.state=", s.state, "\n") } + printunlock() + gothrow("objectstart: bad pointer in unexpected span") } - - nobj-- - wp = &wbuf.obj[nobj] - b = *wp - n = arena_used - uintptr(b) - ptrmask = nil // use GC bitmap for pointer info + return 0 } - if _DebugGCPtrs { - print("scanblock ", b, " +", hex(n), " ", ptrmask, "\n") + p := uintptr(s.start) << _PageShift + if s.sizeclass != 0 { + size := s.elemsize + idx := (obj - p) / size + p = p + idx*size } - - // Find bits of the beginning of the object. - if ptrmask == nil { - off := (uintptr(b) - arena_start) / ptrSize - ptrbitp = unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1) + if p == obj { + print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n") + gothrow("failed to find block beginning") } + obj = p + } - var i uintptr - for i = 0; i < n; i += ptrSize { - // Find bits for this word. - var bits uintptr - if ptrmask == nil { - // Check if we have reached end of span. - if (uintptr(b)+i)%_PageSize == 0 && - h_spans[(uintptr(b)-arena_start)>>_PageShift] != h_spans[(uintptr(b)+i-arena_start)>>_PageShift] { - break - } + // if size(obj.firstfield) < PtrSize, the &obj.secondfield could map to the boundary bit + // Clear any low bits to get to the start of the object. + // greyobject depends on this. + return obj +} - // Consult GC bitmap. - bits = uintptr(*(*byte)(ptrbitp)) +// Slow for now as we serialize this, since this is on a debug path +// speed is not critical at this point. +var andlock mutex - if wordsPerBitmapByte != 2 { - gothrow("alg doesn't work for wordsPerBitmapByte != 2") - } - j := (uintptr(b) + i) / ptrSize & 1 - ptrbitp = add(ptrbitp, -j) - bits >>= gcBits * j +func atomicand8(src *byte, val byte) { + lock(&andlock) + *src &= val + unlock(&andlock) +} - if bits&bitBoundary != 0 && i != 0 { - break // reached beginning of the next object - } - bits = (bits >> 2) & bitsMask - if bits == bitsDead { - break // reached no-scan part of the object - } - } else { - // dense mask (stack or data) - bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask - } +// Mark using the checkmark scheme. 
+func docheckmark(mbits *markbits) { + // xor 01 moves 01(scalar unmarked) to 00(scalar marked) + // and 10(pointer unmarked) to 11(pointer marked) + if mbits.tbits == _BitsScalar { + atomicand8(mbits.bitp, ^byte(_BitsCheckMarkXor<<mbits.shift<<2)) + } else if mbits.tbits == _BitsPointer { + atomicor8(mbits.bitp, byte(_BitsCheckMarkXor<<mbits.shift<<2)) + } - if bits <= _BitsScalar { // BitsScalar || BitsDead - continue - } + // reload bits for ischeckmarked + mbits.xbits = *mbits.bitp + mbits.bits = (mbits.xbits >> mbits.shift) & bitMask + mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2 +} - if bits != _BitsPointer { - gothrow("unexpected garbage collection bits") - } +// In the default scheme does mbits refer to a marked object. +func ismarked(mbits *markbits) bool { + if mbits.bits&bitBoundary != bitBoundary { + gothrow("ismarked: bits should have boundary bit set") + } + return mbits.bits&bitMarked == bitMarked +} - obj := *(*uintptr)(unsafe.Pointer(b + i)) - obj0 := obj +// In the checkmark scheme does mbits refer to a marked object. +func ischeckmarked(mbits *markbits) bool { + if mbits.bits&bitBoundary != bitBoundary { + gothrow("ischeckmarked: bits should have boundary bit set") + } + return mbits.tbits == _BitsScalarMarked || mbits.tbits == _BitsPointerMarked +} - markobj: - var s *mspan - var off, bitp, shift, xbits uintptr +// When in GCmarkterminate phase we allocate black. +func gcmarknewobject_m(obj uintptr) { + if gcphase != _GCmarktermination { + gothrow("marking new object while not in mark termination phase") + } + if checkmark { // The world should be stopped so this should not happen. + gothrow("gcmarknewobject called while doing checkmark") + } - // At this point we have extracted the next potential pointer. - // Check if it points into heap. - if obj == 0 { - continue - } - if obj < arena_start || arena_used <= obj { - if uintptr(obj) < _PhysPageSize && invalidptr != 0 { - s = nil - goto badobj - } - continue - } + var mbits markbits + slottombits(obj, &mbits) + if mbits.bits&bitMarked != 0 { + return + } - // Mark the object. - obj &^= ptrSize - 1 - off = (obj - arena_start) / ptrSize - bitp = arena_start - off/wordsPerBitmapByte - 1 - shift = (off % wordsPerBitmapByte) * gcBits - xbits = uintptr(*(*byte)(unsafe.Pointer(bitp))) - bits = (xbits >> shift) & bitMask - if (bits & bitBoundary) == 0 { - // Not a beginning of a block, consult span table to find the block beginning. - k := pageID(obj >> _PageShift) - x := k - x -= pageID(arena_start >> _PageShift) - s = h_spans[x] - if s == nil || k < s.start || s.limit <= obj || s.state != mSpanInUse { - // Stack pointers lie within the arena bounds but are not part of the GC heap. - // Ignore them. - if s != nil && s.state == _MSpanStack { - continue - } - goto badobj - } - p := uintptr(s.start) << _PageShift - if s.sizeclass != 0 { - size := s.elemsize - idx := (obj - p) / size - p = p + idx*size - } - if p == obj { - print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n") - gothrow("failed to find block beginning") + // Each byte of GC bitmap holds info for two words. + // If the current object is larger than two words, or if the object is one word + // but the object it shares the byte with is already marked, + // then all the possible concurrent updates are trying to set the same bit, + // so we can use a non-atomic update. 
+ if mbits.xbits&(bitMask|(bitMask<<gcBits)) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 { + *mbits.bitp = mbits.xbits | bitMarked<<mbits.shift + } else { + atomicor8(mbits.bitp, bitMarked<<mbits.shift) + } +} + +// obj is the start of an object with mark mbits. +// If it isn't already marked, mark it and enqueue into workbuf. +// Return possibly new workbuf to use. +func greyobject(obj uintptr, mbits *markbits, wbuf *workbuf) *workbuf { + // obj should be start of allocation, and so must be at least pointer-aligned. + if obj&(ptrSize-1) != 0 { + gothrow("greyobject: obj not pointer-aligned") + } + + if checkmark { + if !ismarked(mbits) { + print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), ", mbits->bits=", hex(mbits.bits), " *mbits->bitp=", hex(*mbits.bitp), "\n") + + k := obj >> _PageShift + x := k + x -= mheap_.arena_start >> _PageShift + s := h_spans[x] + printlock() + print("runtime:greyobject Span: obj=", hex(obj), " k=", hex(k)) + if s == nil { + print(" s=nil\n") + } else { + print(" s.start=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n") + // NOTE(rsc): This code is using s.sizeclass as an approximation of the + // number of pointer-sized words in an object. Perhaps not what was intended. + for i := 0; i < int(s.sizeclass); i++ { + print(" *(obj+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)*ptrSize))), "\n") } - obj = p - goto markobj } + gothrow("checkmark found unmarked object") + } + if ischeckmarked(mbits) { + return wbuf + } + docheckmark(mbits) + if !ischeckmarked(mbits) { + print("mbits xbits=", hex(mbits.xbits), " bits=", hex(mbits.bits), " tbits=", hex(mbits.tbits), " shift=", mbits.shift, "\n") + gothrow("docheckmark and ischeckmarked disagree") + } + } else { + // If marked we have nothing to do. + if mbits.bits&bitMarked != 0 { + return wbuf + } - if _DebugGCPtrs { - print("scan *", hex(b+i), " = ", hex(obj0), " => base ", hex(obj), "\n") - } + // Each byte of GC bitmap holds info for two words. + // If the current object is larger than two words, or if the object is one word + // but the object it shares the byte with is already marked, + // then all the possible concurrent updates are trying to set the same bit, + // so we can use a non-atomic update. + if mbits.xbits&(bitMask|bitMask<<gcBits) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 { + *mbits.bitp = mbits.xbits | bitMarked<<mbits.shift + } else { + atomicor8(mbits.bitp, bitMarked<<mbits.shift) + } + } - if nbadblock > 0 && obj == badblock[nbadblock-1] { - // Running garbage collection again because - // we want to find the path from a root to a bad pointer. - // Found possible next step; extend or finish path. - for j := int32(0); j < nbadblock; j++ { - if badblock[j] == b { - goto AlreadyBad - } - } - print("runtime: found *(", hex(b), "+", hex(i), ") = ", hex(obj0), "+", hex(obj-obj0), "\n") - if ptrmask != nil { - gothrow("bad pointer") - } - if nbadblock >= int32(len(badblock)) { - gothrow("badblock trace too long") - } - badblock[nbadblock] = uintptr(b) - nbadblock++ - AlreadyBad: + if !checkmark && (mbits.xbits>>(mbits.shift+2))&_BitsMask == _BitsDead { + return wbuf // noscan object + } + + // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but + // seems like a nice optimization that can be added back in. + // There needs to be time between the PREFETCH and the use. 
+ // Previously we put the obj in an 8 element buffer that is drained at a rate + // to give the PREFETCH time to do its work. + // Use of PREFETCHNTA might be more appropriate than PREFETCH + + // If workbuf is full, obtain an empty one. + if wbuf.nobj >= uintptr(len(wbuf.obj)) { + wbuf = getempty(wbuf) + } + + wbuf.obj[wbuf.nobj] = obj + wbuf.nobj++ + return wbuf +} + +// Scan the object b of size n, adding pointers to wbuf. +// Return possibly new wbuf to use. +// If ptrmask != nil, it specifies where pointers are in b. +// If ptrmask == nil, the GC bitmap should be consulted. +// In this case, n may be an overestimate of the size; the GC bitmap +// must also be used to make sure the scan stops at the end of b. +func scanobject(b, n uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf { + arena_start := mheap_.arena_start + arena_used := mheap_.arena_used + + // Find bits of the beginning of the object. + var ptrbitp unsafe.Pointer + var mbits markbits + if ptrmask == nil { + b = objectstart(b, &mbits) + if b == 0 { + return wbuf + } + ptrbitp = unsafe.Pointer(mbits.bitp) + } + for i := uintptr(0); i < n; i += ptrSize { + // Find bits for this word. + var bits uintptr + if ptrmask != nil { + // dense mask (stack or data) + bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask + } else { + // Check if we have reached end of span. + // n is an overestimate of the size of the object. + if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] { + break } - // Now we have bits, bitp, and shift correct for - // obj pointing at the base of the object. - // Only care about not marked objects. - if bits&bitMarked != 0 { - continue + // Consult GC bitmap. + bits = uintptr(*(*byte)(ptrbitp)) + if wordsPerBitmapByte != 2 { + gothrow("alg doesn't work for wordsPerBitmapByte != 2") } + j := (uintptr(b) + i) / ptrSize & 1 // j indicates upper nibble or lower nibble + bits >>= gcBits * j + if i == 0 { + bits &^= bitBoundary + } + ptrbitp = add(ptrbitp, -j) - // If obj size is greater than 8, then each byte of GC bitmap - // contains info for at most one object. In such case we use - // non-atomic byte store to mark the object. This can lead - // to double enqueue of the object for scanning, but scanning - // is an idempotent operation, so it is OK. This cannot lead - // to bitmap corruption because the single marked bit is the - // only thing that can change in the byte. - // For 8-byte objects we use non-atomic store, if the other - // quadruple is already marked. Otherwise we resort to CAS - // loop for marking. - if xbits&(bitMask|bitMask<<gcBits) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 { - *(*byte)(unsafe.Pointer(bitp)) = uint8(xbits | bitMarked<<shift) - } else { - atomicor8((*byte)(unsafe.Pointer(bitp)), bitMarked<<shift) + if bits&bitBoundary != 0 && i != 0 { + break // reached beginning of the next object } + bits = (bits & bitPtrMask) >> 2 // bits refer to the type bits. - if (xbits>>(shift+2))&bitsMask == bitsDead { - continue // noscan object + if i != 0 && bits == bitsDead { // BitsDead in first nibble not valid during checkmark + break // reached no-scan part of the object } + } - // Queue the obj for scanning. - // TODO: PREFETCH here. + if bits <= _BitsScalar { // _BitsScalar, _BitsDead, _BitsScalarMarked + continue + } - // If workbuf is full, obtain an empty one. 
- if nobj >= uintptr(len(wbuf.obj)) { - wbuf.nobj = nobj - wbuf = getempty(wbuf) - nobj = wbuf.nobj - wp = &wbuf.obj[nobj] - } - *wp = obj - nobj++ - if nobj < uintptr(len(wbuf.obj)) { - wp = &wbuf.obj[nobj] - } else { - wp = nil - } + if bits&_BitsPointer != _BitsPointer { + print("gc checkmark=", checkmark, " b=", hex(b), " ptrmask=", ptrmask, " mbits.bitp=", mbits.bitp, " mbits.xbits=", hex(mbits.xbits), " bits=", hex(bits), "\n") + gothrow("unexpected garbage collection bits") + } + + obj := *(*uintptr)(unsafe.Pointer(b + i)) + + // At this point we have extracted the next potential pointer. + // Check if it points into heap. + if obj == 0 || obj < arena_start || obj >= arena_used { + continue + } + + // Mark the object. return some important bits. + // We we combine the following two rotines we don't have to pass mbits or obj around. + var mbits markbits + obj = objectstart(obj, &mbits) + if obj == 0 { continue + } + wbuf = greyobject(obj, &mbits, wbuf) + } + return wbuf +} - badobj: - // If cgo_allocate is linked into the binary, it can allocate - // memory as []unsafe.Pointer that may not contain actual - // pointers and must be scanned conservatively. - // In this case alone, allow the bad pointer. - if have_cgo_allocate() && ptrmask == nil { - continue +// scanblock starts by scanning b as scanobject would. +// If the gcphase is GCscan, that's all scanblock does. +// Otherwise it traverses some fraction of the pointers it found in b, recursively. +// As a special case, scanblock(nil, 0, nil) means to scan previously queued work, +// stopping only when no work is left in the system. +func scanblock(b, n uintptr, ptrmask *uint8) { + wbuf := getpartialorempty() + if b != 0 { + wbuf = scanobject(b, n, ptrmask, wbuf) + if gcphase == _GCscan { + if inheap(b) && ptrmask == nil { + // b is in heap, we are in GCscan so there should be a ptrmask. + gothrow("scanblock: In GCscan phase and inheap is true.") } + // GCscan only goes one level deep since mark wb not turned on. + putpartial(wbuf) + return + } + } + if gcphase == _GCscan { + gothrow("scanblock: In GCscan phase but no b passed in.") + } - // Anything else indicates a bug somewhere. - // If we're in the middle of chasing down a different bad pointer, - // don't confuse the trace by printing about this one. - if nbadblock > 0 { - continue - } + keepworking := b == 0 - print("runtime: garbage collector found invalid heap pointer *(", hex(b), "+", hex(i), ")=", hex(obj)) - if s == nil { - print(" s=nil\n") - } else { - print(" span=", uintptr(s.start)<<_PageShift, "-", s.limit, "-", (uintptr(s.start)+s.npages)<<_PageShift, " state=", s.state, "\n") + // ptrmask can have 2 possible values: + // 1. nil - obtain pointer mask from GC bitmap. + // 2. pointer to a compact mask (for stacks and data). + for { + if wbuf.nobj == 0 { + if !keepworking { + putempty(wbuf) + return } - if ptrmask != nil { - gothrow("invalid heap pointer") + // Refill workbuf from global queue. + wbuf = getfull(wbuf) + if wbuf == nil { // nil means out of work barrier reached + return } - // Add to badblock list, which will cause the garbage collection - // to keep repeating until it has traced the chain of pointers - // leading to obj all the way back to a root. 
- if nbadblock == 0 { - badblock[nbadblock] = uintptr(b) - nbadblock++ + + if wbuf.nobj <= 0 { + gothrow("runtime:scanblock getfull returns empty buffer") } } - if _DebugGCPtrs { - print("end scanblock ", hex(b), " +", hex(n), " ", ptrmask, "\n") - } - if _DebugGC > 0 && ptrmask == nil { - // For heap objects ensure that we did not overscan. - var p, n uintptr - if mlookup(b, &p, &n, nil) == 0 || b != p || i > n { - print("runtime: scanned (", hex(b), "+", hex(i), "), heap object (", hex(p), "+", hex(n), ")\n") - gothrow("scanblock: scanned invalid object") - } + + // If another proc wants a pointer, give it some. + if work.nwait > 0 && wbuf.nobj > 4 && work.full == 0 { + wbuf = handoff(wbuf) } + + // This might be a good place to add prefetch code... + // if(wbuf->nobj > 4) { + // PREFETCH(wbuf->obj[wbuf->nobj - 3]; + // } + wbuf.nobj-- + b = wbuf.obj[wbuf.nobj] + wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf) } } @@ -455,7 +637,8 @@ func markroot(desc *parfor, i uint32) { if s.state != mSpanInUse { continue } - if s.sweepgen != sg { + if !checkmark && s.sweepgen != sg { + // sweepgen was updated (+2) during non-checkmark GC pass print("sweep ", s.sweepgen, " ", sg, "\n") gothrow("gc: unswept span") } @@ -468,13 +651,17 @@ func markroot(desc *parfor, i uint32) { spf := (*specialfinalizer)(unsafe.Pointer(sp)) // A finalizer can be set for an inner byte of an object, find object beginning. p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize - scanblock(p, s.elemsize, nil) + if gcphase != _GCscan { + scanblock(p, s.elemsize, nil) // scanned during mark phase + } scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0]) } } case _RootFlushCaches: - flushallmcaches() + if gcphase != _GCscan { // Do not flush mcaches during GCscan phase. + flushallmcaches() + } default: // the rest is scanning goroutine stacks @@ -482,21 +669,44 @@ func markroot(desc *parfor, i uint32) { gothrow("markroot: bad index") } gp := allgs[i-_RootCount] + // remember when we've first observed the G blocked // needed only to output in traceback - status := readgstatus(gp) + status := readgstatus(gp) // We are not in a scan state if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 { gp.waitsince = work.tstart } - // Shrink a stack if not much of it is being used. - shrinkstack(gp) + + // Shrink a stack if not much of it is being used but not in the scan phase. + if gcphase != _GCscan { // Do not shrink during GCscan phase. + shrinkstack(gp) + } if readgstatus(gp) == _Gdead { gp.gcworkdone = true } else { gp.gcworkdone = false } restart := stopg(gp) - scanstack(gp) + + // goroutine will scan its own stack when it stops running. + // Wait until it has. + for readgstatus(gp) == _Grunning && !gp.gcworkdone { + } + + // scanstack(gp) is done as part of gcphasework + // But to make sure we finished we need to make sure that + // the stack traps have all responded so drop into + // this while loop until they respond. + for !gp.gcworkdone { + status = readgstatus(gp) + if status == _Gdead { + gp.gcworkdone = true // scan is a noop + break + } + if status == _Gwaiting || status == _Grunnable { + restart = stopg(gp) + } + } if restart { restartg(gp) } @@ -506,48 +716,83 @@ func markroot(desc *parfor, i uint32) { // Get an empty work buffer off the work.empty list, // allocating new buffers as needed. 
func getempty(b *workbuf) *workbuf { - _g_ := getg() if b != nil { - lfstackpush(&work.full, &b.node) + putfull(b) + b = nil } - b = nil - c := _g_.m.mcache - if c.gcworkbuf != nil { - b = (*workbuf)(c.gcworkbuf) - c.gcworkbuf = nil - } - if b == nil { + if work.empty != 0 { b = (*workbuf)(lfstackpop(&work.empty)) } + if b != nil && b.nobj != 0 { + _g_ := getg() + print("m", _g_.m.id, ": getempty: popped b=", b, " with non-zero b.nobj=", b.nobj, "\n") + gothrow("getempty: workbuffer not empty, b->nobj not 0") + } if b == nil { b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys)) + b.nobj = 0 } - b.nobj = 0 return b } func putempty(b *workbuf) { - _g_ := getg() - c := _g_.m.mcache - if c.gcworkbuf == nil { - c.gcworkbuf = (unsafe.Pointer)(b) - return + if b.nobj != 0 { + gothrow("putempty: b->nobj not 0") } lfstackpush(&work.empty, &b.node) } -func gcworkbuffree(b unsafe.Pointer) { - if b != nil { - putempty((*workbuf)(b)) +func putfull(b *workbuf) { + if b.nobj <= 0 { + gothrow("putfull: b->nobj <= 0") + } + lfstackpush(&work.full, &b.node) +} + +// Get an partially empty work buffer +// if none are available get an empty one. +func getpartialorempty() *workbuf { + b := (*workbuf)(lfstackpop(&work.partial)) + if b == nil { + b = getempty(nil) + } + return b +} + +func putpartial(b *workbuf) { + if b.nobj == 0 { + lfstackpush(&work.empty, &b.node) + } else if b.nobj < uintptr(len(b.obj)) { + lfstackpush(&work.partial, &b.node) + } else if b.nobj == uintptr(len(b.obj)) { + lfstackpush(&work.full, &b.node) + } else { + print("b=", b, " b.nobj=", b.nobj, " len(b.obj)=", len(b.obj), "\n") + gothrow("putpartial: bad Workbuf b.nobj") } } -// Get a full work buffer off the work.full list, or return nil. +// Get a full work buffer off the work.full or a partially +// filled one off the work.partial list. If nothing is available +// wait until all the other gc helpers have finished and then +// return nil. +// getfull acts as a barrier for work.nproc helpers. As long as one +// gchelper is actively marking objects it +// may create a workbuffer that the other helpers can work on. +// The for loop either exits when a work buffer is found +// or when _all_ of the work.nproc GC helpers are in the loop +// looking for work and thus not capable of creating new work. +// This is in fact the termination condition for the STW mark +// phase. func getfull(b *workbuf) *workbuf { if b != nil { - lfstackpush(&work.empty, &b.node) + putempty(b) } + b = (*workbuf)(lfstackpop(&work.full)) + if b == nil { + b = (*workbuf)(lfstackpop(&work.partial)) + } if b != nil || work.nproc == 1 { return b } @@ -557,6 +802,9 @@ func getfull(b *workbuf) *workbuf { if work.full != 0 { xadd(&work.nwait, -1) b = (*workbuf)(lfstackpop(&work.full)) + if b == nil { + b = (*workbuf)(lfstackpop(&work.partial)) + } if b != nil { return b } @@ -675,14 +923,11 @@ func scanframe(frame *stkframe, unused unsafe.Pointer) bool { } func scanstack(gp *g) { - // TODO(rsc): Due to a precedence error, this was never checked in the original C version. - // If you enable the check, the gothrow happens. 
- /* - if readgstatus(gp)&_Gscan == 0 { - print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") - gothrow("mark - bad status") - } - */ + + if readgstatus(gp)&_Gscan == 0 { + print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n") + gothrow("scanstack - bad status") + } switch readgstatus(gp) &^ _Gscan { default: @@ -692,7 +937,7 @@ func scanstack(gp *g) { return case _Grunning: print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") - gothrow("mark - world not stopped") + gothrow("scanstack: goroutine not stopped") case _Grunnable, _Gsyscall, _Gwaiting: // ok } @@ -709,18 +954,114 @@ func scanstack(gp *g) { tracebackdefers(gp, scanframe, nil) } -// The gp has been moved to a gc safepoint. If there is gcphase specific -// work it is done here. +// If the slot is grey or black return true, if white return false. +// If the slot is not in the known heap and thus does not have a valid GC bitmap then +// it is considered grey. Globals and stacks can hold such slots. +// The slot is grey if its mark bit is set and it is enqueued to be scanned. +// The slot is black if it has already been scanned. +// It is white if it has a valid mark bit and the bit is not set. +func shaded(slot uintptr) bool { + if !inheap(slot) { // non-heap slots considered grey + return true + } + + var mbits markbits + valid := objectstart(slot, &mbits) + if valid == 0 { + return true + } + + if checkmark { + return ischeckmarked(&mbits) + } + + return mbits.bits&bitMarked != 0 +} + +// Shade the object if it isn't already. +// The object is not nil and known to be in the heap. +func shade(b uintptr) { + if !inheap(b) { + gothrow("shade: passed an address not in the heap") + } + + wbuf := getpartialorempty() + // Mark the object, return some important bits. + // If we combine the following two rotines we don't have to pass mbits or obj around. + var mbits markbits + obj := objectstart(b, &mbits) + if obj != 0 { + wbuf = greyobject(obj, &mbits, wbuf) // augments the wbuf + } + putpartial(wbuf) +} + +// This is the Dijkstra barrier coarsened to always shade the ptr (dst) object. +// The original Dijkstra barrier only shaded ptrs being placed in black slots. +// +// Shade indicates that it has seen a white pointer by adding the referent +// to wbuf as well as marking it. +// +// slot is the destination (dst) in go code +// ptr is the value that goes into the slot (src) in the go code +// +// Dijkstra pointed out that maintaining the no black to white +// pointers means that white to white pointers not need +// to be noted by the write barrier. Furthermore if either +// white object dies before it is reached by the +// GC then the object can be collected during this GC cycle +// instead of waiting for the next cycle. Unfortunately the cost of +// ensure that the object holding the slot doesn't concurrently +// change to black without the mutator noticing seems prohibitive. +// +// Consider the following example where the mutator writes into +// a slot and then loads the slot's mark bit while the GC thread +// writes to the slot's mark bit and then as part of scanning reads +// the slot. +// +// Initially both [slot] and [slotmark] are 0 (nil) +// Mutator thread GC thread +// st [slot], ptr st [slotmark], 1 +// +// ld r1, [slotmark] ld r2, [slot] +// +// This is a classic example of independent reads of independent writes, +// aka IRIW. 
The question is if r1==r2==0 is allowed and for most HW the +// answer is yes without inserting a memory barriers between the st and the ld. +// These barriers are expensive so we have decided that we will +// always grey the ptr object regardless of the slot's color. +func gcmarkwb_m(slot *uintptr, ptr uintptr) { + switch gcphase { + default: + gothrow("gcphasework in bad gcphase") + + case _GCoff, _GCquiesce, _GCstw, _GCsweep, _GCscan: + // ok + + case _GCmark, _GCmarktermination: + if ptr != 0 && inheap(ptr) { + shade(ptr) + } + } +} + +// The gp has been moved to a GC safepoint. GC phase specific +// work is done here. func gcphasework(gp *g) { switch gcphase { default: gothrow("gcphasework in bad gcphase") case _GCoff, _GCquiesce, _GCstw, _GCsweep: - // No work for now. + // No work. + case _GCscan: + // scan the stack, mark the objects, put pointers in work buffers + // hanging off the P where this is being run. + scanstack(gp) case _GCmark: - // Disabled until concurrent GC is implemented - // but indicate the scan has been done. - // scanstack(gp); + // No work. + case _GCmarktermination: + scanstack(gp) + // All available mark work will be emptied before returning. } gp.gcworkdone = true } @@ -797,6 +1138,7 @@ func iterate_finq(callback func(*funcval, unsafe.Pointer, uintptr, *_type, *ptrt } } +// Returns only when span s has been swept. func mSpan_EnsureSwept(s *mspan) { // Caller must disable preemption. // Otherwise when this function returns the span can become unswept again @@ -810,6 +1152,7 @@ func mSpan_EnsureSwept(s *mspan) { if atomicload(&s.sweepgen) == sg { return } + // The caller must be sure that the span is a MSpanInUse span. if cas(&s.sweepgen, sg-2, sg-1) { mSpan_Sweep(s, false) return @@ -826,6 +1169,10 @@ func mSpan_EnsureSwept(s *mspan) { // If preserve=true, don't return it to heap nor relink in MCentral lists; // caller takes care of it. func mSpan_Sweep(s *mspan, preserve bool) bool { + if checkmark { + gothrow("MSpan_Sweep: checkmark only runs in STW and after the sweep") + } + // It's critical that we enter this function with preemption disabled, // GC must not start while we are in the middle of this function. _g_ := getg() @@ -851,13 +1198,14 @@ func mSpan_Sweep(s *mspan, preserve bool) bool { } res := false nfree := 0 - var head mlink - end := &head + + var head, end gclinkptr + c := _g_.m.mcache sweepgenset := false // Mark any free objects in this span so we don't collect them. 
- for link := s.freelist; link != nil; link = link.next { + for link := s.freelist; link.ptr() != nil; link = link.ptr().next { off := (uintptr(unsafe.Pointer(link)) - arena_start) / ptrSize bitp := arena_start - off/wordsPerBitmapByte - 1 shift := (off % wordsPerBitmapByte) * gcBits @@ -978,8 +1326,13 @@ func mSpan_Sweep(s *mspan, preserve bool) bool { } else if size > ptrSize { *(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0 } - end.next = (*mlink)(unsafe.Pointer(p)) - end = end.next + if head.ptr() == nil { + head = gclinkptr(p) + } else { + end.ptr().next = gclinkptr(p) + } + end = gclinkptr(p) + end.ptr().next = gclinkptr(0x0bade5) nfree++ } } @@ -1002,7 +1355,7 @@ func mSpan_Sweep(s *mspan, preserve bool) bool { c.local_nsmallfree[cl] += uintptr(nfree) c.local_cachealloc -= intptr(uintptr(nfree) * size) xadd64(&memstats.next_gc, -int64(nfree)*int64(size)*int64(gcpercent+100)/100) - res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head.next, end, preserve) + res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve) // MCentral_FreeSpan updates sweepgen } return res @@ -1073,11 +1426,11 @@ func gchelper() { _g_.m.traceback = 2 gchelperstart() - // parallel mark for over gc roots + // parallel mark for over GC roots parfordo(work.markfor) - - // help other threads scan secondary blocks - scanblock(0, 0, nil) + if gcphase != _GCscan { + scanblock(0, 0, nil) // blocks in getfull + } nproc := work.nproc // work.nproc can change right after we increment work.ndone if xadd(&work.ndone, +1) == nproc-1 { @@ -1204,6 +1557,7 @@ func gcinit() { gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss))) } +// Called from malloc.go using onM, stopping and starting the world handled in caller. func gc_m(start_time int64, eagersweep bool) { _g_ := getg() gp := _g_.m.curg @@ -1211,18 +1565,266 @@ func gc_m(start_time int64, eagersweep bool) { gp.waitreason = "garbage collection" gc(start_time, eagersweep) + casgstatus(gp, _Gwaiting, _Grunning) +} + +// Similar to clearcheckmarkbits but works on a single span. +// It preforms two tasks. +// 1. When used before the checkmark phase it converts BitsDead (00) to bitsScalar (01) +// for nibbles with the BoundaryBit set. +// 2. When used after the checkmark phase it converts BitsPointerMark (11) to BitsPointer 10 and +// BitsScalarMark (00) to BitsScalar (01), thus clearing the checkmark mark encoding. +// For the second case it is possible to restore the BitsDead pattern but since +// clearmark is a debug tool performance has a lower priority than simplicity. +// The span is MSpanInUse and the world is stopped. +func clearcheckmarkbitsspan(s *mspan) { + if s.state != _MSpanInUse { + print("runtime:clearcheckmarkbitsspan: state=", s.state, "\n") + gothrow("clearcheckmarkbitsspan: bad span state") + } - if nbadblock > 0 { - // Work out path from root to bad block. - for { - gc(start_time, eagersweep) - if nbadblock >= int32(len(badblock)) { - gothrow("cannot find path to bad pointer") + arena_start := mheap_.arena_start + cl := s.sizeclass + size := s.elemsize + var n int32 + if cl == 0 { + n = 1 + } else { + // Chunk full of small blocks + npages := class_to_allocnpages[cl] + n = npages << _PageShift / int32(size) + } + + // MSpan_Sweep has similar code but instead of overloading and + // complicating that routine we do a simpler walk here. + // Sweep through n objects of given size starting at p. 
+ // This thread owns the span now, so it can manipulate + // the block bitmap without atomic operations. + p := uintptr(s.start) << _PageShift + + // Find bits for the beginning of the span. + off := (p - arena_start) / ptrSize + bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)) + step := size / (ptrSize * wordsPerBitmapByte) + + // The type bit values are: + // 00 - BitsDead, for us BitsScalarMarked + // 01 - BitsScalar + // 10 - BitsPointer + // 11 - unused, for us BitsPointerMarked + // + // When called to prepare for the checkmark phase (checkmark==1), + // we change BitsDead to BitsScalar, so that there are no BitsScalarMarked + // type bits anywhere. + // + // The checkmark phase marks by changing BitsScalar to BitsScalarMarked + // and BitsPointer to BitsPointerMarked. + // + // When called to clean up after the checkmark phase (checkmark==0), + // we unmark by changing BitsScalarMarked back to BitsScalar and + // BitsPointerMarked back to BitsPointer. + // + // There are two problems with the scheme as just described. + // First, the setup rewrites BitsDead to BitsScalar, but the type bits + // following a BitsDead are uninitialized and must not be used. + // Second, objects that are free are expected to have their type + // bits zeroed (BitsDead), so in the cleanup we need to restore + // any BitsDeads that were there originally. + // + // In a one-word object (8-byte allocation on 64-bit system), + // there is no difference between BitsScalar and BitsDead, because + // neither is a pointer and there are no more words in the object, + // so using BitsScalar during the checkmark is safe and mapping + // both back to BitsDead during cleanup is also safe. + // + // In a larger object, we need to be more careful. During setup, + // if the type of the first word is BitsDead, we change it to BitsScalar + // (as we must) but also initialize the type of the second + // word to BitsDead, so that a scan during the checkmark phase + // will still stop before seeing the uninitialized type bits in the + // rest of the object. The sequence 'BitsScalar BitsDead' never + // happens in real type bitmaps - BitsDead is always as early + // as possible, so immediately after the last BitsPointer. + // During cleanup, if we see a BitsScalar, we can check to see if it + // is followed by BitsDead. If so, it was originally BitsDead and + // we can change it back. + + if step == 0 { + // updating top and bottom nibbles, all boundaries + for i := int32(0); i < n/2; i, bitp = i+1, addb(bitp, uintptrMask&-1) { + if *bitp&bitBoundary == 0 { + gothrow("missing bitBoundary") + } + b := (*bitp & bitPtrMask) >> 2 + if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) { + *bitp &^= 0x0c // convert to _BitsDead + } else if b == _BitsScalarMarked || b == _BitsPointerMarked { + *bitp &^= _BitsCheckMarkXor << 2 + } + + if (*bitp>>gcBits)&bitBoundary == 0 { + gothrow("missing bitBoundary") + } + b = ((*bitp >> gcBits) & bitPtrMask) >> 2 + if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) { + *bitp &^= 0xc0 // convert to _BitsDead + } else if b == _BitsScalarMarked || b == _BitsPointerMarked { + *bitp &^= _BitsCheckMarkXor << (2 + gcBits) + } + } + } else { + // updating bottom nibble for first word of each object + for i := int32(0); i < n; i, bitp = i+1, addb(bitp, -step) { + if *bitp&bitBoundary == 0 { + gothrow("missing bitBoundary") + } + b := (*bitp & bitPtrMask) >> 2 + + if checkmark && b == _BitsDead { + // move BitsDead into second word. 
+ // set bits to BitsScalar in preparation for checkmark phase. + *bitp &^= 0xc0 + *bitp |= _BitsScalar << 2 + } else if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) && *bitp&0xc0 == 0 { + // Cleaning up after checkmark phase. + // First word is scalar or dead (we forgot) + // and second word is dead. + // First word might as well be dead too. + *bitp &^= 0x0c + } else if b == _BitsScalarMarked || b == _BitsPointerMarked { + *bitp ^= _BitsCheckMarkXor << 2 } } } +} - casgstatus(gp, _Gwaiting, _Grunning) +// clearcheckmarkbits preforms two tasks. +// 1. When used before the checkmark phase it converts BitsDead (00) to bitsScalar (01) +// for nibbles with the BoundaryBit set. +// 2. When used after the checkmark phase it converts BitsPointerMark (11) to BitsPointer 10 and +// BitsScalarMark (00) to BitsScalar (01), thus clearing the checkmark mark encoding. +// This is a bit expensive but preserves the BitsDead encoding during the normal marking. +// BitsDead remains valid for every nibble except the ones with BitsBoundary set. +func clearcheckmarkbits() { + for _, s := range work.spans { + if s.state == _MSpanInUse { + clearcheckmarkbitsspan(s) + } + } +} + +// Called from malloc.go using onM. +// The world is stopped. Rerun the scan and mark phases +// using the bitMarkedCheck bit instead of the +// bitMarked bit. If the marking encounters an +// bitMarked bit that is not set then we throw. +func gccheckmark_m(startTime int64, eagersweep bool) { + if !gccheckmarkenable { + return + } + + if checkmark { + gothrow("gccheckmark_m, entered with checkmark already true") + } + + checkmark = true + clearcheckmarkbits() // Converts BitsDead to BitsScalar. + gc_m(startTime, eagersweep) // turns off checkmark + // Work done, fixed up the GC bitmap to remove the checkmark bits. + clearcheckmarkbits() +} + +func gccheckmarkenable_m() { + gccheckmarkenable = true +} + +func gccheckmarkdisable_m() { + gccheckmarkenable = false +} + +func finishsweep_m() { + // The world is stopped so we should be able to complete the sweeps + // quickly. + for sweepone() != ^uintptr(0) { + sweep.npausesweep++ + } + + // There may be some other spans being swept concurrently that + // we need to wait for. If finishsweep_m is done with the world stopped + // this code is not required. + sg := mheap_.sweepgen + for _, s := range work.spans { + if s.sweepgen != sg && s.state == _MSpanInUse { + mSpan_EnsureSwept(s) + } + } +} + +// Scan all of the stacks, greying (or graying if in America) the referents +// but not blackening them since the mark write barrier isn't installed. +func gcscan_m() { + _g_ := getg() + + // Grab the g that called us and potentially allow rescheduling. + // This allows it to be scanned like other goroutines. + mastergp := _g_.m.curg + casgstatus(mastergp, _Grunning, _Gwaiting) + mastergp.waitreason = "garbage collection scan" + + // Span sweeping has been done by finishsweep_m. + // Long term we will want to make this goroutine runnable + // by placing it onto a scanenqueue state and then calling + // runtime·restartg(mastergp) to make it Grunnable. + // At the bottom we will want to return this p back to the scheduler. + oldphase := gcphase + + // Prepare flag indicating that the scan has not been completed. + lock(&allglock) + local_allglen := allglen + for i := uintptr(0); i < local_allglen; i++ { + gp := allgs[i] + gp.gcworkdone = false // set to true in gcphasework + } + unlock(&allglock) + + work.nwait = 0 + work.ndone = 0 + work.nproc = 1 // For now do not do this in parallel. 
+ gcphase = _GCscan + // ackgcphase is not needed since we are not scanning running goroutines. + parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), nil, false, markroot) + parfordo(work.markfor) + + lock(&allglock) + // Check that gc work is done. + for i := uintptr(0); i < local_allglen; i++ { + gp := allgs[i] + if !gp.gcworkdone { + gothrow("scan missed a g") + } + } + unlock(&allglock) + + gcphase = oldphase + casgstatus(mastergp, _Gwaiting, _Grunning) + // Let the g that called us continue to run. +} + +// Mark all objects that are known about. +func gcmark_m() { + scanblock(0, 0, nil) +} + +// For now this must be bracketed with a stoptheworld and a starttheworld to ensure +// all go routines see the new barrier. +func gcinstallmarkwb_m() { + gcphase = _GCmark +} + +// For now this must be bracketed with a stoptheworld and a starttheworld to ensure +// all go routines see the new barrier. +func gcinstalloffwb_m() { + gcphase = _GCoff } func gc(start_time int64, eagersweep bool) { @@ -1244,9 +1846,8 @@ func gc(start_time int64, eagersweep bool) { t1 = nanotime() } - // Sweep what is not sweeped by bgsweep. - for sweepone() != ^uintptr(0) { - sweep.npausesweep++ + if !checkmark { + finishsweep_m() // skip during checkmark debug phase. } // Cache runtime.mheap_.allspans in work.spans to avoid conflicts with @@ -1266,10 +1867,19 @@ func gc(start_time int64, eagersweep bool) { mheap_.gcspans = mheap_.allspans work.spans = h_allspans unlock(&mheap_.lock) + oldphase := gcphase work.nwait = 0 work.ndone = 0 work.nproc = uint32(gcprocs()) + gcphase = _GCmarktermination + + // World is stopped so allglen will not change. + for i := uintptr(0); i < allglen; i++ { + gp := allgs[i] + gp.gcworkdone = false // set to true in gcphasework + } + parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), nil, false, markroot) if work.nproc > 1 { noteclear(&work.alldone) @@ -1285,6 +1895,14 @@ func gc(start_time int64, eagersweep bool) { parfordo(work.markfor) scanblock(0, 0, nil) + if work.full != 0 { + gothrow("work.full != 0") + } + if work.partial != 0 { + gothrow("work.partial != 0") + } + + gcphase = oldphase var t3 int64 if debug.gctrace > 0 { t3 = nanotime() @@ -1349,6 +1967,15 @@ func gc(start_time int64, eagersweep bool) { sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys) } + if gccheckmarkenable { + if !checkmark { + // first half of two-pass; don't set up sweep + unlock(&mheap_.lock) + return + } + checkmark = false // done checking marks + } + // Cache the current array for sweeping. mheap_.gcspans = mheap_.allspans mheap_.sweepgen += 2 diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go index 38406f33a..7797894fc 100644 --- a/src/runtime/mgc0.go +++ b/src/runtime/mgc0.go @@ -97,54 +97,135 @@ func bgsweep() { } } +const ( + _PoisonGC = 0xf969696969696969 & (1<<(8*ptrSize) - 1) + _PoisonStack = 0x6868686868686868 & (1<<(8*ptrSize) - 1) +) + // NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer, // but if we do that, Go inserts a write barrier on *dst = src. //go:nosplit func writebarrierptr(dst *uintptr, src uintptr) { *dst = src + writebarrierptr_nostore(dst, src) +} + +// Like writebarrierptr, but the store has already been applied. +// Do not reapply. 
+//go:nosplit +func writebarrierptr_nostore(dst *uintptr, src uintptr) { + if getg() == nil { // very low-level startup + return + } + + if src != 0 && (src < _PageSize || src == _PoisonGC || src == _PoisonStack) { + systemstack(func() { gothrow("bad pointer in write barrier") }) + } + + mp := acquirem() + if mp.inwb || mp.dying > 0 { + releasem(mp) + return + } + mp.inwb = true + systemstack(func() { + gcmarkwb_m(dst, src) + }) + mp.inwb = false + releasem(mp) } //go:nosplit func writebarrierstring(dst *[2]uintptr, src [2]uintptr) { - dst[0] = src[0] + writebarrierptr(&dst[0], src[0]) dst[1] = src[1] } //go:nosplit func writebarrierslice(dst *[3]uintptr, src [3]uintptr) { - dst[0] = src[0] + writebarrierptr(&dst[0], src[0]) dst[1] = src[1] dst[2] = src[2] } //go:nosplit func writebarrieriface(dst *[2]uintptr, src [2]uintptr) { - dst[0] = src[0] - dst[1] = src[1] -} - -//go:nosplit -func writebarrierfat2(dst *[2]uintptr, _ *byte, src [2]uintptr) { - dst[0] = src[0] - dst[1] = src[1] + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) } -//go:nosplit -func writebarrierfat3(dst *[3]uintptr, _ *byte, src [3]uintptr) { - dst[0] = src[0] - dst[1] = src[1] - dst[2] = src[2] -} +//go:generate go run wbfat_gen.go -- wbfat.go +// +// The above line generates multiword write barriers for +// all the combinations of ptr+scalar up to four words. +// The implementations are written to wbfat.go. //go:nosplit -func writebarrierfat4(dst *[4]uintptr, _ *byte, src [4]uintptr) { - dst[0] = src[0] - dst[1] = src[1] - dst[2] = src[2] - dst[3] = src[3] +func writebarrierfat(typ *_type, dst, src unsafe.Pointer) { + mask := loadPtrMask(typ) + nptr := typ.size / ptrSize + for i := uintptr(0); i < nptr; i += 2 { + bits := mask[i/2] + if (bits>>2)&_BitsMask == _BitsPointer { + writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) + } else { + *(*uintptr)(dst) = *(*uintptr)(src) + } + dst = add(dst, ptrSize) + src = add(src, ptrSize) + if i+1 == nptr { + break + } + bits >>= 4 + if (bits>>2)&_BitsMask == _BitsPointer { + writebarrierptr((*uintptr)(dst), *(*uintptr)(src)) + } else { + *(*uintptr)(dst) = *(*uintptr)(src) + } + dst = add(dst, ptrSize) + src = add(src, ptrSize) + } } //go:nosplit -func writebarrierfat(typ *_type, dst, src unsafe.Pointer) { - memmove(dst, src, typ.size) +func writebarriercopy(typ *_type, dst, src slice) int { + n := dst.len + if n > src.len { + n = src.len + } + if n == 0 { + return 0 + } + dstp := unsafe.Pointer(dst.array) + srcp := unsafe.Pointer(src.array) + + if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) { + // Overlap with src before dst. + // Copy backward, being careful not to move dstp/srcp + // out of the array they point into. + dstp = add(dstp, uintptr(n-1)*typ.size) + srcp = add(srcp, uintptr(n-1)*typ.size) + i := uint(0) + for { + writebarrierfat(typ, dstp, srcp) + if i++; i >= n { + break + } + dstp = add(dstp, -typ.size) + srcp = add(srcp, -typ.size) + } + } else { + // Copy forward, being careful not to move dstp/srcp + // out of the array they point into. 
+ i := uint(0) + for { + writebarrierfat(typ, dstp, srcp) + if i++; i >= n { + break + } + dstp = add(dstp, typ.size) + srcp = add(srcp, typ.size) + } + } + return int(n) } diff --git a/src/runtime/mgc0.h b/src/runtime/mgc0.h index 62726b4f0..dd0c46024 100644 --- a/src/runtime/mgc0.h +++ b/src/runtime/mgc0.h @@ -12,9 +12,11 @@ enum { BitsPointer = 2, BitsMask = 3, PointersPerByte = 8/BitsPerPointer, - MaxGCMask = 64, insData = 1, insArray, insArrayEnd, insEnd, + + // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively. + MaxGCMask = 65536, // TODO(rsc): change back to 64 }; diff --git a/src/runtime/mgc1.go b/src/runtime/mgc1.go index d1aab4554..04a5207e5 100644 --- a/src/runtime/mgc1.go +++ b/src/runtime/mgc1.go @@ -50,12 +50,15 @@ const ( // If you change these, also change scanblock. // scanblock does "if(bits == BitsScalar || bits == BitsDead)" as "if(bits <= BitsScalar)". - _BitsDead = 0 - _BitsScalar = 1 - _BitsPointer = 2 + _BitsDead = 0 + _BitsScalar = 1 // 01 + _BitsPointer = 2 // 10 + _BitsCheckMarkXor = 1 // 10 + _BitsScalarMarked = _BitsScalar ^ _BitsCheckMarkXor // 00 + _BitsPointerMarked = _BitsPointer ^ _BitsCheckMarkXor // 11 // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively. - _MaxGCMask = 64 + _MaxGCMask = 65536 // TODO(rsc): change back to 64 ) // Bits in per-word bitmap. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index fedcd69c5..30205d68d 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -196,7 +196,7 @@ func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan // able to map interior pointer to containing span. atomicstore(&s.sweepgen, h.sweepgen) s.state = _MSpanInUse - s.freelist = nil + s.freelist = 0 s.ref = 0 s.sizeclass = uint8(sizeclass) if sizeclass == 0 { @@ -248,7 +248,7 @@ func mHeap_AllocStack(h *mheap, npage uintptr) *mspan { s := mHeap_AllocSpanLocked(h, npage) if s != nil { s.state = _MSpanStack - s.freelist = nil + s.freelist = 0 s.ref = 0 memstats.stacks_inuse += uint64(s.npages << _PageShift) } @@ -571,7 +571,7 @@ func mSpan_Init(span *mspan, start pageID, npages uintptr) { span.prev = nil span.start = start span.npages = npages - span.freelist = nil + span.freelist = 0 span.ref = 0 span.sizeclass = 0 span.incache = false diff --git a/src/runtime/os_linux_386.go b/src/runtime/os_linux_386.go index c4f95804a..adcd5a1c4 100644 --- a/src/runtime/os_linux_386.go +++ b/src/runtime/os_linux_386.go @@ -14,8 +14,7 @@ const ( var _vdso uint32 -//go:nosplit -func linux_setup_vdso(argc int32, argv **byte) { +func sysargs(argc int32, argv **byte) { // skip over argv, envv to get to auxv n := argc + 1 for argv_index(argv, n) != nil { diff --git a/src/runtime/print1.go b/src/runtime/print1.go index 8f8268873..3d812bd04 100644 --- a/src/runtime/print1.go +++ b/src/runtime/print1.go @@ -41,7 +41,31 @@ func snprintf(dst *byte, n int32, s *byte) { gp.writebuf = nil } -//var debuglock mutex +var debuglock mutex + +// The compiler emits calls to printlock and printunlock around +// the multiple calls that implement a single Go print or println +// statement. Some of the print helpers (printsp, for example) +// call print recursively. There is also the problem of a crash +// happening during the print routines and needing to acquire +// the print lock to print information about the crash. +// For both these reasons, let a thread acquire the printlock 'recursively'. 
+ +func printlock() { + mp := getg().m + mp.printlock++ + if mp.printlock == 1 { + lock(&debuglock) + } +} + +func printunlock() { + mp := getg().m + mp.printlock-- + if mp.printlock == 0 { + unlock(&debuglock) + } +} // write to goroutine-local buffer if diverting output, // or else standard error. @@ -80,7 +104,7 @@ func printnl() { // Very simple printf. Only for debugging prints. // Do not add to this without checking with Rob. func vprintf(str string, arg unsafe.Pointer) { - //lock(&debuglock); + printlock() s := bytes(str) start := 0 @@ -160,7 +184,7 @@ func vprintf(str string, arg unsafe.Pointer) { gwrite(s[start:i]) } - //unlock(&debuglock); + printunlock() } func printpc(p unsafe.Pointer) { diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 295190cb4..64f6a3520 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -181,6 +181,9 @@ func acquireSudog() *sudog { // which keeps the garbage collector from being invoked. mp := acquirem() p := new(sudog) + if p.elem != nil { + gothrow("acquireSudog: found p.elem != nil after new") + } releasem(mp) return p } diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go index aeded0e77..5a898ff41 100644 --- a/src/runtime/proc1.go +++ b/src/runtime/proc1.go @@ -316,6 +316,10 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { // Check that transition is valid. switch oldval { + default: + print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n") + dumpgstatus(gp) + gothrow("casfrom_Gscanstatus:top gp->status is not in scan state") case _Gscanrunnable, _Gscanwaiting, _Gscanrunning, @@ -377,12 +381,12 @@ func casgstatus(gp *g, oldval, newval uint32) { }) } // Help GC if needed. - if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) { - gp.preemptscan = false - systemstack(func() { - gcphasework(gp) - }) - } + // if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) { + // gp.preemptscan = false + // systemstack(func() { + // gcphasework(gp) + // }) + // } } } @@ -512,9 +516,10 @@ func stopscanstart(gp *g) { // Runs on g0 and does the actual work after putting the g back on the run queue. func mquiesce(gpmaster *g) { - activeglen := len(allgs) // enqueue the calling goroutine. restartg(gpmaster) + + activeglen := len(allgs) for i := 0; i < activeglen; i++ { gp := allgs[i] if readgstatus(gp) == _Gdead { @@ -1579,7 +1584,8 @@ func save(pc, sp uintptr) { _g_.sched.lr = 0 _g_.sched.ret = 0 _g_.sched.ctxt = nil - _g_.sched.g = _g_ + // write as uintptr to avoid write barrier, which will smash _g_.sched. + *(*uintptr)(unsafe.Pointer(&_g_.sched.g)) = uintptr(unsafe.Pointer(_g_)) } // The goroutine g is about to enter a system call. 
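The save hunk above stores _g_.sched.g through a *uintptr cast so the compiler emits no pointer write barrier for that field; as the in-diff comment says, the barrier call would smash _g_.sched while it is being filled in. Below is a minimal sketch of that cast pattern only, using simplified stand-in types (g, gobuf) and a hypothetical saveSketch helper rather than the runtime's declarations.

// Storing a pointer-typed field through a *uintptr cast is a plain
// integer store, so the compiler inserts no write-barrier call for it.
// The types here are stand-ins, not the runtime's definitions.
package main

import "unsafe"

type g struct{ id int }

type gobuf struct {
	sp, pc uintptr
	g      *g // ordinary stores to this field would get a write barrier
}

func saveSketch(buf *gobuf, gp *g) {
	// With barriers compiled in, "buf.g = gp" would go through
	// writebarrierptr(&buf.g, ...); the cast below sidesteps that.
	*(*uintptr)(unsafe.Pointer(&buf.g)) = uintptr(unsafe.Pointer(gp))
}

func main() {
	gp := &g{id: 1}
	var buf gobuf
	saveSketch(&buf, gp)
	_ = buf
}

The same concern shows up later in the stack1.go changes, where newstack re-runs the barrier by hand (writebarrierptr_nostore) for the sched.ctxt value that morestack wrote without one.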
@@ -1625,7 +1631,10 @@ func reentersyscall(pc, sp uintptr) { _g_.syscallpc = pc casgstatus(_g_, _Grunning, _Gsyscall) if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp { - systemstack(entersyscall_bad) + systemstack(func() { + print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n") + gothrow("entersyscall") + }) } if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic @@ -1654,13 +1663,6 @@ func entersyscall(dummy int32) { reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy))) } -func entersyscall_bad() { - var gp *g - gp = getg().m.curg - print("entersyscall inconsistent ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n") - gothrow("entersyscall") -} - func entersyscall_sysmon() { lock(&sched.lock) if atomicload(&sched.sysmonwait) != 0 { @@ -1692,12 +1694,26 @@ func entersyscallblock(dummy int32) { _g_.stackguard0 = stackPreempt // see comment in entersyscall // Leave SP around for GC and traceback. - save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy))) + pc := getcallerpc(unsafe.Pointer(&dummy)) + sp := getcallersp(unsafe.Pointer(&dummy)) + save(pc, sp) _g_.syscallsp = _g_.sched.sp _g_.syscallpc = _g_.sched.pc + if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp { + sp1 := sp + sp2 := _g_.sched.sp + sp3 := _g_.syscallsp + systemstack(func() { + print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n") + gothrow("entersyscallblock") + }) + } casgstatus(_g_, _Grunning, _Gsyscall) if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp { - systemstack(entersyscall_bad) + systemstack(func() { + print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n") + gothrow("entersyscallblock") + }) } systemstack(entersyscallblock_handoff) @@ -1776,6 +1792,7 @@ func exitsyscallfast() bool { // Freezetheworld sets stopwait but does not retake P's. if sched.stopwait != 0 { + _g_.m.mcache = nil _g_.m.p = nil return false } @@ -1789,6 +1806,7 @@ func exitsyscallfast() bool { } // Try to get any other idle P. + _g_.m.mcache = nil _g_.m.p = nil if sched.pidle != nil { var ok bool @@ -2363,6 +2381,8 @@ func setcpuprofilerate_m(hz int32) { } // Change number of processors. The world is stopped, sched is locked. +// gcworkbufs are not being modified by either the GC or +// the write barrier code. 
func procresize(new int32) { old := gomaxprocs if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs { diff --git a/src/runtime/rt0_linux_386.s b/src/runtime/rt0_linux_386.s index 352e594d5..47fd908e7 100644 --- a/src/runtime/rt0_linux_386.s +++ b/src/runtime/rt0_linux_386.s @@ -9,7 +9,6 @@ TEXT _rt0_386_linux(SB),NOSPLIT,$8 LEAL 12(SP), BX MOVL AX, 0(SP) MOVL BX, 4(SP) - CALL runtime·linux_setup_vdso(SB) CALL main(SB) INT $3 diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 15dea01a3..9e19b68be 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -97,7 +97,10 @@ func testAtomic64() { z64 = 42 x64 = 0 - // TODO: PREFETCH((unsafe.Pointer)(&z64)) + prefetcht0(uintptr(unsafe.Pointer(&z64))) + prefetcht1(uintptr(unsafe.Pointer(&z64))) + prefetcht2(uintptr(unsafe.Pointer(&z64))) + prefetchnta(uintptr(unsafe.Pointer(&z64))) if cas64(&z64, x64, 1) { gothrow("cas64 failed") } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 7987a7373..d18178d09 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -227,6 +227,8 @@ type m struct { helpgc int32 spinning bool // m is out of work and is actively looking for work blocked bool // m is blocked on a note + inwb bool // m is executing a write barrier + printlock int8 fastrand uint32 ncgocall uint64 // number of cgo calls in total ncgo int32 // number of cgo calls currently in progress @@ -402,8 +404,9 @@ type itab struct { } // Lock-free stack node. +// // Also known to export_test.go. type lfnode struct { - next *lfnode + next uint64 pushcnt uintptr } @@ -448,11 +451,13 @@ type debugvars struct { // Indicates to write barrier and sychronization task to preform. const ( - _GCoff = iota // stop and start nop - _GCquiesce // stop and start nop - _GCstw // stop the ps nop - _GCmark // scan the stacks and start no white to black - _GCsweep // stop and start nop + _GCoff = iota // GC not running, write barrier disabled + _GCquiesce // unused state + _GCstw // unused state + _GCscan // GC collecting roots into workbufs, write barrier disabled + _GCmark // GC marking from workbufs, write barrier ENABLED + _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED + _GCsweep // GC mark completed; sweeping in background, write barrier disabled ) type forcegcstate struct { diff --git a/src/runtime/select.go b/src/runtime/select.go index 6717b9395..5e5047bc1 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -377,12 +377,7 @@ loop: // iterating through the linked list they are in reverse order. cas = nil sglist = gp.waiting - // Clear all selectdone and elem before unlinking from gp.waiting. - // They must be cleared before being put back into the sudog cache. - // Clear before unlinking, because if a stack copy happens after the unlink, - // they will not be updated, they will be left pointing to the old stack, - // which creates dangling pointers, which may be detected by the - // garbage collector. + // Clear all elem before unlinking from gp.waiting. for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { sg1.selectdone = nil sg1.elem = nil diff --git a/src/runtime/stack1.go b/src/runtime/stack1.go index 1fd61ce1a..28000864d 100644 --- a/src/runtime/stack1.go +++ b/src/runtime/stack1.go @@ -58,7 +58,7 @@ func stackinit() { // Allocates a stack from the free pool. Must be called with // stackpoolmu held. 
-func stackpoolalloc(order uint8) *mlink { +func stackpoolalloc(order uint8) gclinkptr { list := &stackpool[order] s := list.next if s == list { @@ -70,23 +70,23 @@ func stackpoolalloc(order uint8) *mlink { if s.ref != 0 { gothrow("bad ref") } - if s.freelist != nil { + if s.freelist.ptr() != nil { gothrow("bad freelist") } for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order { - x := (*mlink)(unsafe.Pointer(uintptr(s.start)<<_PageShift + i)) - x.next = s.freelist + x := gclinkptr(uintptr(s.start)<<_PageShift + i) + x.ptr().next = s.freelist s.freelist = x } mSpanList_Insert(list, s) } x := s.freelist - if x == nil { + if x.ptr() == nil { gothrow("span has no free stacks") } - s.freelist = x.next + s.freelist = x.ptr().next s.ref++ - if s.freelist == nil { + if s.freelist.ptr() == nil { // all stacks in s are allocated. mSpanList_Remove(s) } @@ -94,22 +94,22 @@ func stackpoolalloc(order uint8) *mlink { } // Adds stack x to the free pool. Must be called with stackpoolmu held. -func stackpoolfree(x *mlink, order uint8) { +func stackpoolfree(x gclinkptr, order uint8) { s := mHeap_Lookup(&mheap_, (unsafe.Pointer)(x)) if s.state != _MSpanStack { gothrow("freeing stack not in a stack span") } - if s.freelist == nil { + if s.freelist.ptr() == nil { // s will now have a free stack mSpanList_Insert(&stackpool[order], s) } - x.next = s.freelist + x.ptr().next = s.freelist s.freelist = x s.ref-- if s.ref == 0 { // span is completely free - return to heap mSpanList_Remove(s) - s.freelist = nil + s.freelist = 0 mHeap_FreeStack(&mheap_, s) } } @@ -123,12 +123,12 @@ func stackcacherefill(c *mcache, order uint8) { // Grab some stacks from the global cache. // Grab half of the allowed capacity (to prevent thrashing). - var list *mlink + var list gclinkptr var size uintptr lock(&stackpoolmu) for size < _StackCacheSize/2 { x := stackpoolalloc(order) - x.next = list + x.ptr().next = list list = x size += _FixedStack << order } @@ -145,7 +145,7 @@ func stackcacherelease(c *mcache, order uint8) { size := c.stackcache[order].size lock(&stackpoolmu) for size > _StackCacheSize/2 { - y := x.next + y := x.ptr().next stackpoolfree(x, order) x = y size -= _FixedStack << order @@ -162,12 +162,12 @@ func stackcache_clear(c *mcache) { lock(&stackpoolmu) for order := uint8(0); order < _NumStackOrders; order++ { x := c.stackcache[order].list - for x != nil { - y := x.next + for x.ptr() != nil { + y := x.ptr().next stackpoolfree(x, order) x = y } - c.stackcache[order].list = nil + c.stackcache[order].list = 0 c.stackcache[order].size = 0 } unlock(&stackpoolmu) @@ -207,7 +207,7 @@ func stackalloc(n uint32) stack { order++ n2 >>= 1 } - var x *mlink + var x gclinkptr c := thisg.m.mcache if c == nil || thisg.m.gcing != 0 || thisg.m.helpgc != 0 { // c == nil can happen in the guts of exitsyscall or @@ -219,11 +219,11 @@ func stackalloc(n uint32) stack { unlock(&stackpoolmu) } else { x = c.stackcache[order].list - if x == nil { + if x.ptr() == nil { stackcacherefill(c, order) x = c.stackcache[order].list } - c.stackcache[order].list = x.next + c.stackcache[order].list = x.ptr().next c.stackcache[order].size -= uintptr(n) } v = (unsafe.Pointer)(x) @@ -270,7 +270,7 @@ func stackfree(stk stack) { order++ n2 >>= 1 } - x := (*mlink)(v) + x := gclinkptr(v) c := gp.m.mcache if c == nil || gp.m.gcing != 0 || gp.m.helpgc != 0 { lock(&stackpoolmu) @@ -280,7 +280,7 @@ func stackfree(stk stack) { if c.stackcache[order].size >= _StackCacheSize { stackcacherelease(c, order) } - x.next = c.stackcache[order].list + x.ptr().next = 
c.stackcache[order].list c.stackcache[order].list = x c.stackcache[order].size += n } @@ -526,6 +526,7 @@ func fillstack(stk stack, b byte) { } // Copies gp's stack to a new stack of a different size. +// Caller must have changed gp status to Gcopystack. func copystack(gp *g, newsize uintptr) { if gp.syscallsp != 0 { gothrow("stack growth not allowed in system call") @@ -563,15 +564,11 @@ func copystack(gp *g, newsize uintptr) { } memmove(unsafe.Pointer(new.hi-used), unsafe.Pointer(old.hi-used), used) - oldstatus := casgcopystack(gp) // cas from Gwaiting or Grunnable to Gcopystack, return old status - // Swap out old stack for new one gp.stack = new gp.stackguard0 = new.lo + _StackGuard // NOTE: might clobber a preempt request gp.sched.sp = new.hi - used - casgstatus(gp, _Gcopystack, oldstatus) // oldstatus is Gwaiting or Grunnable - // free old stack if stackPoisonCopy != 0 { fillstack(old, 0xfc) @@ -669,6 +666,14 @@ func newstack() { gothrow("runtime: split stack overflow") } + if gp.sched.ctxt != nil { + // morestack wrote sched.ctxt on its way in here, + // without a write barrier. Run the write barrier now. + // It is not possible to be preempted between then + // and now, so it's okay. + writebarrierptr_nostore((*uintptr)(unsafe.Pointer(&gp.sched.ctxt)), uintptr(gp.sched.ctxt)) + } + if gp.stackguard0 == stackPreempt { if gp == thisg.m.g0 { gothrow("runtime: preempt g0") @@ -677,7 +682,12 @@ func newstack() { gothrow("runtime: g is running but p is not") } if gp.preemptscan { + for !castogscanstatus(gp, _Gwaiting, _Gscanwaiting) { + // Likely to be racing with the GC as it sees a _Gwaiting and does the stack scan. + // If so this stack will be scanned twice which does not change correctness. + } gcphasework(gp) + casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting) casgstatus(gp, _Gwaiting, _Grunning) gp.stackguard0 = gp.stack.lo + _StackGuard gp.preempt = false @@ -708,13 +718,15 @@ func newstack() { gothrow("stack overflow") } - // Note that the concurrent GC might be scanning the stack as we try to replace it. - // copystack takes care of the appropriate coordination with the stack scanner. + casgstatus(gp, _Gwaiting, _Gcopystack) + + // The concurrent GC will not scan the stack while we are doing the copy since + // the gp is in a Gcopystack status. copystack(gp, uintptr(newsize)) if stackDebug >= 1 { print("stack grow done\n") } - casgstatus(gp, _Gwaiting, _Grunning) + casgstatus(gp, _Gcopystack, _Grunning) gogo(&gp.sched) } @@ -767,17 +779,17 @@ func shrinkstack(gp *g) { if gp.syscallsp != 0 { return } - - /* TODO - if goos_windows && gp.m != nil && gp.m.libcallsp != 0 { + if goos_windows != 0 && gp.m != nil && gp.m.libcallsp != 0 { return } - */ if stackDebug > 0 { print("shrinking stack ", oldsize, "->", newsize, "\n") } + + oldstatus := casgcopystack(gp) copystack(gp, newsize) + casgstatus(gp, _Gcopystack, oldstatus) } // Do any delayed stack freeing that was queued up during GC. 
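The stack1.go hunks above convert the stack free lists from *mlink pointers to gclinkptr, an integer-valued link with a ptr() accessor, so that pushing and popping free blocks uses ordinary uintptr stores rather than pointer writes the new barrier would have to process. A self-contained sketch of that linked-list pattern, with simplified declarations rather than the runtime's own, follows.

// gclinkptr stores a link as a uintptr: assignments to next are integer
// stores (no write barrier), and ptr() recovers a node pointer for walking.
// These declarations are simplified stand-ins for the runtime's types.
package main

import (
	"fmt"
	"unsafe"
)

type gclinkptr uintptr

type gclink struct {
	next gclinkptr
}

// ptr converts the integer link back into a usable node pointer.
func (p gclinkptr) ptr() *gclink {
	return (*gclink)(unsafe.Pointer(p))
}

func main() {
	// Build a two-element list; a zero gclinkptr terminates it.
	a, b := new(gclink), new(gclink)
	head := gclinkptr(uintptr(unsafe.Pointer(a)))
	head.ptr().next = gclinkptr(uintptr(unsafe.Pointer(b))) // integer store

	// Walk it the same way the stackpool and sweep loops above do.
	for x := head; x.ptr() != nil; x = x.ptr().next {
		fmt.Printf("node at %#x\n", uintptr(x))
	}
}

The x.ptr().next stores in stackpoolalloc, stackcacherefill, and mSpan_Sweep above all follow this shape.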
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index aa7577cf9..4408e22bf 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -231,3 +231,8 @@ func call536870912(fn, arg unsafe.Pointer, n, retoffset uint32) func call1073741824(fn, arg unsafe.Pointer, n, retoffset uint32) func systemstack_switch() + +func prefetcht0(addr uintptr) +func prefetcht1(addr uintptr) +func prefetcht2(addr uintptr) +func prefetchnta(addr uintptr) diff --git a/src/runtime/vdso_none.go b/src/runtime/vdso_none.go index ac6f8cb18..6f83ecc89 100644 --- a/src/runtime/vdso_none.go +++ b/src/runtime/vdso_none.go @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. // +build !linux !amd64 +// +build !linux !386 package runtime diff --git a/src/runtime/wbfat.go b/src/runtime/wbfat.go new file mode 100644 index 000000000..75c58b26b --- /dev/null +++ b/src/runtime/wbfat.go @@ -0,0 +1,190 @@ +// generated by wbfat_gen.go; use go generate + +package runtime + +//go:nosplit +func writebarrierfat01(dst *[2]uintptr, _ *byte, src [2]uintptr) { + dst[0] = src[0] + writebarrierptr(&dst[1], src[1]) +} + +//go:nosplit +func writebarrierfat10(dst *[2]uintptr, _ *byte, src [2]uintptr) { + writebarrierptr(&dst[0], src[0]) + dst[1] = src[1] +} + +//go:nosplit +func writebarrierfat11(dst *[2]uintptr, _ *byte, src [2]uintptr) { + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) +} + +//go:nosplit +func writebarrierfat001(dst *[3]uintptr, _ *byte, src [3]uintptr) { + dst[0] = src[0] + dst[1] = src[1] + writebarrierptr(&dst[2], src[2]) +} + +//go:nosplit +func writebarrierfat010(dst *[3]uintptr, _ *byte, src [3]uintptr) { + dst[0] = src[0] + writebarrierptr(&dst[1], src[1]) + dst[2] = src[2] +} + +//go:nosplit +func writebarrierfat011(dst *[3]uintptr, _ *byte, src [3]uintptr) { + dst[0] = src[0] + writebarrierptr(&dst[1], src[1]) + writebarrierptr(&dst[2], src[2]) +} + +//go:nosplit +func writebarrierfat100(dst *[3]uintptr, _ *byte, src [3]uintptr) { + writebarrierptr(&dst[0], src[0]) + dst[1] = src[1] + dst[2] = src[2] +} + +//go:nosplit +func writebarrierfat101(dst *[3]uintptr, _ *byte, src [3]uintptr) { + writebarrierptr(&dst[0], src[0]) + dst[1] = src[1] + writebarrierptr(&dst[2], src[2]) +} + +//go:nosplit +func writebarrierfat110(dst *[3]uintptr, _ *byte, src [3]uintptr) { + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) + dst[2] = src[2] +} + +//go:nosplit +func writebarrierfat111(dst *[3]uintptr, _ *byte, src [3]uintptr) { + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) + writebarrierptr(&dst[2], src[2]) +} + +//go:nosplit +func writebarrierfat0001(dst *[4]uintptr, _ *byte, src [4]uintptr) { + dst[0] = src[0] + dst[1] = src[1] + dst[2] = src[2] + writebarrierptr(&dst[3], src[3]) +} + +//go:nosplit +func writebarrierfat0010(dst *[4]uintptr, _ *byte, src [4]uintptr) { + dst[0] = src[0] + dst[1] = src[1] + writebarrierptr(&dst[2], src[2]) + dst[3] = src[3] +} + +//go:nosplit +func writebarrierfat0011(dst *[4]uintptr, _ *byte, src [4]uintptr) { + dst[0] = src[0] + dst[1] = src[1] + writebarrierptr(&dst[2], src[2]) + writebarrierptr(&dst[3], src[3]) +} + +//go:nosplit +func writebarrierfat0100(dst *[4]uintptr, _ *byte, src [4]uintptr) { + dst[0] = src[0] + writebarrierptr(&dst[1], src[1]) + dst[2] = src[2] + dst[3] = src[3] +} + +//go:nosplit +func writebarrierfat0101(dst *[4]uintptr, _ *byte, src [4]uintptr) { + dst[0] = src[0] + writebarrierptr(&dst[1], src[1]) + dst[2] = src[2] + writebarrierptr(&dst[3], src[3]) +} + +//go:nosplit +func 
writebarrierfat0110(dst *[4]uintptr, _ *byte, src [4]uintptr) { + dst[0] = src[0] + writebarrierptr(&dst[1], src[1]) + writebarrierptr(&dst[2], src[2]) + dst[3] = src[3] +} + +//go:nosplit +func writebarrierfat0111(dst *[4]uintptr, _ *byte, src [4]uintptr) { + dst[0] = src[0] + writebarrierptr(&dst[1], src[1]) + writebarrierptr(&dst[2], src[2]) + writebarrierptr(&dst[3], src[3]) +} + +//go:nosplit +func writebarrierfat1000(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + dst[1] = src[1] + dst[2] = src[2] + dst[3] = src[3] +} + +//go:nosplit +func writebarrierfat1001(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + dst[1] = src[1] + dst[2] = src[2] + writebarrierptr(&dst[3], src[3]) +} + +//go:nosplit +func writebarrierfat1010(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + dst[1] = src[1] + writebarrierptr(&dst[2], src[2]) + dst[3] = src[3] +} + +//go:nosplit +func writebarrierfat1011(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + dst[1] = src[1] + writebarrierptr(&dst[2], src[2]) + writebarrierptr(&dst[3], src[3]) +} + +//go:nosplit +func writebarrierfat1100(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) + dst[2] = src[2] + dst[3] = src[3] +} + +//go:nosplit +func writebarrierfat1101(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) + dst[2] = src[2] + writebarrierptr(&dst[3], src[3]) +} + +//go:nosplit +func writebarrierfat1110(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) + writebarrierptr(&dst[2], src[2]) + dst[3] = src[3] +} + +//go:nosplit +func writebarrierfat1111(dst *[4]uintptr, _ *byte, src [4]uintptr) { + writebarrierptr(&dst[0], src[0]) + writebarrierptr(&dst[1], src[1]) + writebarrierptr(&dst[2], src[2]) + writebarrierptr(&dst[3], src[3]) +} diff --git a/src/runtime/wbfat_gen.go b/src/runtime/wbfat_gen.go new file mode 100644 index 000000000..78d5b6271 --- /dev/null +++ b/src/runtime/wbfat_gen.go @@ -0,0 +1,41 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build ignore + +package main + +import ( + "flag" + "fmt" + "log" + "os" +) + +func main() { + flag.Parse() + if flag.NArg() > 0 { + f, err := os.Create(flag.Arg(0)) + if err != nil { + log.Fatal(err) + } + os.Stdout = f + } + fmt.Printf("// generated by wbfat_gen.go; use go generate\n\n") + fmt.Printf("package runtime\n") + for i := uint(2); i <= 4; i++ { + for j := 1; j < 1<<i; j++ { + fmt.Printf("\n//go:nosplit\n") + fmt.Printf("func writebarrierfat%0*b(dst *[%d]uintptr, _ *byte, src [%d]uintptr) {\n", int(i), j, i, i) + for k := uint(0); k < i; k++ { + if j&(1<<(i-1-k)) != 0 { + fmt.Printf("\twritebarrierptr(&dst[%d], src[%d])\n", k, k) + } else { + fmt.Printf("\tdst[%d] = src[%d]\n", k, k) + } + } + fmt.Printf("}\n") + } + } +} diff --git a/src/sync/atomic/atomic_test.go b/src/sync/atomic/atomic_test.go index 9f13af48b..ec573aa8c 100644 --- a/src/sync/atomic/atomic_test.go +++ b/src/sync/atomic/atomic_test.go @@ -164,7 +164,7 @@ func TestSwapPointer(t *testing.T) { x.before = magicptr x.after = magicptr var j uintptr - for delta := uintptr(1); delta+delta > delta; delta += delta { + for delta := uintptr(1 << 16); delta+delta > delta; delta += delta { k := SwapPointer(&x.i, unsafe.Pointer(delta)) if uintptr(x.i) != delta || uintptr(k) != j { t.Fatalf("delta=%d i=%d j=%d k=%d", delta, x.i, j, k) @@ -456,7 +456,7 @@ func TestCompareAndSwapPointer(t *testing.T) { magicptr := uintptr(m) x.before = magicptr x.after = magicptr - for val := uintptr(1); val+val > val; val += val { + for val := uintptr(1 << 16); val+val > val; val += val { x.i = unsafe.Pointer(val) if !CompareAndSwapPointer(&x.i, unsafe.Pointer(val), unsafe.Pointer(val+1)) { t.Fatalf("should have swapped %#x %#x", val, val+1) @@ -595,7 +595,7 @@ func TestLoadPointer(t *testing.T) { magicptr := uintptr(m) x.before = magicptr x.after = magicptr - for delta := uintptr(1); delta+delta > delta; delta += delta { + for delta := uintptr(1 << 16); delta+delta > delta; delta += delta { k := LoadPointer(&x.i) if k != x.i { t.Fatalf("delta=%d i=%d k=%d", delta, x.i, k) @@ -731,7 +731,7 @@ func TestStorePointer(t *testing.T) { x.before = magicptr x.after = magicptr v := unsafe.Pointer(uintptr(0)) - for delta := uintptr(1); delta+delta > delta; delta += delta { + for delta := uintptr(1 << 16); delta+delta > delta; delta += delta { StorePointer(&x.i, unsafe.Pointer(v)) if x.i != v { t.Fatalf("delta=%d i=%d v=%d", delta, x.i, v) diff --git a/test/live.go b/test/live.go index f69d0a4c1..62c6a0b0e 100644 --- a/test/live.go +++ b/test/live.go @@ -9,20 +9,39 @@ package main +func printnl() + +//go:noescape +func printpointer(**int) + +//go:noescape +func printintpointer(*int) + +//go:noescape +func printstringpointer(*string) + +//go:noescape +func printstring(string) + +//go:noescape +func printbytepointer(*byte) + +func printint(int) + func f1() { var x *int - print(&x) // ERROR "live at call to printpointer: x$" - print(&x) // ERROR "live at call to printpointer: x$" + printpointer(&x) // ERROR "live at call to printpointer: x$" + printpointer(&x) // ERROR "live at call to printpointer: x$" } func f2(b bool) { if b { - print(0) // nothing live here + printint(0) // nothing live here return } var x *int - print(&x) // ERROR "live at call to printpointer: x$" - print(&x) // ERROR "live at call to printpointer: x$" + printpointer(&x) // ERROR "live at call to printpointer: x$" + printpointer(&x) // ERROR "live at call to printpointer: x$" } func f3(b bool) { @@ -30,22 +49,22 @@ func f3(b bool) { // live throughout the function, to avoid 
being poisoned // in GODEBUG=gcdead=1 mode. - print(0) // ERROR "live at call to printint: x y$" + printint(0) // ERROR "live at call to printint: x y$" if b == false { - print(0) // ERROR "live at call to printint: x y$" + printint(0) // ERROR "live at call to printint: x y$" return } if b { var x *int - print(&x) // ERROR "live at call to printpointer: x y$" - print(&x) // ERROR "live at call to printpointer: x y$" + printpointer(&x) // ERROR "live at call to printpointer: x y$" + printpointer(&x) // ERROR "live at call to printpointer: x y$" } else { var y *int - print(&y) // ERROR "live at call to printpointer: x y$" - print(&y) // ERROR "live at call to printpointer: x y$" + printpointer(&y) // ERROR "live at call to printpointer: x y$" + printpointer(&y) // ERROR "live at call to printpointer: x y$" } - print(0) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live" + printint(0) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live" } // The old algorithm treated x as live on all code that @@ -56,20 +75,20 @@ func f3(b bool) { func f4(b1, b2 bool) { // x not live here if b2 { - print(0) // x not live here + printint(0) // x not live here return } var z **int x := new(int) *x = 42 z = &x - print(**z) // ERROR "live at call to printint: x z$" + printint(**z) // ERROR "live at call to printint: x z$" if b2 { - print(1) // ERROR "live at call to printint: x$" + printint(1) // ERROR "live at call to printint: x$" return } for { - print(**z) // ERROR "live at call to printint: x z$" + printint(**z) // ERROR "live at call to printint: x z$" } } @@ -84,7 +103,7 @@ func f5(b1 bool) { *y = 54 z = &y } - print(**z) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live" + printint(**z) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live" } // confusion about the _ result used to cause spurious "live at entry to f6: _". @@ -155,8 +174,8 @@ func f11b() *int { // At this point p is dead: the code here cannot // get to the bottom of the function. // This used to have a spurious "live at call to printint: p". - print(1) // nothing live here! - select { // ERROR "live at call to newselect: autotmp" "live at call to selectgo: autotmp" + printint(1) // nothing live here! + select { // ERROR "live at call to newselect: autotmp" "live at call to selectgo: autotmp" case <-c: // ERROR "live at call to selectrecv: autotmp" return nil case <-c: // ERROR "live at call to selectrecv: autotmp" @@ -172,8 +191,8 @@ func f11c() *int { if b { // Unlike previous, the cases in this select fall through, // so we can get to the println, so p is not dead. 
- print(1) // ERROR "live at call to printint: p" - select { // ERROR "live at call to newselect: autotmp.* p" "live at call to selectgo: autotmp.* p" + printint(1) // ERROR "live at call to printint: p" + select { // ERROR "live at call to newselect: autotmp.* p" "live at call to selectgo: autotmp.* p" case <-c: // ERROR "live at call to selectrecv: autotmp.* p" case <-c: // ERROR "live at call to selectrecv: autotmp.* p" } @@ -209,7 +228,7 @@ func h13(string, string) string func f14() { x := g14() - print(&x) // ERROR "live at call to printpointer: x" + printstringpointer(&x) // ERROR "live at call to printstringpointer: x" } func g14() string @@ -217,8 +236,8 @@ func g14() string func f15() { var x string _ = &x - x = g15() // ERROR "live at call to g15: x" - print(x) // ERROR "live at call to printstring: x" + x = g15() // ERROR "live at call to g15: x" + printstring(x) // ERROR "live at call to printstring: x" } func g15() string @@ -282,7 +301,7 @@ func f18() { } z = m2[g18()] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" z = m2[g18()] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" - print(z) + printbytepointer(z) } var ch chan *byte @@ -296,7 +315,7 @@ func f19() { } z = <-ch // ERROR "live at call to chanrecv1: autotmp_[0-9]+$" z = <-ch // ERROR "live at call to chanrecv1: autotmp_[0-9]+$" - print(z) + printbytepointer(z) } func f20() { @@ -316,7 +335,7 @@ func f21() { } z = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" z = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" - print(z) + printbytepointer(z) } func f23() { @@ -328,7 +347,8 @@ func f23() { } z, ok = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess2: autotmp_[0-9]+$" z, ok = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess2: autotmp_[0-9]+$" - print(z, ok) + printbytepointer(z) + print(ok) } func f24() { @@ -350,8 +370,8 @@ func f25(b bool) { } var x string _ = &x - x = g15() // ERROR "live at call to g15: x" - print(x) // ERROR "live at call to printstring: x" + x = g15() // ERROR "live at call to g15: x" + printstring(x) // ERROR "live at call to printstring: x" } // ERROR "live at call to deferreturn: x" func g25() @@ -366,7 +386,7 @@ func f26(b bool) { } print26((*int)(nil), (*int)(nil), (*int)(nil)) // ERROR "live at call to print26: autotmp_[0-9]+$" print26((*int)(nil), (*int)(nil), (*int)(nil)) // ERROR "live at call to print26: autotmp_[0-9]+$" - println() + printnl() } //go:noescape @@ -381,7 +401,7 @@ func f27(b bool) { } call27(func() { x++ }) // ERROR "live at call to call27: autotmp_[0-9]+$" call27(func() { x++ }) // ERROR "live at call to call27: autotmp_[0-9]+$" - println() + printnl() } // but defer does escape to later execution in the function @@ -392,7 +412,7 @@ func f27defer(b bool) { defer call27(func() { x++ }) // ERROR "live at call to deferproc: autotmp_[0-9]+$" "live at call to deferreturn: autotmp_[0-9]+$" } defer call27(func() { x++ }) // ERROR "live at call to deferproc: autotmp_[0-9]+ autotmp_[0-9]+$" "live at call to deferreturn: autotmp_[0-9]+ autotmp_[0-9]+$" "ambiguously live" - println() // ERROR "live at call to printnl: autotmp_[0-9]+ autotmp_[0-9]+$" + printnl() // ERROR "live at call to printnl: autotmp_[0-9]+ autotmp_[0-9]+$" } // ERROR "live at call to deferreturn: autotmp_[0-9]+ autotmp_[0-9]+$" // and newproc (go) escapes to the heap @@ -403,7 +423,7 @@ func f27go(b bool) { go call27(func() { x++ }) // ERROR "live at call to newobject: &x" "live at call to newproc: &x$" } go 
call27(func() { x++ }) // ERROR "live at call to newobject: &x" - println() + printnl() } //go:noescape @@ -415,10 +435,10 @@ var s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 string func f28(b bool) { if b { - print(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$" + printstring(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$" } - print(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$" - print(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$" + printstring(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$" + printstring(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$" } // map iterator should die on end of range loop @@ -426,14 +446,14 @@ func f28(b bool) { func f29(b bool) { if b { for k := range m { // ERROR "live at call to mapiterinit: autotmp_[0-9]+$" "live at call to mapiternext: autotmp_[0-9]+$" - print(k) // ERROR "live at call to printstring: autotmp_[0-9]+$" + printstring(k) // ERROR "live at call to printstring: autotmp_[0-9]+$" } } for k := range m { // ERROR "live at call to mapiterinit: autotmp_[0-9]+$" "live at call to mapiternext: autotmp_[0-9]+$" - print(k) // ERROR "live at call to printstring: autotmp_[0-9]+$" + printstring(k) // ERROR "live at call to printstring: autotmp_[0-9]+$" } for k := range m { // ERROR "live at call to mapiterinit: autotmp_[0-9]+$" "live at call to mapiternext: autotmp_[0-9]+$" - print(k) // ERROR "live at call to printstring: autotmp_[0-9]+$" + printstring(k) // ERROR "live at call to printstring: autotmp_[0-9]+$" } } @@ -446,14 +466,14 @@ func f30(b bool) { // the copy of ptrarr and the internal iterator pointer. 
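(f30's body continues just below.) A brief aside on why ranging over an array of pointers creates liveness work at all: a range over an array operates on a hidden copy of it, and that copy is exactly "the copy of ptrarr" the comment above refers to. A standalone sketch of that behavior, outside the errorcheck harness:

    package main

    import "fmt"

    var ptrarr [10]*int

    func main() {
        x := 1
        ptrarr[0] = &x
        // range evaluates a hidden copy of the array, so clearing the
        // original inside the loop does not change what p sees; that
        // copy is the kind of temporary the liveness test tracks.
        for i, p := range ptrarr {
            if i == 0 {
                ptrarr[0] = nil
                fmt.Println(p != nil) // prints true: p came from the copy
            }
        }
    }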
if b { for _, p := range ptrarr { - print(p) // ERROR "live at call to printpointer: autotmp_[0-9]+ autotmp_[0-9]+$" + printintpointer(p) // ERROR "live at call to printintpointer: autotmp_[0-9]+ autotmp_[0-9]+$" } } for _, p := range ptrarr { - print(p) // ERROR "live at call to printpointer: autotmp_[0-9]+ autotmp_[0-9]+$" + printintpointer(p) // ERROR "live at call to printintpointer: autotmp_[0-9]+ autotmp_[0-9]+$" } for _, p := range ptrarr { - print(p) // ERROR "live at call to printpointer: autotmp_[0-9]+ autotmp_[0-9]+$" + printintpointer(p) // ERROR "live at call to printintpointer: autotmp_[0-9]+ autotmp_[0-9]+$" } } @@ -503,44 +523,44 @@ var m33 map[interface{}]int func f33() { if m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" - println() + printnl() return } else { - println() + printnl() } - println() + printnl() } func f34() { if m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" - println() + printnl() return } - println() + printnl() } func f35() { if m33[nil] == 0 && m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" - println() + printnl() return } - println() + printnl() } func f36() { if m33[nil] == 0 || m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" - println() + printnl() return } - println() + printnl() } func f37() { if (m33[nil] == 0 || m33[nil] == 0) && m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$" - println() + printnl() return } - println() + printnl() } // select temps should disappear in the case bodies @@ -558,44 +578,44 @@ func f38(b bool) { if b { select { // ERROR "live at call" case <-fc38(): // ERROR "live at call" - println() + printnl() case fc38() <- *fi38(1): // ERROR "live at call" - println() + printnl() case *fi38(2) = <-fc38(): // ERROR "live at call" - println() + printnl() case *fi38(3), *fb38() = <-fc38(): // ERROR "live at call" - println() + printnl() } - println() + printnl() } - println() + printnl() } // issue 8097: mishandling of x = x during return. func f39() (x []int) { x = []int{1} - println() // ERROR "live at call to printnl: x" + printnl() // ERROR "live at call to printnl: x" return x } func f39a() (x []int) { x = []int{1} - println() // ERROR "live at call to printnl: x" + printnl() // ERROR "live at call to printnl: x" return } func f39b() (x [10]*int) { x = [10]*int{} x[0] = new(int) // ERROR "live at call to newobject: x" - println() // ERROR "live at call to printnl: x" + printnl() // ERROR "live at call to printnl: x" return x } func f39c() (x [10]*int) { x = [10]*int{} x[0] = new(int) // ERROR "live at call to newobject: x" - println() // ERROR "live at call to printnl: x" + printnl() // ERROR "live at call to printnl: x" return } @@ -615,13 +635,13 @@ func newT40() *T40 { func bad40() { t := newT40() _ = t - println() + printnl() } func good40() { ret := T40{} ret.m = make(map[int]int) // ERROR "live at call to makemap: ret" t := &ret - println() // ERROR "live at call to printnl: ret" + printnl() // ERROR "live at call to printnl: ret" _ = t } diff --git a/test/live2.go b/test/live2.go index ef6ad994c..1bd0af2cc 100644 --- a/test/live2.go +++ b/test/live2.go @@ -12,6 +12,8 @@ package main // issue 8142: lost 'addrtaken' bit on inlined variables. // no inlining in this test, so just checking that non-inlined works. 
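live2.go follows suit just below, declaring its own printnl stub in place of the built-in println, presumably for the same reason as the live.go changes above: each checked call site in the ERROR text then names one ordinary function. For readers who want to poke at the shape outside the errorcheck harness, here is a self-contained version of the bad40/good40 pair from the hunks above; the print calls are dropped so the sketch builds without the bodyless stubs:

    package main

    type T40 struct {
        m map[int]int
    }

    func newT40() *T40 {
        ret := T40{}
        ret.m = make(map[int]int)
        return &ret
    }

    func bad40() {
        // The struct is built inside the constructor and is reached
        // only through the returned pointer here.
        t := newT40()
        _ = t
    }

    func good40() {
        // Here the struct is a local whose address is taken, so the
        // test expects "ret" to be reported live at the makemap call
        // and at the later print call.
        ret := T40{}
        ret.m = make(map[int]int)
        t := &ret
        _ = t
    }

    func main() {
        bad40()
        good40()
    }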
+func printnl() + type T40 struct { m map[int]int } @@ -24,7 +26,7 @@ func newT40() *T40 { func bad40() { t := newT40() // ERROR "live at call to makemap: ret" - println() // ERROR "live at call to printnl: ret" + printnl() // ERROR "live at call to printnl: ret" _ = t } @@ -32,6 +34,6 @@ func good40() { ret := T40{} ret.m = make(map[int]int) // ERROR "live at call to makemap: ret" t := &ret - println() // ERROR "live at call to printnl: ret" + printnl() // ERROR "live at call to printnl: ret" _ = t }
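One last aside, on the sync/atomic test hunks earlier in this commit: TestSwapPointer and friends now start delta at 1<<16 instead of 1, presumably to keep small integer values out of pointer-typed words under the stricter collector; the loop's termination logic is untouched, since it simply doubles delta until the addition overflows. A small standalone illustration (the 47/15 counts are for 64- and 32-bit uintptr respectively):

    package main

    import "fmt"

    func main() {
        // Same loop shape as the atomic pointer tests: double delta
        // until delta+delta would overflow uintptr. Starting at 1<<16
        // only skips the smallest values; the exit condition is the same.
        n := 0
        for delta := uintptr(1 << 16); delta+delta > delta; delta += delta {
            n++
        }
        fmt.Println("doublings before overflow:", n) // 47 on 64-bit, 15 on 32-bit
    }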