author    Russ Cox <rsc@golang.org>    2014-11-14 11:37:54 -0500
committer Russ Cox <rsc@golang.org>    2014-11-14 11:37:54 -0500
commit    f4110c2e9cc8f316e14f3a4a35789bc821b326bf (patch)
tree      3a2b461d76cc134f7b71e5e1ef63bc37e7d25059 /src/runtime
parent    78d351d121615f1101f28f88920029f20884c689 (diff)
parent    4664f7441b495d8fa8aa5001755cb5f85e790b19 (diff)
[dev.garbage] all: merge default (f38460037b72) into dev.garbage
This is the revision that dev.cc is branched from.

LGTM=austin
R=austin
CC=golang-codereviews
https://codereview.appspot.com/169590043
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/arch_power64.go | 8
-rw-r--r-- src/runtime/arch_power64.h | 14
-rw-r--r-- src/runtime/arch_power64le.go | 8
-rw-r--r-- src/runtime/arch_power64le.h | 14
-rw-r--r-- src/runtime/asm_386.s | 64
-rw-r--r-- src/runtime/asm_amd64.s | 84
-rw-r--r-- src/runtime/asm_amd64p32.s | 64
-rw-r--r-- src/runtime/asm_arm.s | 24
-rw-r--r-- src/runtime/asm_power64x.s | 981
-rw-r--r-- src/runtime/atomic.go | 38
-rw-r--r-- src/runtime/atomic_power64x.s | 40
-rw-r--r-- src/runtime/debug/stubs.s | 6
-rw-r--r-- src/runtime/defs1_linux.go | 6
-rw-r--r-- src/runtime/defs3_linux.go | 43
-rw-r--r-- src/runtime/defs_linux.go | 11
-rw-r--r-- src/runtime/defs_linux_power64.h | 204
-rw-r--r-- src/runtime/defs_linux_power64le.h | 204
-rw-r--r-- src/runtime/export_test.go | 2
-rw-r--r-- src/runtime/gcinfo_test.go | 10
-rw-r--r-- src/runtime/heapdump.c | 15
-rw-r--r-- src/runtime/lfstack.c | 14
-rw-r--r-- src/runtime/lfstack_test.go | 2
-rw-r--r-- src/runtime/malloc.go | 78
-rw-r--r-- src/runtime/malloc.h | 5
-rw-r--r-- src/runtime/mcache.c | 2
-rw-r--r-- src/runtime/mem_linux.c | 16
-rw-r--r-- src/runtime/memclr_386.s | 46
-rw-r--r-- src/runtime/memclr_amd64.s | 44
-rw-r--r-- src/runtime/memclr_plan9_386.s | 24
-rw-r--r-- src/runtime/memclr_power64x.s | 20
-rw-r--r-- src/runtime/memmove_power64x.s | 40
-rw-r--r-- src/runtime/mgc0.c | 1431
-rw-r--r-- src/runtime/mgc0.go | 131
-rw-r--r-- src/runtime/mgc0.h | 10
-rw-r--r-- src/runtime/noasm.go (renamed from src/runtime/noasm_arm.go) | 2
-rw-r--r-- src/runtime/os_darwin.c | 3
-rw-r--r-- src/runtime/os_dragonfly.c | 3
-rw-r--r-- src/runtime/os_freebsd.c | 3
-rw-r--r-- src/runtime/os_linux.c | 40
-rw-r--r-- src/runtime/os_nacl.c | 3
-rw-r--r-- src/runtime/os_netbsd.c | 3
-rw-r--r-- src/runtime/os_openbsd.c | 3
-rw-r--r-- src/runtime/os_plan9.c | 6
-rw-r--r-- src/runtime/os_solaris.c | 3
-rw-r--r-- src/runtime/panic.c | 2
-rw-r--r-- src/runtime/panic.go | 4
-rw-r--r-- src/runtime/print1.go | 30
-rw-r--r-- src/runtime/proc.c | 42
-rw-r--r-- src/runtime/proc.go | 3
-rw-r--r-- src/runtime/race_amd64.s | 18
-rw-r--r-- src/runtime/rt0_linux_power64.s | 17
-rw-r--r-- src/runtime/rt0_linux_power64le.s | 14
-rw-r--r-- src/runtime/runtime.c | 6
-rw-r--r-- src/runtime/runtime.h | 37
-rw-r--r-- src/runtime/select.go | 7
-rw-r--r-- src/runtime/signal_linux_power64.h | 49
-rw-r--r-- src/runtime/signal_linux_power64le.h | 49
-rw-r--r-- src/runtime/signal_power64x.c | 137
-rw-r--r-- src/runtime/stack.c | 83
-rw-r--r-- src/runtime/string.c | 2
-rw-r--r-- src/runtime/string.go | 4
-rw-r--r-- src/runtime/stubs.go | 13
-rw-r--r-- src/runtime/sys_darwin_386.s | 4
-rw-r--r-- src/runtime/sys_darwin_amd64.s | 4
-rw-r--r-- src/runtime/sys_dragonfly_386.s | 4
-rw-r--r-- src/runtime/sys_freebsd_386.s | 4
-rw-r--r-- src/runtime/sys_linux_amd64.s | 8
-rw-r--r-- src/runtime/sys_linux_arm.s | 8
-rw-r--r-- src/runtime/sys_linux_power64x.s | 383
-rw-r--r-- src/runtime/sys_nacl_386.s | 4
-rw-r--r-- src/runtime/sys_nacl_amd64p32.s | 1
-rw-r--r-- src/runtime/sys_nacl_arm.s | 1
-rw-r--r-- src/runtime/sys_openbsd_386.s | 4
-rw-r--r-- src/runtime/sys_power64x.c | 38
-rw-r--r-- src/runtime/sys_solaris_amd64.s | 12
-rw-r--r-- src/runtime/sys_windows_386.s | 12
-rw-r--r-- src/runtime/sys_windows_amd64.s | 12
-rw-r--r-- src/runtime/sys_x86.c | 1
-rw-r--r-- src/runtime/thunk.s | 6
-rw-r--r-- src/runtime/wbfat.go | 190
-rw-r--r-- src/runtime/wbfat_gen.go | 41
81 files changed, 4236 insertions, 790 deletions
diff --git a/src/runtime/arch_power64.go b/src/runtime/arch_power64.go
new file mode 100644
index 000000000..270cd7b95
--- /dev/null
+++ b/src/runtime/arch_power64.go
@@ -0,0 +1,8 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type uintreg uint64
+type intptr int64 // TODO(rsc): remove
diff --git a/src/runtime/arch_power64.h b/src/runtime/arch_power64.h
new file mode 100644
index 000000000..7cfb9da2f
--- /dev/null
+++ b/src/runtime/arch_power64.h
@@ -0,0 +1,14 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+enum {
+ thechar = '9',
+ BigEndian = 1,
+ CacheLineSize = 64,
+ RuntimeGogoBytes = 64,
+ PhysPageSize = 65536,
+ PCQuantum = 4,
+ Int64Align = 8
+};
+
diff --git a/src/runtime/arch_power64le.go b/src/runtime/arch_power64le.go
new file mode 100644
index 000000000..270cd7b95
--- /dev/null
+++ b/src/runtime/arch_power64le.go
@@ -0,0 +1,8 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type uintreg uint64
+type intptr int64 // TODO(rsc): remove
diff --git a/src/runtime/arch_power64le.h b/src/runtime/arch_power64le.h
new file mode 100644
index 000000000..684ac9953
--- /dev/null
+++ b/src/runtime/arch_power64le.h
@@ -0,0 +1,14 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+enum {
+ thechar = '9',
+ BigEndian = 0,
+ CacheLineSize = 64,
+ RuntimeGogoBytes = 64,
+ PhysPageSize = 65536,
+ PCQuantum = 4,
+ Int64Align = 8
+};
+
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index b4b81d739..501e64b09 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -486,11 +486,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-21
MOVL new_hi+16(FP), CX
LOCK
CMPXCHG8B 0(BP)
- JNZ cas64_fail
+ JNZ fail
MOVL $1, AX
MOVB AX, ret+20(FP)
RET
-cas64_fail:
+fail:
MOVL $0, AX
MOVB AX, ret+20(FP)
RET
@@ -502,7 +502,7 @@ cas64_fail:
// return 1;
// }else
// return 0;
-TEXT runtime·casp(SB), NOSPLIT, $0-13
+TEXT runtime·casp1(SB), NOSPLIT, $0-13
MOVL ptr+0(FP), BX
MOVL old+4(FP), AX
MOVL new+8(FP), CX
@@ -537,7 +537,7 @@ TEXT runtime·xchg(SB), NOSPLIT, $0-12
MOVL AX, ret+8(FP)
RET
-TEXT runtime·xchgp(SB), NOSPLIT, $0-12
+TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL new+4(FP), AX
XCHGL AX, 0(BX)
@@ -555,7 +555,7 @@ again:
JNZ again
RET
-TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8
+TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
MOVL ptr+0(FP), BX
MOVL val+4(FP), AX
XCHGL AX, 0(BX)
@@ -1356,29 +1356,29 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0
// AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPL SI, DI
- JEQ cmp_allsame
+ JEQ allsame
CMPL BX, DX
MOVL DX, BP
CMOVLLT BX, BP // BP = min(alen, blen)
CMPL BP, $4
- JB cmp_small
+ JB small
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
- JE cmp_mediumloop
-cmp_largeloop:
+ JE mediumloop
+largeloop:
CMPL BP, $16
- JB cmp_mediumloop
+ JB mediumloop
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORL $0xffff, AX // convert EQ to NE
- JNE cmp_diff16 // branch if at least one byte is not equal
+ JNE diff16 // branch if at least one byte is not equal
ADDL $16, SI
ADDL $16, DI
SUBL $16, BP
- JMP cmp_largeloop
+ JMP largeloop
-cmp_diff16:
+diff16:
BSFL AX, BX // index of first byte that differs
XORL AX, AX
MOVB (SI)(BX*1), CX
@@ -1387,25 +1387,25 @@ cmp_diff16:
LEAL -1(AX*2), AX // convert 1/0 to +1/-1
RET
-cmp_mediumloop:
+mediumloop:
CMPL BP, $4
- JBE cmp_0through4
+ JBE _0through4
MOVL (SI), AX
MOVL (DI), CX
CMPL AX, CX
- JNE cmp_diff4
+ JNE diff4
ADDL $4, SI
ADDL $4, DI
SUBL $4, BP
- JMP cmp_mediumloop
+ JMP mediumloop
-cmp_0through4:
+_0through4:
MOVL -4(SI)(BP*1), AX
MOVL -4(DI)(BP*1), CX
CMPL AX, CX
- JEQ cmp_allsame
+ JEQ allsame
-cmp_diff4:
+diff4:
BSWAPL AX // reverse order of bytes
BSWAPL CX
XORL AX, CX // find bit differences
@@ -1416,37 +1416,37 @@ cmp_diff4:
RET
// 0-3 bytes in common
-cmp_small:
+small:
LEAL (BP*8), CX
NEGL CX
- JEQ cmp_allsame
+ JEQ allsame
// load si
CMPB SI, $0xfc
- JA cmp_si_high
+ JA si_high
MOVL (SI), SI
- JMP cmp_si_finish
-cmp_si_high:
+ JMP si_finish
+si_high:
MOVL -4(SI)(BP*1), SI
SHRL CX, SI
-cmp_si_finish:
+si_finish:
SHLL CX, SI
// same for di
CMPB DI, $0xfc
- JA cmp_di_high
+ JA di_high
MOVL (DI), DI
- JMP cmp_di_finish
-cmp_di_high:
+ JMP di_finish
+di_high:
MOVL -4(DI)(BP*1), DI
SHRL CX, DI
-cmp_di_finish:
+di_finish:
SHLL CX, DI
BSWAPL SI // reverse order of bytes
BSWAPL DI
XORL SI, DI // find bit differences
- JEQ cmp_allsame
+ JEQ allsame
BSRL DI, CX // index of highest bit difference
SHRL CX, SI // move a's bit to bottom
ANDL $1, SI // mask bit
@@ -1455,7 +1455,7 @@ cmp_di_finish:
// all the bytes in common are the same, so we just need
// to compare the lengths.
-cmp_allsame:
+allsame:
XORL AX, AX
XORL CX, CX
CMPL BX, DX
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 39d7c78f2..1aa2d71a8 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -461,11 +461,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25
MOVQ new+16(FP), CX
LOCK
CMPXCHGQ CX, 0(BX)
- JNZ cas64_fail
+ JNZ fail
MOVL $1, AX
MOVB AX, ret+24(FP)
RET
-cas64_fail:
+fail:
MOVL $0, AX
MOVB AX, ret+24(FP)
RET
@@ -489,7 +489,7 @@ TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
// return 1;
// } else
// return 0;
-TEXT runtime·casp(SB), NOSPLIT, $0-25
+TEXT runtime·casp1(SB), NOSPLIT, $0-25
MOVQ ptr+0(FP), BX
MOVQ old+8(FP), AX
MOVQ new+16(FP), CX
@@ -541,7 +541,7 @@ TEXT runtime·xchg64(SB), NOSPLIT, $0-24
MOVQ AX, ret+16(FP)
RET
-TEXT runtime·xchgp(SB), NOSPLIT, $0-24
+TEXT runtime·xchgp1(SB), NOSPLIT, $0-24
MOVQ ptr+0(FP), BX
MOVQ new+8(FP), AX
XCHGQ AX, 0(BX)
@@ -559,7 +559,7 @@ again:
JNZ again
RET
-TEXT runtime·atomicstorep(SB), NOSPLIT, $0-16
+TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16
MOVQ ptr+0(FP), BX
MOVQ val+8(FP), AX
XCHGQ AX, 0(BX)
@@ -890,24 +890,24 @@ TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32
MOVO runtime·aeskeysched+0(SB), X2
MOVO runtime·aeskeysched+16(SB), X3
CMPQ CX, $16
- JB aessmall
-aesloop:
+ JB small
+loop:
CMPQ CX, $16
- JBE aesloopend
+ JBE loopend
MOVOU (AX), X1
AESENC X2, X0
AESENC X1, X0
SUBQ $16, CX
ADDQ $16, AX
- JMP aesloop
+ JMP loop
// 1-16 bytes remaining
-aesloopend:
+loopend:
// This load may overlap with the previous load above.
// We'll hash some bytes twice, but that's ok.
MOVOU -16(AX)(CX*1), X1
JMP partial
// 0-15 bytes
-aessmall:
+small:
TESTQ CX, CX
JE finalize // 0 bytes
@@ -1050,18 +1050,18 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-33
MOVQ s1len+8(FP), AX
MOVQ s2len+24(FP), BX
CMPQ AX, BX
- JNE different
+ JNE noteq
MOVQ s1str+0(FP), SI
MOVQ s2str+16(FP), DI
CMPQ SI, DI
- JEQ same
+ JEQ eq
CALL runtime·memeqbody(SB)
MOVB AX, v+32(FP)
RET
-same:
+eq:
MOVB $1, v+32(FP)
RET
-different:
+noteq:
MOVB $0, v+32(FP)
RET
@@ -1184,29 +1184,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56
// AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI
- JEQ cmp_allsame
+ JEQ allsame
CMPQ BX, DX
MOVQ DX, BP
CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare
CMPQ BP, $8
- JB cmp_small
+ JB small
-cmp_loop:
+loop:
CMPQ BP, $16
- JBE cmp_0through16
+ JBE _0through16
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
- JNE cmp_diff16 // branch if at least one byte is not equal
+ JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, BP
- JMP cmp_loop
+ JMP loop
// AX = bit mask of differences
-cmp_diff16:
+diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
MOVB (SI)(BX*1), CX
@@ -1216,21 +1216,21 @@ cmp_diff16:
RET
// 0 through 16 bytes left, alen>=8, blen>=8
-cmp_0through16:
+_0through16:
CMPQ BP, $8
- JBE cmp_0through8
+ JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
- JNE cmp_diff8
-cmp_0through8:
+ JNE diff8
+_0through8:
MOVQ -8(SI)(BP*1), AX
MOVQ -8(DI)(BP*1), CX
CMPQ AX, CX
- JEQ cmp_allsame
+ JEQ allsame
// AX and CX contain parts of a and b that differ.
-cmp_diff8:
+diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX
@@ -1241,44 +1241,44 @@ cmp_diff8:
RET
// 0-7 bytes in common
-cmp_small:
+small:
LEAQ (BP*8), CX // bytes left -> bits left
NEGQ CX // - bits lift (== 64 - bits left mod 64)
- JEQ cmp_allsame
+ JEQ allsame
// load bytes of a into high bytes of AX
CMPB SI, $0xf8
- JA cmp_si_high
+ JA si_high
MOVQ (SI), SI
- JMP cmp_si_finish
-cmp_si_high:
+ JMP si_finish
+si_high:
MOVQ -8(SI)(BP*1), SI
SHRQ CX, SI
-cmp_si_finish:
+si_finish:
SHLQ CX, SI
// load bytes of b in to high bytes of BX
CMPB DI, $0xf8
- JA cmp_di_high
+ JA di_high
MOVQ (DI), DI
- JMP cmp_di_finish
-cmp_di_high:
+ JMP di_finish
+di_high:
MOVQ -8(DI)(BP*1), DI
SHRQ CX, DI
-cmp_di_finish:
+di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
- JEQ cmp_allsame
+ JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET
-cmp_allsame:
+allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
@@ -1313,7 +1313,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0
MOVQ SI, DI
CMPQ BX, $16
- JLT indexbyte_small
+ JLT small
// round up to first 16-byte boundary
TESTQ $15, SI
@@ -1371,7 +1371,7 @@ failure:
RET
// handle for lengths < 16
-indexbyte_small:
+small:
MOVQ BX, CX
REPN; SCASB
JZ success
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
index a1116b5d4..153564b14 100644
--- a/src/runtime/asm_amd64p32.s
+++ b/src/runtime/asm_amd64p32.s
@@ -444,11 +444,11 @@ TEXT runtime·cas64(SB), NOSPLIT, $0-25
MOVQ new+16(FP), CX
LOCK
CMPXCHGQ CX, 0(BX)
- JNZ cas64_fail
+ JNZ fail
MOVL $1, AX
MOVB AX, ret+24(FP)
RET
-cas64_fail:
+fail:
MOVL $0, AX
MOVB AX, ret+24(FP)
RET
@@ -460,7 +460,7 @@ cas64_fail:
// return 1;
// } else
// return 0;
-TEXT runtime·casp(SB), NOSPLIT, $0-17
+TEXT runtime·casp1(SB), NOSPLIT, $0-17
MOVL ptr+0(FP), BX
MOVL old+4(FP), AX
MOVL new+8(FP), CX
@@ -512,7 +512,7 @@ TEXT runtime·xchg64(SB), NOSPLIT, $0-24
MOVQ AX, ret+16(FP)
RET
-TEXT runtime·xchgp(SB), NOSPLIT, $0-12
+TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL new+4(FP), AX
XCHGL AX, 0(BX)
@@ -530,7 +530,7 @@ again:
JNZ again
RET
-TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8
+TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
MOVL ptr+0(FP), BX
MOVL val+4(FP), AX
XCHGL AX, 0(BX)
@@ -834,29 +834,29 @@ TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
// AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI
- JEQ cmp_allsame
+ JEQ allsame
CMPQ BX, DX
MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ R8, $8
- JB cmp_small
+ JB small
-cmp_loop:
+loop:
CMPQ R8, $16
- JBE cmp_0through16
+ JBE _0through16
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
- JNE cmp_diff16 // branch if at least one byte is not equal
+ JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, R8
- JMP cmp_loop
+ JMP loop
// AX = bit mask of differences
-cmp_diff16:
+diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
ADDQ BX, SI
@@ -868,23 +868,23 @@ cmp_diff16:
RET
// 0 through 16 bytes left, alen>=8, blen>=8
-cmp_0through16:
+_0through16:
CMPQ R8, $8
- JBE cmp_0through8
+ JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
- JNE cmp_diff8
-cmp_0through8:
+ JNE diff8
+_0through8:
ADDQ R8, SI
ADDQ R8, DI
MOVQ -8(SI), AX
MOVQ -8(DI), CX
CMPQ AX, CX
- JEQ cmp_allsame
+ JEQ allsame
// AX and CX contain parts of a and b that differ.
-cmp_diff8:
+diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX
@@ -895,46 +895,46 @@ cmp_diff8:
RET
// 0-7 bytes in common
-cmp_small:
+small:
LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits lift (== 64 - bits left mod 64)
- JEQ cmp_allsame
+ JEQ allsame
// load bytes of a into high bytes of AX
CMPB SI, $0xf8
- JA cmp_si_high
+ JA si_high
MOVQ (SI), SI
- JMP cmp_si_finish
-cmp_si_high:
+ JMP si_finish
+si_high:
ADDQ R8, SI
MOVQ -8(SI), SI
SHRQ CX, SI
-cmp_si_finish:
+si_finish:
SHLQ CX, SI
// load bytes of b in to high bytes of BX
CMPB DI, $0xf8
- JA cmp_di_high
+ JA di_high
MOVQ (DI), DI
- JMP cmp_di_finish
-cmp_di_high:
+ JMP di_finish
+di_high:
ADDQ R8, DI
MOVQ -8(DI), DI
SHRQ CX, DI
-cmp_di_finish:
+di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
- JEQ cmp_allsame
+ JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET
-cmp_allsame:
+allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
@@ -969,7 +969,7 @@ TEXT runtime·indexbytebody(SB),NOSPLIT,$0
MOVL SI, DI
CMPL BX, $16
- JLT indexbyte_small
+ JLT small
// round up to first 16-byte boundary
TESTL $15, SI
@@ -1027,7 +1027,7 @@ failure:
RET
// handle for lengths < 16
-indexbyte_small:
+small:
MOVL BX, CX
REPN; SCASB
JZ success
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index 0f3b5eeb8..58aebf388 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -492,7 +492,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0
MOVW g_m(g), R8
MOVW m_g0(R8), R3
CMP R3, g
- BEQ asmcgocall_g0
+ BEQ g0
BL gosave<>(SB)
MOVW R0, R5
MOVW R3, R0
@@ -501,7 +501,7 @@ TEXT asmcgocall<>(SB),NOSPLIT,$0-0
MOVW (g_sched+gobuf_sp)(g), R13
// Now on a scheduling stack (a pthread-created stack).
-asmcgocall_g0:
+g0:
SUB $24, R13
BIC $0x7, R13 // alignment for gcc ABI
MOVW R4, 20(R13) // save old g
@@ -751,13 +751,13 @@ TEXT runtime·memeq(SB),NOSPLIT,$-4-13
ADD R1, R3, R6
MOVW $1, R0
MOVB R0, ret+12(FP)
-_next2:
+loop:
CMP R1, R6
RET.EQ
MOVBU.P 1(R1), R4
MOVBU.P 1(R2), R5
CMP R4, R5
- BEQ _next2
+ BEQ loop
MOVW $0, R0
MOVB R0, ret+12(FP)
@@ -780,13 +780,13 @@ TEXT runtime·eqstring(SB),NOSPLIT,$-4-17
CMP R2, R3
RET.EQ
ADD R2, R0, R6
-_eqnext:
+loop:
CMP R2, R6
RET.EQ
MOVBU.P 1(R2), R4
MOVBU.P 1(R3), R5
CMP R4, R5
- BEQ _eqnext
+ BEQ loop
MOVB R7, v+16(FP)
RET
@@ -801,26 +801,26 @@ TEXT bytes·Equal(SB),NOSPLIT,$0
MOVW b_len+16(FP), R3
CMP R1, R3 // unequal lengths are not equal
- B.NE _notequal
+ B.NE notequal
MOVW a+0(FP), R0
MOVW b+12(FP), R2
ADD R0, R1 // end
-_byteseq_next:
+loop:
CMP R0, R1
- B.EQ _equal // reached the end
+ B.EQ equal // reached the end
MOVBU.P 1(R0), R4
MOVBU.P 1(R2), R5
CMP R4, R5
- B.EQ _byteseq_next
+ B.EQ loop
-_notequal:
+notequal:
MOVW $0, R0
MOVBU R0, ret+24(FP)
RET
-_equal:
+equal:
MOVW $1, R0
MOVBU R0, ret+24(FP)
RET
diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s
new file mode 100644
index 000000000..fd0c6be16
--- /dev/null
+++ b/src/runtime/asm_power64x.s
@@ -0,0 +1,981 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build power64 power64le
+
+#include "zasm_GOOS_GOARCH.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+TEXT runtime·rt0_go(SB),NOSPLIT,$0
+ // initialize essential registers
+ BL runtime·reginit(SB)
+
+ SUB $24, R1
+ MOVW R3, 8(R1) // argc
+ MOVD R4, 16(R1) // argv
+
+ // create istack out of the given (operating system) stack.
+ // _cgo_init may update stackguard.
+ MOVD $runtime·g0(SB), g
+ MOVD $(-64*1024), R31
+ ADD R31, R1, R3
+ MOVD R3, g_stackguard0(g)
+ MOVD R3, g_stackguard1(g)
+ MOVD R3, (g_stack+stack_lo)(g)
+ MOVD R1, (g_stack+stack_hi)(g)
+
+ // TODO: if there is a _cgo_init, call it.
+ // TODO: add TLS
+
+ // set the per-goroutine and per-mach "registers"
+ MOVD $runtime·m0(SB), R3
+
+ // save m->g0 = g0
+ MOVD g, m_g0(R3)
+ // save m0 to g0->m
+ MOVD R3, g_m(g)
+
+ BL runtime·check(SB)
+
+ // args are already prepared
+ BL runtime·args(SB)
+ BL runtime·osinit(SB)
+ BL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ MOVD $runtime·main·f(SB), R3 // entry
+ MOVDU R3, -8(R1)
+ MOVDU R0, -8(R1)
+ MOVDU R0, -8(R1)
+ BL runtime·newproc(SB)
+ ADD $24, R1
+
+ // start this M
+ BL runtime·mstart(SB)
+
+ MOVD R0, 1(R0)
+ RETURN
+
+DATA runtime·main·f+0(SB)/8,$runtime·main(SB)
+GLOBL runtime·main·f(SB),RODATA,$8
+
+TEXT runtime·breakpoint(SB),NOSPLIT,$-8-0
+ MOVD R0, 2(R0) // TODO: TD
+ RETURN
+
+TEXT runtime·asminit(SB),NOSPLIT,$-8-0
+ RETURN
+
+TEXT runtime·reginit(SB),NOSPLIT,$-8-0
+ // set R0 to zero, it's expected by the toolchain
+ XOR R0, R0
+ // initialize essential FP registers
+ FMOVD $4503601774854144.0, F27
+ FMOVD $0.5, F29
+ FSUB F29, F29, F28
+ FADD F29, F29, F30
+ FADD F30, F30, F31
+ RETURN
+
+/*
+ * go-routine
+ */
+
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB), NOSPLIT, $-8-8
+ MOVD gobuf+0(FP), R3
+ MOVD R1, gobuf_sp(R3)
+ MOVD LR, R31
+ MOVD R31, gobuf_pc(R3)
+ MOVD g, gobuf_g(R3)
+ MOVD R0, gobuf_lr(R3)
+ MOVD R0, gobuf_ret(R3)
+ MOVD R0, gobuf_ctxt(R3)
+ RETURN
+
+// void gogo(Gobuf*)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB), NOSPLIT, $-8-8
+ MOVD gobuf+0(FP), R5
+ MOVD gobuf_g(R5), g // make sure g is not nil
+ MOVD 0(g), R4
+ MOVD gobuf_sp(R5), R1
+ MOVD gobuf_lr(R5), R31
+ MOVD R31, LR
+ MOVD gobuf_ret(R5), R3
+ MOVD gobuf_ctxt(R5), R11
+ MOVD R0, gobuf_sp(R5)
+ MOVD R0, gobuf_ret(R5)
+ MOVD R0, gobuf_lr(R5)
+ MOVD R0, gobuf_ctxt(R5)
+ CMP R0, R0 // set condition codes for == test, needed by stack split
+ MOVD gobuf_pc(R5), R31
+ MOVD R31, CTR
+ BR (CTR)
+
+// void mcall(fn func(*g))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB), NOSPLIT, $-8-8
+ // Save caller state in g->sched
+ MOVD R1, (g_sched+gobuf_sp)(g)
+ MOVD LR, R31
+ MOVD R31, (g_sched+gobuf_pc)(g)
+ MOVD R0, (g_sched+gobuf_lr)(g)
+ MOVD g, (g_sched+gobuf_g)(g)
+
+ // Switch to m->g0 & its stack, call fn.
+ MOVD g, R3
+ MOVD g_m(g), R8
+ MOVD m_g0(R8), g
+ CMP g, R3
+ BNE 2(PC)
+ BR runtime·badmcall(SB)
+ MOVD fn+0(FP), R11 // context
+ MOVD 0(R11), R4 // code pointer
+ MOVD R4, CTR
+ MOVD (g_sched+gobuf_sp)(g), R1 // sp = m->g0->sched.sp
+ MOVDU R3, -8(R1)
+ MOVDU R0, -8(R1)
+ BL (CTR)
+ BR runtime·badmcall2(SB)
+
+// switchtoM is a dummy routine that onM leaves at the bottom
+// of the G stack. We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the M stack because the one at the top of
+// the M stack terminates the stack walk (see topofstack()).
+TEXT runtime·switchtoM(SB), NOSPLIT, $0-0
+ UNDEF
+ BL (LR) // make sure this function is not leaf
+ RETURN
+
+// func onM_signalok(fn func())
+TEXT runtime·onM_signalok(SB), NOSPLIT, $8-8
+ MOVD g, R3 // R3 = g
+ MOVD g_m(R3), R4 // R4 = g->m
+ MOVD m_gsignal(R4), R4 // R4 = g->m->gsignal
+ MOVD fn+0(FP), R11 // context for call below
+ CMP R3, R4
+ BEQ onsignal
+ MOVD R11, 8(R1)
+ BL runtime·onM(SB)
+ RETURN
+
+onsignal:
+ MOVD 0(R11), R3 // code pointer
+ MOVD R3, CTR
+ BL (CTR)
+ RETURN
+
+// void onM(fn func())
+TEXT runtime·onM(SB), NOSPLIT, $0-8
+ MOVD fn+0(FP), R3 // R3 = fn
+ MOVD R3, R11 // context
+ MOVD g_m(g), R4 // R4 = m
+
+ MOVD m_g0(R4), R5 // R5 = g0
+ CMP g, R5
+ BEQ onm
+
+ MOVD m_curg(R4), R6
+ CMP g, R6
+ BEQ oncurg
+
+ // Not g0, not curg. Must be gsignal, but that's not allowed.
+ // Hide call from linker nosplit analysis.
+ MOVD $runtime·badonm(SB), R3
+ MOVD R3, CTR
+ BL (CTR)
+
+oncurg:
+ // save our state in g->sched. Pretend to
+ // be switchtoM if the G stack is scanned.
+ MOVD $runtime·switchtoM(SB), R6
+ ADD $8, R6 // get past prologue
+ MOVD R6, (g_sched+gobuf_pc)(g)
+ MOVD R1, (g_sched+gobuf_sp)(g)
+ MOVD R0, (g_sched+gobuf_lr)(g)
+ MOVD g, (g_sched+gobuf_g)(g)
+
+ // switch to g0
+ MOVD R5, g
+ MOVD (g_sched+gobuf_sp)(g), R3
+ // make it look like mstart called onM on g0, to stop traceback
+ SUB $8, R3
+ MOVD $runtime·mstart(SB), R4
+ MOVD R4, 0(R3)
+ MOVD R3, R1
+
+ // call target function
+ MOVD 0(R11), R3 // code pointer
+ MOVD R3, CTR
+ BL (CTR)
+
+ // switch back to g
+ MOVD g_m(g), R3
+ MOVD m_curg(R3), g
+ MOVD (g_sched+gobuf_sp)(g), R1
+ MOVD R0, (g_sched+gobuf_sp)(g)
+ RETURN
+
+onm:
+ // already on m stack, just call directly
+ MOVD 0(R11), R3 // code pointer
+ MOVD R3, CTR
+ BL (CTR)
+ RETURN
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// Caller has already loaded:
+// R3: framesize, R4: argsize, R5: LR
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+TEXT runtime·morestack(SB),NOSPLIT,$-8-0
+ // Cannot grow scheduler stack (m->g0).
+ MOVD g_m(g), R7
+ MOVD m_g0(R7), R8
+ CMP g, R8
+ BNE 2(PC)
+ BL runtime·abort(SB)
+
+ // Cannot grow signal stack (m->gsignal).
+ MOVD m_gsignal(R7), R8
+ CMP g, R8
+ BNE 2(PC)
+ BL runtime·abort(SB)
+
+ // Called from f.
+ // Set g->sched to context in f.
+ MOVD R11, (g_sched+gobuf_ctxt)(g)
+ MOVD R1, (g_sched+gobuf_sp)(g)
+ MOVD LR, R8
+ MOVD R8, (g_sched+gobuf_pc)(g)
+ MOVD R5, (g_sched+gobuf_lr)(g)
+
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOVD R5, (m_morebuf+gobuf_pc)(R7) // f's caller's PC
+ MOVD R1, (m_morebuf+gobuf_sp)(R7) // f's caller's SP
+ MOVD g, (m_morebuf+gobuf_g)(R7)
+
+ // Call newstack on m->g0's stack.
+ MOVD m_g0(R7), g
+ MOVD (g_sched+gobuf_sp)(g), R1
+ BL runtime·newstack(SB)
+
+ // Not reached, but make sure the return PC from the call to newstack
+ // is still in this function, and not the beginning of the next.
+ UNDEF
+
+TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0
+ MOVD R0, R11
+ BR runtime·morestack(SB)
+
+// reflectcall: call a function with the given argument list
+// func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE) \
+ MOVD $MAXSIZE, R31; \
+ CMP R3, R31; \
+ BGT 4(PC); \
+ MOVD $NAME(SB), R31; \
+ MOVD R31, CTR; \
+ BR (CTR)
+// Note: can't just "BR NAME(SB)" - bad inlining results.
+
+TEXT ·reflectcall(SB), NOSPLIT, $-8-24
+ MOVW argsize+16(FP), R3
+ DISPATCH(runtime·call16, 16)
+ DISPATCH(runtime·call32, 32)
+ DISPATCH(runtime·call64, 64)
+ DISPATCH(runtime·call128, 128)
+ DISPATCH(runtime·call256, 256)
+ DISPATCH(runtime·call512, 512)
+ DISPATCH(runtime·call1024, 1024)
+ DISPATCH(runtime·call2048, 2048)
+ DISPATCH(runtime·call4096, 4096)
+ DISPATCH(runtime·call8192, 8192)
+ DISPATCH(runtime·call16384, 16384)
+ DISPATCH(runtime·call32768, 32768)
+ DISPATCH(runtime·call65536, 65536)
+ DISPATCH(runtime·call131072, 131072)
+ DISPATCH(runtime·call262144, 262144)
+ DISPATCH(runtime·call524288, 524288)
+ DISPATCH(runtime·call1048576, 1048576)
+ DISPATCH(runtime·call2097152, 2097152)
+ DISPATCH(runtime·call4194304, 4194304)
+ DISPATCH(runtime·call8388608, 8388608)
+ DISPATCH(runtime·call16777216, 16777216)
+ DISPATCH(runtime·call33554432, 33554432)
+ DISPATCH(runtime·call67108864, 67108864)
+ DISPATCH(runtime·call134217728, 134217728)
+ DISPATCH(runtime·call268435456, 268435456)
+ DISPATCH(runtime·call536870912, 536870912)
+ DISPATCH(runtime·call1073741824, 1073741824)
+ MOVD $runtime·badreflectcall(SB), R31
+ MOVD R31, CTR
+ BR (CTR)
+
+#define CALLFN(NAME,MAXSIZE) \
+TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
+ NO_LOCAL_POINTERS; \
+ /* copy arguments to stack */ \
+ MOVD argptr+8(FP), R3; \
+ MOVW argsize+16(FP), R4; \
+ MOVD R1, R5; \
+ ADD $(8-1), R5; \
+ SUB $1, R3; \
+ ADD R5, R4; \
+ CMP R5, R4; \
+ BEQ 4(PC); \
+ MOVBZU 1(R3), R6; \
+ MOVBZU R6, 1(R5); \
+ BR -4(PC); \
+ /* call function */ \
+ MOVD f+0(FP), R11; \
+ MOVD (R11), R31; \
+ MOVD R31, CTR; \
+ PCDATA $PCDATA_StackMapIndex, $0; \
+ BL (CTR); \
+ /* copy return values back */ \
+ MOVD argptr+8(FP), R3; \
+ MOVW argsize+16(FP), R4; \
+ MOVW retoffset+20(FP), R6; \
+ MOVD R1, R5; \
+ ADD R6, R5; \
+ ADD R6, R3; \
+ SUB R6, R4; \
+ ADD $(8-1), R5; \
+ SUB $1, R3; \
+ ADD R5, R4; \
+ CMP R5, R4; \
+ BEQ 4(PC); \
+ MOVBZU 1(R5), R6; \
+ MOVBZU R6, 1(R3); \
+ BR -4(PC); \
+ RETURN
+
+CALLFN(·call16, 16)
+CALLFN(·call32, 32)
+CALLFN(·call64, 64)
+CALLFN(·call128, 128)
+CALLFN(·call256, 256)
+CALLFN(·call512, 512)
+CALLFN(·call1024, 1024)
+CALLFN(·call2048, 2048)
+CALLFN(·call4096, 4096)
+CALLFN(·call8192, 8192)
+CALLFN(·call16384, 16384)
+CALLFN(·call32768, 32768)
+CALLFN(·call65536, 65536)
+CALLFN(·call131072, 131072)
+CALLFN(·call262144, 262144)
+CALLFN(·call524288, 524288)
+CALLFN(·call1048576, 1048576)
+CALLFN(·call2097152, 2097152)
+CALLFN(·call4194304, 4194304)
+CALLFN(·call8388608, 8388608)
+CALLFN(·call16777216, 16777216)
+CALLFN(·call33554432, 33554432)
+CALLFN(·call67108864, 67108864)
+CALLFN(·call134217728, 134217728)
+CALLFN(·call268435456, 268435456)
+CALLFN(·call536870912, 536870912)
+CALLFN(·call1073741824, 1073741824)
+
+// bool cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·cas(SB), NOSPLIT, $0-17
+ MOVD p+0(FP), R3
+ MOVW old+8(FP), R4
+ MOVW new+12(FP), R5
+cas_again:
+ SYNC
+ LWAR (R3), R6
+ CMPW R6, R4
+ BNE cas_fail
+ STWCCC R5, (R3)
+ BNE cas_again
+ MOVD $1, R3
+ SYNC
+ ISYNC
+ MOVB R3, ret+16(FP)
+ RETURN
+cas_fail:
+ MOVD $0, R3
+ BR -5(PC)
+
+// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
+// Atomically:
+// if(*val == *old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT runtime·cas64(SB), NOSPLIT, $0-25
+ MOVD p+0(FP), R3
+ MOVD old+8(FP), R4
+ MOVD new+16(FP), R5
+cas64_again:
+ SYNC
+ LDAR (R3), R6
+ CMP R6, R4
+ BNE cas64_fail
+ STDCCC R5, (R3)
+ BNE cas64_again
+ MOVD $1, R3
+ SYNC
+ ISYNC
+ MOVB R3, ret+24(FP)
+ RETURN
+cas64_fail:
+ MOVD $0, R3
+ BR -5(PC)
+
+TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
+ BR runtime·cas64(SB)
+
+TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $-8-16
+ BR runtime·atomicload64(SB)
+
+TEXT runtime·atomicloaduint(SB), NOSPLIT, $-8-16
+ BR runtime·atomicload64(SB)
+
+TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
+ BR runtime·atomicstore64(SB)
+
+// bool casp(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·casp1(SB), NOSPLIT, $0-25
+ BR runtime·cas64(SB)
+
+// uint32 xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT runtime·xadd(SB), NOSPLIT, $0-20
+ MOVD p+0(FP), R4
+ MOVW delta+8(FP), R5
+ SYNC
+ LWAR (R4), R3
+ ADD R5, R3
+ STWCCC R3, (R4)
+ BNE -4(PC)
+ SYNC
+ ISYNC
+ MOVW R3, ret+16(FP)
+ RETURN
+
+TEXT runtime·xadd64(SB), NOSPLIT, $0-24
+ MOVD p+0(FP), R4
+ MOVD delta+8(FP), R5
+ SYNC
+ LDAR (R4), R3
+ ADD R5, R3
+ STDCCC R3, (R4)
+ BNE -4(PC)
+ SYNC
+ ISYNC
+ MOVD R3, ret+16(FP)
+ RETURN
+
+TEXT runtime·xchg(SB), NOSPLIT, $0-20
+ MOVD p+0(FP), R4
+ MOVW new+8(FP), R5
+ SYNC
+ LWAR (R4), R3
+ STWCCC R5, (R4)
+ BNE -3(PC)
+ SYNC
+ ISYNC
+ MOVW R3, ret+16(FP)
+ RETURN
+
+TEXT runtime·xchg64(SB), NOSPLIT, $0-24
+ MOVD p+0(FP), R4
+ MOVD new+8(FP), R5
+ SYNC
+ LDAR (R4), R3
+ STDCCC R5, (R4)
+ BNE -3(PC)
+ SYNC
+ ISYNC
+ MOVD R3, ret+16(FP)
+ RETURN
+
+TEXT runtime·xchgp1(SB), NOSPLIT, $0-24
+ BR runtime·xchg64(SB)
+
+TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
+ BR runtime·xchg64(SB)
+
+TEXT runtime·procyield(SB),NOSPLIT,$0-0
+ RETURN
+
+TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16
+ BR runtime·atomicstore64(SB)
+
+TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ SYNC
+ MOVW R4, 0(R3)
+ RETURN
+
+TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
+ MOVD ptr+0(FP), R3
+ MOVD val+8(FP), R4
+ SYNC
+ MOVD R4, 0(R3)
+ RETURN
+
+// void runtime·atomicor8(byte volatile*, byte);
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
+ // Align ptr down to 4 bytes so we can use 32-bit load/store.
+ // R5 = (R3 << 0) & ~3
+ RLDCR $0, R3, $~3, R5
+ // Compute val shift.
+#ifdef GOARCH_power64
+ // Big endian. ptr = ptr ^ 3
+ XOR $3, R3
+#endif
+ // R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8)
+ RLDC $3, R3, $(3*8), R6
+ // Shift val for aligned ptr. R4 = val << R6
+ SLD R6, R4, R4
+
+atomicor8_again:
+ SYNC
+ LWAR (R5), R6
+ OR R4, R6
+ STWCCC R6, (R5)
+ BNE atomicor8_again
+ SYNC
+ ISYNC
+ RETURN
+
+// void jmpdefer(fv, sp);
+// called from deferreturn.
+// 1. grab stored LR for caller
+// 2. sub 4 bytes to get back to BL deferreturn
+// 3. BR to fn
+TEXT runtime·jmpdefer(SB), NOSPLIT, $-8-16
+ MOVD 0(R1), R31
+ SUB $4, R31
+ MOVD R31, LR
+
+ MOVD fv+0(FP), R11
+ MOVD argp+8(FP), R1
+ SUB $8, R1
+ MOVD 0(R11), R3
+ MOVD R3, CTR
+ BR (CTR)
+
+// Save state of caller into g->sched. Smashes R31.
+TEXT gosave<>(SB),NOSPLIT,$-8
+ MOVD LR, R31
+ MOVD R31, (g_sched+gobuf_pc)(g)
+ MOVD R1, (g_sched+gobuf_sp)(g)
+ MOVD R0, (g_sched+gobuf_lr)(g)
+ MOVD R0, (g_sched+gobuf_ret)(g)
+ MOVD R0, (g_sched+gobuf_ctxt)(g)
+ RETURN
+
+// asmcgocall(void(*fn)(void*), void *arg)
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.c for more details.
+TEXT ·asmcgocall(SB),NOSPLIT,$0-16
+ MOVD R0, 21(R0)
+
+// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
+// Turn the fn into a Go func (by taking its address) and call
+// cgocallback_gofunc.
+TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
+ MOVD R0, 22(R0)
+
+// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
+// See cgocall.c for more details.
+TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
+ MOVD R0, 23(R0)
+
+// void setg(G*); set g. for use by needm.
+TEXT runtime·setg(SB), NOSPLIT, $0-8
+ MOVD R0, 24(R0)
+
+// void setg_gcc(G*); set g called from gcc.
+TEXT setg_gcc<>(SB),NOSPLIT,$0
+ MOVD R0, 25(R0)
+
+TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-16
+ MOVD 0(R1), R3
+ MOVD R3, ret+8(FP)
+ RETURN
+
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$-8-16
+ MOVD 0(R1), R3
+ MOVD R3,ret+8(FP)
+ RETURN
+
+TEXT runtime·setcallerpc(SB),NOSPLIT,$-8-16
+ MOVD pc+8(FP), R3
+ MOVD R3, 0(R1) // set calling pc
+ RETURN
+
+TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
+ MOVD sp+0(FP), R3
+ SUB $8, R3
+ MOVD R3, ret+8(FP)
+ RETURN
+
+// func gogetcallersp(p unsafe.Pointer) uintptr
+TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-16
+ MOVD sp+0(FP), R3
+ SUB $8, R3
+ MOVD R3,ret+8(FP)
+ RETURN
+
+TEXT runtime·abort(SB),NOSPLIT,$-8-0
+ MOVW (R0), R0
+ UNDEF
+
+#define TBRL 268
+#define TBRU 269 /* Time base Upper/Lower */
+
+// int64 runtime·cputicks(void)
+TEXT runtime·cputicks(SB),NOSPLIT,$0-8
+ MOVW SPR(TBRU), R4
+ MOVW SPR(TBRL), R3
+ MOVW SPR(TBRU), R5
+ CMPW R4, R5
+ BNE -4(PC)
+ SLD $32, R5
+ OR R5, R3
+ MOVD R3, ret+0(FP)
+ RETURN
+
+// AES hashing not implemented for Power
+TEXT runtime·aeshash(SB),NOSPLIT,$-8-0
+ MOVW (R0), R1
+TEXT runtime·aeshash32(SB),NOSPLIT,$-8-0
+ MOVW (R0), R1
+TEXT runtime·aeshash64(SB),NOSPLIT,$-8-0
+ MOVW (R0), R1
+TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
+ MOVW (R0), R1
+
+TEXT runtime·memeq(SB),NOSPLIT,$-8-25
+ MOVD a+0(FP), R3
+ MOVD b+8(FP), R4
+ MOVD count+16(FP), R5
+ SUB $1, R3
+ SUB $1, R4
+ ADD R3, R5, R8
+loop:
+ CMP R3, R8
+ BNE 4(PC)
+ MOVD $1, R3
+ MOVB R3, ret+24(FP)
+ RETURN
+ MOVBZU 1(R3), R6
+ MOVBZU 1(R4), R7
+ CMP R6, R7
+ BEQ loop
+
+ MOVB R0, ret+24(FP)
+ RETURN
+
+// eqstring tests whether two strings are equal.
+// See runtime_test.go:eqstring_generic for
+// equivalent Go code.
+TEXT runtime·eqstring(SB),NOSPLIT,$0-33
+ MOVD s1len+8(FP), R4
+ MOVD s2len+24(FP), R5
+ CMP R4, R5
+ BNE noteq
+
+ MOVD s1str+0(FP), R3
+ MOVD s2str+16(FP), R4
+ SUB $1, R3
+ SUB $1, R4
+ ADD R3, R5, R8
+loop:
+ CMP R3, R8
+ BNE 4(PC)
+ MOVD $1, R3
+ MOVB R3, ret+32(FP)
+ RETURN
+ MOVBZU 1(R3), R6
+ MOVBZU 1(R4), R7
+ CMP R6, R7
+ BEQ loop
+noteq:
+ MOVB R0, ret+32(FP)
+ RETURN
+
+// TODO: share code with memeq?
+TEXT bytes·Equal(SB),NOSPLIT,$0-49
+ MOVD a_len+8(FP), R3
+ MOVD b_len+32(FP), R4
+
+ CMP R3, R4 // unequal lengths are not equal
+ BNE noteq
+
+ MOVD a+0(FP), R5
+ MOVD b+24(FP), R6
+ SUB $1, R5
+ SUB $1, R6
+ ADD R5, R3 // end-1
+
+loop:
+ CMP R5, R3
+ BEQ equal // reached the end
+ MOVBZU 1(R5), R4
+ MOVBZU 1(R6), R7
+ CMP R4, R7
+ BEQ loop
+
+noteq:
+ MOVBZ R0, ret+48(FP)
+ RETURN
+
+equal:
+ MOVD $1, R3
+ MOVBZ R3, ret+48(FP)
+ RETURN
+
+TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
+ MOVD s+0(FP), R3
+ MOVD s_len+8(FP), R4
+ MOVBZ c+24(FP), R5 // byte to find
+ MOVD R3, R6 // store base for later
+ SUB $1, R3
+ ADD R3, R4 // end-1
+
+loop:
+ CMP R3, R4
+ BEQ notfound
+ MOVBZU 1(R3), R7
+ CMP R7, R5
+ BNE loop
+
+ SUB R6, R3 // remove base
+ MOVD R3, ret+32(FP)
+ RETURN
+
+notfound:
+ MOVD $-1, R3
+ MOVD R3, ret+32(FP)
+ RETURN
+
+TEXT strings·IndexByte(SB),NOSPLIT,$0
+ MOVD p+0(FP), R3
+ MOVD b_len+8(FP), R4
+ MOVBZ c+16(FP), R5 // byte to find
+ MOVD R3, R6 // store base for later
+ SUB $1, R3
+ ADD R3, R4 // end-1
+
+loop:
+ CMP R3, R4
+ BEQ notfound
+ MOVBZU 1(R3), R7
+ CMP R7, R5
+ BNE loop
+
+ SUB R6, R3 // remove base
+ MOVD R3, ret+24(FP)
+ RETURN
+
+notfound:
+ MOVD $-1, R3
+ MOVD R3, ret+24(FP)
+ RETURN
+
+
+// A Duff's device for zeroing memory.
+// The compiler jumps to computed addresses within
+// this routine to zero chunks of memory. Do not
+// change this code without also changing the code
+// in ../../cmd/9g/ggen.c:/^clearfat.
+// R0: always zero
+// R3 (aka REGRT1): ptr to memory to be zeroed - 8
+// R3 is updated as a side effect.
+TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ MOVDU R0, 8(R3)
+ RETURN
+
+TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
+ MOVD g_m(g), R4
+ MOVWZ m_fastrand(R4), R3
+ ADD R3, R3
+ CMP R3, $0
+ BGE 2(PC)
+ XOR $0x88888eef, R3
+ MOVW R3, m_fastrand(R4)
+ MOVW R3, ret+0(FP)
+ RETURN
+
+TEXT runtime·return0(SB), NOSPLIT, $0
+ MOVW $0, R3
+ RETURN
+
+// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
+// Must obey the gcc calling convention.
+TEXT _cgo_topofstack(SB),NOSPLIT,$0
+ MOVD R0, 26(R0)
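Note on the atomics in the new asm_power64x.s above: cas, cas64, xadd and xchg are all load-reserved/store-conditional (LWAR/STWCCC) retry loops bracketed by SYNC/ISYNC barriers. The following is only a sketch of that loop's shape in ordinary Go, using sync/atomic's compare-and-swap as a stand-in for the assembly; it is not the runtime's implementation.

package main

import (
	"fmt"
	"sync/atomic"
)

// xadd mirrors the shape of runtime·xadd on power64: load, add, then retry
// the store until no other writer has intervened. The assembly expresses the
// retry with LWAR/STWCCC; this stand-in retries a compare-and-swap instead.
func xadd(val *uint32, delta int32) uint32 {
	for {
		old := atomic.LoadUint32(val)
		new := old + uint32(delta)
		if atomic.CompareAndSwapUint32(val, old, new) {
			return new
		}
	}
}

func main() {
	var v uint32
	fmt.Println(xadd(&v, 5), xadd(&v, -2)) // 5 3
}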
diff --git a/src/runtime/atomic.go b/src/runtime/atomic.go
index 7e9d9b3aa..a0e4d84e9 100644
--- a/src/runtime/atomic.go
+++ b/src/runtime/atomic.go
@@ -20,8 +20,16 @@ func xchg(ptr *uint32, new uint32) uint32
//go:noescape
func xchg64(ptr *uint64, new uint64) uint64
-//go:noescape
-func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
+// Cannot use noescape here: ptr does not but new does escape.
+// Instead use noescape(ptr) in wrapper below.
+func xchgp1(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
+
+//go:nosplit
+func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
+ old := xchgp1(noescape(ptr), new)
+ writebarrierptr_nostore((*uintptr)(ptr), uintptr(new))
+ return old
+}
//go:noescape
func xchguintptr(ptr *uintptr, new uintptr) uintptr
@@ -47,5 +55,27 @@ func atomicstore(ptr *uint32, val uint32)
//go:noescape
func atomicstore64(ptr *uint64, val uint64)
-//go:noescape
-func atomicstorep(ptr unsafe.Pointer, val unsafe.Pointer)
+// Cannot use noescape here: ptr does not but val does escape.
+// Instead use noescape(ptr) in wrapper below.
+func atomicstorep1(ptr unsafe.Pointer, val unsafe.Pointer)
+
+//go:nosplit
+func atomicstorep(ptr unsafe.Pointer, val unsafe.Pointer) {
+ atomicstorep1(noescape(ptr), val)
+ // TODO(rsc): Why does the compiler think writebarrierptr_nostore's dst argument escapes?
+ writebarrierptr_nostore((*uintptr)(noescape(ptr)), uintptr(val))
+}
+
+// Cannot use noescape here: ptr does not but new does escape.
+// Instead use noescape(ptr) in wrapper below.
+func casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+
+//go:nosplit
+func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
+ ok := casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), old, new)
+ if !ok {
+ return false
+ }
+ writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
+ return true
+}
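The atomic.go change above splits each pointer atomic into an assembly primitive (casp1, xchgp1, atomicstorep1) plus a Go wrapper that runs writebarrierptr_nostore after the store succeeds, so the concurrent collector observes every pointer write. Below is a minimal sketch of that wrapper shape only; storePointerRaw and writeBarrier are hypothetical stand-ins for the primitive and the barrier, not the runtime's code.

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// storePointerRaw stands in for the assembly primitive (atomicstorep1).
func storePointerRaw(ptr *unsafe.Pointer, val unsafe.Pointer) {
	atomic.StorePointer(ptr, val)
}

// writeBarrier stands in for writebarrierptr_nostore; the real runtime
// records the written pointer for the concurrent marker here.
func writeBarrier(slot *unsafe.Pointer, val unsafe.Pointer) {
}

// atomicStorePointer mirrors the new atomicstorep wrapper: do the raw store,
// then run the write barrier so the garbage collector learns about the write.
func atomicStorePointer(ptr *unsafe.Pointer, val unsafe.Pointer) {
	storePointerRaw(ptr, val)
	writeBarrier(ptr, val)
}

func main() {
	var slot unsafe.Pointer
	x := 42
	atomicStorePointer(&slot, unsafe.Pointer(&x))
	fmt.Println(*(*int)(slot)) // 42
}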
diff --git a/src/runtime/atomic_power64x.s b/src/runtime/atomic_power64x.s
new file mode 100644
index 000000000..e72871761
--- /dev/null
+++ b/src/runtime/atomic_power64x.s
@@ -0,0 +1,40 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build power64 power64le
+
+#include "textflag.h"
+
+// uint32 runtime·atomicload(uint32 volatile* addr)
+TEXT ·atomicload(SB),NOSPLIT,$-8-12
+ MOVD 0(FP), R3
+ SYNC
+ MOVWZ 0(R3), R3
+ CMPW R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7,0x4
+ ISYNC
+ MOVW R3, ret+8(FP)
+ RETURN
+
+// uint64 runtime·atomicload64(uint64 volatile* addr)
+TEXT ·atomicload64(SB),NOSPLIT,$-8-16
+ MOVD 0(FP), R3
+ SYNC
+ MOVD 0(R3), R3
+ CMP R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7,0x4
+ ISYNC
+ MOVD R3, ret+8(FP)
+ RETURN
+
+// void *runtime·atomicloadp(void *volatile *addr)
+TEXT ·atomicloadp(SB),NOSPLIT,$-8-16
+ MOVD 0(FP), R3
+ SYNC
+ MOVD 0(R3), R3
+ CMP R3, R3, CR7
+ BC 4, 30, 1(PC) // bne- cr7,0x4
+ ISYNC
+ MOVD R3, ret+8(FP)
+ RETURN
diff --git a/src/runtime/debug/stubs.s b/src/runtime/debug/stubs.s
index d56274f2d..1e883b72c 100644
--- a/src/runtime/debug/stubs.s
+++ b/src/runtime/debug/stubs.s
@@ -7,6 +7,12 @@
#ifdef GOARCH_arm
#define JMP B
#endif
+#ifdef GOARCH_power64
+#define JMP BR
+#endif
+#ifdef GOARCH_power64le
+#define JMP BR
+#endif
TEXT ·setMaxStack(SB),NOSPLIT,$0-0
JMP runtime·setMaxStack(SB)
diff --git a/src/runtime/defs1_linux.go b/src/runtime/defs1_linux.go
index 392cc4ab5..87c6e02a4 100644
--- a/src/runtime/defs1_linux.go
+++ b/src/runtime/defs1_linux.go
@@ -15,12 +15,14 @@ package runtime
/*
#include <ucontext.h>
#include <fcntl.h>
+#include <asm/signal.h>
*/
import "C"
const (
- O_RDONLY = C.O_RDONLY
- O_CLOEXEC = C.O_CLOEXEC
+ O_RDONLY = C.O_RDONLY
+ O_CLOEXEC = C.O_CLOEXEC
+ SA_RESTORER = C.SA_RESTORER
)
type Usigset C.__sigset_t
diff --git a/src/runtime/defs3_linux.go b/src/runtime/defs3_linux.go
new file mode 100644
index 000000000..3551a4fa9
--- /dev/null
+++ b/src/runtime/defs3_linux.go
@@ -0,0 +1,43 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo -cdefs
+
+GOARCH=power64 cgo -cdefs defs_linux.go defs3_linux.go > defs_linux_power64.h
+*/
+
+package runtime
+
+/*
+#define size_t __kernel_size_t
+#define sigset_t __sigset_t // rename the sigset_t here otherwise cgo will complain about "inconsistent definitions for C.sigset_t"
+#define _SYS_TYPES_H // avoid inclusion of sys/types.h
+#include <asm/ucontext.h>
+#include <asm-generic/fcntl.h>
+*/
+import "C"
+
+const (
+ O_RDONLY = C.O_RDONLY
+ O_CLOEXEC = C.O_CLOEXEC
+ SA_RESTORER = 0 // unused
+)
+
+type Usigset C.__sigset_t
+
+// types used in sigcontext
+type Ptregs C.struct_pt_regs
+type Gregset C.elf_gregset_t
+type FPregset C.elf_fpregset_t
+type Vreg C.elf_vrreg_t
+
+type SigaltstackT C.struct_sigaltstack
+
+// PPC64 uses sigcontext in place of mcontext in ucontext.
+// see http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/include/uapi/asm/ucontext.h
+type Sigcontext C.struct_sigcontext
+type Ucontext C.struct_ucontext
diff --git a/src/runtime/defs_linux.go b/src/runtime/defs_linux.go
index 8657dbb0e..553366a50 100644
--- a/src/runtime/defs_linux.go
+++ b/src/runtime/defs_linux.go
@@ -20,6 +20,7 @@ package runtime
// headers for things like ucontext_t, so that happens in
// a separate file, defs1.go.
+#define _SYS_TYPES_H // avoid inclusion of sys/types.h
#include <asm/posix_types.h>
#define size_t __kernel_size_t
#include <asm/signal.h>
@@ -28,7 +29,7 @@ package runtime
#include <asm-generic/errno.h>
#include <asm-generic/poll.h>
#include <linux/eventpoll.h>
-#undef size_t
+#include <linux/time.h>
*/
import "C"
@@ -48,10 +49,9 @@ const (
MADV_DONTNEED = C.MADV_DONTNEED
- SA_RESTART = C.SA_RESTART
- SA_ONSTACK = C.SA_ONSTACK
- SA_RESTORER = C.SA_RESTORER
- SA_SIGINFO = C.SA_SIGINFO
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+ SA_SIGINFO = C.SA_SIGINFO
SIGHUP = C.SIGHUP
SIGINT = C.SIGINT
@@ -116,6 +116,7 @@ const (
EPOLL_CTL_MOD = C.EPOLL_CTL_MOD
)
+type Sigset C.sigset_t
type Timespec C.struct_timespec
type Timeval C.struct_timeval
type Sigaction C.struct_sigaction
diff --git a/src/runtime/defs_linux_power64.h b/src/runtime/defs_linux_power64.h
new file mode 100644
index 000000000..93742fa34
--- /dev/null
+++ b/src/runtime/defs_linux_power64.h
@@ -0,0 +1,204 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_linux.go defs3_linux.go
+
+
+enum {
+ EINTR = 0x4,
+ EAGAIN = 0xb,
+ ENOMEM = 0xc,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x20,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_DONTNEED = 0x4,
+
+ SA_RESTART = 0x10000000,
+ SA_ONSTACK = 0x8000000,
+ SA_SIGINFO = 0x4,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGBUS = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGUSR1 = 0xa,
+ SIGSEGV = 0xb,
+ SIGUSR2 = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGSTKFLT = 0x10,
+ SIGCHLD = 0x11,
+ SIGCONT = 0x12,
+ SIGSTOP = 0x13,
+ SIGTSTP = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGURG = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGIO = 0x1d,
+ SIGPWR = 0x1e,
+ SIGSYS = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EPOLLIN = 0x1,
+ EPOLLOUT = 0x4,
+ EPOLLERR = 0x8,
+ EPOLLHUP = 0x10,
+ EPOLLRDHUP = 0x2000,
+ EPOLLET = -0x80000000,
+ EPOLL_CLOEXEC = 0x80000,
+ EPOLL_CTL_ADD = 0x1,
+ EPOLL_CTL_DEL = 0x2,
+ EPOLL_CTL_MOD = 0x3,
+};
+
+typedef struct Sigset Sigset;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct SigactionT SigactionT;
+typedef struct Siginfo Siginfo;
+typedef struct Itimerval Itimerval;
+typedef struct EpollEvent EpollEvent;
+
+#pragma pack on
+
+//struct Sigset {
+// uint64 sig[1];
+//};
+//typedef uint64 Sigset;
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int64 tv_usec;
+};
+struct SigactionT {
+ void *sa_handler;
+ uint64 sa_flags;
+ void *sa_restorer;
+ uint64 sa_mask;
+};
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ byte Pad_cgo_0[4];
+ byte _sifields[112];
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+struct EpollEvent {
+ uint32 events;
+ byte Pad_cgo_0[4];
+ byte data[8]; // unaligned uintptr
+};
+
+
+#pragma pack off
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_linux.go defs3_linux.go
+
+
+enum {
+ O_RDONLY = 0x0,
+ O_CLOEXEC = 0x80000,
+ SA_RESTORER = 0,
+};
+
+typedef struct Ptregs Ptregs;
+typedef struct Vreg Vreg;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigcontext Sigcontext;
+typedef struct Ucontext Ucontext;
+
+#pragma pack on
+
+struct Ptregs {
+ uint64 gpr[32];
+ uint64 nip;
+ uint64 msr;
+ uint64 orig_gpr3;
+ uint64 ctr;
+ uint64 link;
+ uint64 xer;
+ uint64 ccr;
+ uint64 softe;
+ uint64 trap;
+ uint64 dar;
+ uint64 dsisr;
+ uint64 result;
+};
+typedef uint64 Gregset[48];
+typedef float64 FPregset[33];
+struct Vreg {
+ uint32 u[4];
+};
+
+struct SigaltstackT {
+ byte *ss_sp;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+ uint64 ss_size;
+};
+
+struct Sigcontext {
+ uint64 _unused[4];
+ int32 signal;
+ int32 _pad0;
+ uint64 handler;
+ uint64 oldmask;
+ Ptregs *regs;
+ uint64 gp_regs[48];
+ float64 fp_regs[33];
+ Vreg *v_regs;
+ int64 vmx_reserve[101];
+};
+struct Ucontext {
+ uint64 uc_flags;
+ Ucontext *uc_link;
+ SigaltstackT uc_stack;
+ uint64 uc_sigmask;
+ uint64 __unused[15];
+ Sigcontext uc_mcontext;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_linux_power64le.h b/src/runtime/defs_linux_power64le.h
new file mode 100644
index 000000000..93742fa34
--- /dev/null
+++ b/src/runtime/defs_linux_power64le.h
@@ -0,0 +1,204 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_linux.go defs3_linux.go
+
+
+enum {
+ EINTR = 0x4,
+ EAGAIN = 0xb,
+ ENOMEM = 0xc,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x20,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_DONTNEED = 0x4,
+
+ SA_RESTART = 0x10000000,
+ SA_ONSTACK = 0x8000000,
+ SA_SIGINFO = 0x4,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGBUS = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGUSR1 = 0xa,
+ SIGSEGV = 0xb,
+ SIGUSR2 = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGSTKFLT = 0x10,
+ SIGCHLD = 0x11,
+ SIGCONT = 0x12,
+ SIGSTOP = 0x13,
+ SIGTSTP = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGURG = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGIO = 0x1d,
+ SIGPWR = 0x1e,
+ SIGSYS = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EPOLLIN = 0x1,
+ EPOLLOUT = 0x4,
+ EPOLLERR = 0x8,
+ EPOLLHUP = 0x10,
+ EPOLLRDHUP = 0x2000,
+ EPOLLET = -0x80000000,
+ EPOLL_CLOEXEC = 0x80000,
+ EPOLL_CTL_ADD = 0x1,
+ EPOLL_CTL_DEL = 0x2,
+ EPOLL_CTL_MOD = 0x3,
+};
+
+typedef struct Sigset Sigset;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct SigactionT SigactionT;
+typedef struct Siginfo Siginfo;
+typedef struct Itimerval Itimerval;
+typedef struct EpollEvent EpollEvent;
+
+#pragma pack on
+
+//struct Sigset {
+// uint64 sig[1];
+//};
+//typedef uint64 Sigset;
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int64 tv_usec;
+};
+struct SigactionT {
+ void *sa_handler;
+ uint64 sa_flags;
+ void *sa_restorer;
+ uint64 sa_mask;
+};
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ byte Pad_cgo_0[4];
+ byte _sifields[112];
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+struct EpollEvent {
+ uint32 events;
+ byte Pad_cgo_0[4];
+ byte data[8]; // unaligned uintptr
+};
+
+
+#pragma pack off
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_linux.go defs3_linux.go
+
+
+enum {
+ O_RDONLY = 0x0,
+ O_CLOEXEC = 0x80000,
+ SA_RESTORER = 0,
+};
+
+typedef struct Ptregs Ptregs;
+typedef struct Vreg Vreg;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigcontext Sigcontext;
+typedef struct Ucontext Ucontext;
+
+#pragma pack on
+
+struct Ptregs {
+ uint64 gpr[32];
+ uint64 nip;
+ uint64 msr;
+ uint64 orig_gpr3;
+ uint64 ctr;
+ uint64 link;
+ uint64 xer;
+ uint64 ccr;
+ uint64 softe;
+ uint64 trap;
+ uint64 dar;
+ uint64 dsisr;
+ uint64 result;
+};
+typedef uint64 Gregset[48];
+typedef float64 FPregset[33];
+struct Vreg {
+ uint32 u[4];
+};
+
+struct SigaltstackT {
+ byte *ss_sp;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+ uint64 ss_size;
+};
+
+struct Sigcontext {
+ uint64 _unused[4];
+ int32 signal;
+ int32 _pad0;
+ uint64 handler;
+ uint64 oldmask;
+ Ptregs *regs;
+ uint64 gp_regs[48];
+ float64 fp_regs[33];
+ Vreg *v_regs;
+ int64 vmx_reserve[101];
+};
+struct Ucontext {
+ uint64 uc_flags;
+ Ucontext *uc_link;
+ SigaltstackT uc_stack;
+ uint64 uc_sigmask;
+ uint64 __unused[15];
+ Sigcontext uc_mcontext;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index be352557f..65e918e84 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -26,7 +26,7 @@ var Exitsyscall = exitsyscall
var LockedOSThread = lockedOSThread
type LFNode struct {
- Next *LFNode
+ Next uint64
Pushcnt uintptr
}
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index 88f6703f9..1a33f3b3b 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -153,6 +153,12 @@ func infoBigStruct() []byte {
BitsScalar, BitsScalar, BitsDead, BitsScalar, BitsScalar, // t int; y uint16; u uint64
BitsPointer, BitsDead, // i string
}
+ case "power64", "power64le":
+ return []byte{
+ BitsPointer, BitsScalar, BitsScalar, BitsScalar,
+ BitsMultiWord, BitsSlice, BitsScalar, BitsScalar,
+ BitsScalar, BitsScalar, BitsMultiWord, BitsString,
+ }
default:
panic("unknown arch")
}
@@ -188,6 +194,6 @@ var (
infoString = []byte{BitsPointer, BitsDead}
infoSlice = []byte{BitsPointer, BitsDead, BitsDead}
- infoEface = []byte{BitsMultiWord, BitsEface}
- infoIface = []byte{BitsMultiWord, BitsIface}
+ infoEface = []byte{BitsPointer, BitsPointer}
+ infoIface = []byte{BitsPointer, BitsPointer}
)
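The infoEface/infoIface change above means the test now expects both words of an interface value (the type/itab word and the data word) to be described as plain pointers, rather than as a BitsMultiWord pair. A tiny sketch, assuming only the usual two-word interface layout, that checks the size this description relies on:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	var e interface{}    // eface: type word + data word
	var p unsafe.Pointer // one machine word
	fmt.Println(unsafe.Sizeof(e) == 2*unsafe.Sizeof(p)) // true
}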
diff --git a/src/runtime/heapdump.c b/src/runtime/heapdump.c
index eddbc1d1c..da14f2d24 100644
--- a/src/runtime/heapdump.c
+++ b/src/runtime/heapdump.c
@@ -261,20 +261,7 @@ dumpbv(BitVector *bv, uintptr offset)
dumpint(offset + i / BitsPerPointer * PtrSize);
break;
case BitsMultiWord:
- switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
- default:
- runtime·throw("unexpected garbage collection bits");
- case BitsIface:
- dumpint(FieldKindIface);
- dumpint(offset + i / BitsPerPointer * PtrSize);
- i += BitsPerPointer;
- break;
- case BitsEface:
- dumpint(FieldKindEface);
- dumpint(offset + i / BitsPerPointer * PtrSize);
- i += BitsPerPointer;
- break;
- }
+ runtime·throw("bumpbv unexpected garbage collection bits");
}
}
}
diff --git a/src/runtime/lfstack.c b/src/runtime/lfstack.c
index 57e0af282..0ced839c2 100644
--- a/src/runtime/lfstack.c
+++ b/src/runtime/lfstack.c
@@ -46,7 +46,7 @@ runtime·lfstackpush(uint64 *head, LFNode *node)
new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS);
for(;;) {
old = runtime·atomicload64(head);
- node->next = (LFNode*)(uintptr)(old&PTR_MASK);
+ node->next = old;
if(runtime·cas64(head, old, new))
break;
}
@@ -55,19 +55,17 @@ runtime·lfstackpush(uint64 *head, LFNode *node)
LFNode*
runtime·lfstackpop(uint64 *head)
{
- LFNode *node, *node2;
- uint64 old, new;
+ LFNode *node;
+ uint64 old, next;
for(;;) {
old = runtime·atomicload64(head);
if(old == 0)
return nil;
node = (LFNode*)(uintptr)(old&PTR_MASK);
- node2 = runtime·atomicloadp(&node->next);
- new = 0;
- if(node2 != nil)
- new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt&CNT_MASK)<<PTR_BITS);
- if(runtime·cas64(head, old, new))
+ next = runtime·atomicload64(&node->next);
+
+ if(runtime·cas64(head, old, next))
return node;
}
}
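The effect of this change is that node->next now stores the same packed 64-bit word (pointer plus push counter) that head stores, so pop can CAS head straight to node->next. A hedged Go sketch of that packing follows; ptrBits is an illustrative split, not the runtime's exact PTR_BITS/CNT_MASK values:

	package lfstackdemo

	const (
		ptrBits = 48                  // low bits hold the node address (illustrative)
		ptrMask = 1<<ptrBits - 1
		cntMask = 1<<(64-ptrBits) - 1 // high bits hold the push counter
	)

	// pack combines a node address and its push count into one word, the
	// form now stored both in the list head and in node.next.
	func pack(addr, pushcnt uint64) uint64 {
		return addr&ptrMask | (pushcnt&cntMask)<<ptrBits
	}

	// unpack recovers the node address from a packed word.
	func unpack(word uint64) uint64 {
		return word & ptrMask
	}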
diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go
index e51877704..68f221d6e 100644
--- a/src/runtime/lfstack_test.go
+++ b/src/runtime/lfstack_test.go
@@ -121,7 +121,7 @@ func TestLFStackStress(t *testing.T) {
}
cnt++
sum2 += node.data
- node.Next = nil
+ node.Next = 0
}
}
if cnt != K {
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 8cf1c3d34..fab8cf269 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -4,9 +4,7 @@
package runtime
-import (
- "unsafe"
-)
+import "unsafe"
const (
debugMalloc = false
@@ -247,6 +245,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
masksize = masksize * pointersPerByte / 8 // 4 bits per word
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
+ // write barriers have not been updated to deal with this case yet.
+ gothrow("maxGCMask too small for now")
// If the mask is too large, unroll the program directly
// into the GC bitmap. It's 7 times slower than copying
// from the pre-unrolled mask, but saves 1/16 of type size
@@ -261,8 +261,10 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
goto marked
}
ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
- // Check whether the program is already unrolled.
- if uintptr(atomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
+ // Check whether the program is already unrolled
+ // by checking if the unroll flag byte is set
+ maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
+ if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
mp := acquirem()
mp.ptrarg[0] = unsafe.Pointer(typ)
onM(unrollgcprog_m)
@@ -304,6 +306,18 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
}
}
marked:
+
+ // GCmarktermination allocates black
+ // All slots hold nil so no scanning is needed.
+ // This may be racing with GC so do it atomically if there can be
+ // a race marking the bit.
+ if gcphase == _GCmarktermination {
+ mp := acquirem()
+ mp.ptrarg[0] = x
+ onM(gcmarknewobject_m)
+ releasem(mp)
+ }
+
if raceenabled {
racemalloc(x, size)
}
@@ -344,6 +358,37 @@ marked:
return x
}
+func loadPtrMask(typ *_type) []uint8 {
+ var ptrmask *uint8
+ nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
+ if typ.kind&kindGCProg != 0 {
+ masksize := nptr
+ if masksize%2 != 0 {
+ masksize *= 2 // repeated
+ }
+ masksize = masksize * pointersPerByte / 8 // 4 bits per word
+ masksize++ // unroll flag in the beginning
+ if masksize > maxGCMask && typ.gc[1] != 0 {
+ // write barriers have not been updated to deal with this case yet.
+ gothrow("maxGCMask too small for now")
+ }
+ ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
+ // Check whether the program is already unrolled
+ // by checking if the unroll flag byte is set
+ maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
+ if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(typ)
+ onM(unrollgcprog_m)
+ releasem(mp)
+ }
+ ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
+ } else {
+ ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
+ }
+ return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
+}
+
// implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
flags := uint32(0)
@@ -438,7 +483,20 @@ func gogc(force int32) {
mp = acquirem()
mp.gcing = 1
releasem(mp)
+
onM(stoptheworld)
+ onM(finishsweep_m) // finish sweep before we start concurrent scan.
+ if false { // To turn on concurrent scan and mark set to true...
+ onM(starttheworld)
+ // Do a concurrent heap scan before we stop the world.
+ onM(gcscan_m)
+ onM(stoptheworld)
+ onM(gcinstallmarkwb_m)
+ onM(starttheworld)
+ onM(gcmark_m)
+ onM(stoptheworld)
+ onM(gcinstalloffwb_m)
+ }
if mp != acquirem() {
gothrow("gogc: rescheduled")
}
@@ -469,6 +527,8 @@ func gogc(force int32) {
onM(gc_m)
}
+ onM(gccheckmark_m)
+
// all done
mp.gcing = 0
semrelease(&worldsema)
@@ -483,6 +543,14 @@ func gogc(force int32) {
}
}
+func GCcheckmarkenable() {
+ onM(gccheckmarkenable_m)
+}
+
+func GCcheckmarkdisable() {
+ onM(gccheckmarkdisable_m)
+}
+
// GC runs a garbage collection.
func GC() {
gogc(2)
diff --git a/src/runtime/malloc.h b/src/runtime/malloc.h
index adb8d3d67..522b11bba 100644
--- a/src/runtime/malloc.h
+++ b/src/runtime/malloc.h
@@ -86,6 +86,7 @@ typedef struct MSpan MSpan;
typedef struct MStats MStats;
typedef struct MLink MLink;
typedef struct GCStats GCStats;
+typedef struct Workbuf Workbuf;
enum
{
@@ -344,8 +345,6 @@ struct MCache
SudoG* sudogcache;
- void* gcworkbuf;
-
// Local allocator stats, flushed during GC.
uintptr local_nlookup; // number of pointer lookups
uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
@@ -356,7 +355,7 @@ struct MCache
MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass);
void runtime·MCache_ReleaseAll(MCache *c);
void runtime·stackcache_clear(MCache *c);
-void runtime·gcworkbuffree(void *b);
+void runtime·gcworkbuffree(Workbuf *b);
enum
{
diff --git a/src/runtime/mcache.c b/src/runtime/mcache.c
index 5fdbe3266..95ddced3e 100644
--- a/src/runtime/mcache.c
+++ b/src/runtime/mcache.c
@@ -39,12 +39,12 @@ runtime·allocmcache(void)
return c;
}
+// mheap.lock needs to be held to release the gcworkbuf.
static void
freemcache(MCache *c)
{
runtime·MCache_ReleaseAll(c);
runtime·stackcache_clear(c);
- runtime·gcworkbuffree(c->gcworkbuf);
runtime·lock(&runtime·mheap.lock);
runtime·purgecachedstats(c);
runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
diff --git a/src/runtime/mem_linux.c b/src/runtime/mem_linux.c
index bfb405607..52e02b34e 100644
--- a/src/runtime/mem_linux.c
+++ b/src/runtime/mem_linux.c
@@ -11,7 +11,7 @@
enum
{
- _PAGE_SIZE = 4096,
+ _PAGE_SIZE = PhysPageSize,
EACCES = 13,
};
@@ -36,8 +36,9 @@ addrspace_free(void *v, uintptr n)
errval = runtime·mincore((int8*)v + off, chunk, vec);
// ENOMEM means unmapped, which is what we want.
// Anything else we assume means the pages are mapped.
- if (errval != -ENOMEM)
+ if (errval != -ENOMEM && errval != ENOMEM) {
return 0;
+ }
}
return 1;
}
@@ -48,12 +49,15 @@ mmap_fixed(byte *v, uintptr n, int32 prot, int32 flags, int32 fd, uint32 offset)
void *p;
p = runtime·mmap(v, n, prot, flags, fd, offset);
- if(p != v && addrspace_free(v, n)) {
+ if(p != v) {
+ if(p > (void*)4096) {
+ runtime·munmap(p, n);
+ p = nil;
+ }
// On some systems, mmap ignores v without
// MAP_FIXED, so retry if the address space is free.
- if(p > (void*)4096)
- runtime·munmap(p, n);
- p = runtime·mmap(v, n, prot, flags|MAP_FIXED, fd, offset);
+ if(addrspace_free(v, n))
+ p = runtime·mmap(v, n, prot, flags|MAP_FIXED, fd, offset);
}
return p;
}
diff --git a/src/runtime/memclr_386.s b/src/runtime/memclr_386.s
index 1520aea2e..3f20b69c8 100644
--- a/src/runtime/memclr_386.s
+++ b/src/runtime/memclr_386.s
@@ -15,31 +15,31 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8
XORL AX, AX
// MOVOU seems always faster than REP STOSL.
-clr_tail:
+tail:
TESTL BX, BX
- JEQ clr_0
+ JEQ _0
CMPL BX, $2
- JBE clr_1or2
+ JBE _1or2
CMPL BX, $4
- JBE clr_3or4
+ JBE _3or4
CMPL BX, $8
- JBE clr_5through8
+ JBE _5through8
CMPL BX, $16
- JBE clr_9through16
+ JBE _9through16
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
JEQ nosse2
PXOR X0, X0
CMPL BX, $32
- JBE clr_17through32
+ JBE _17through32
CMPL BX, $64
- JBE clr_33through64
+ JBE _33through64
CMPL BX, $128
- JBE clr_65through128
+ JBE _65through128
CMPL BX, $256
- JBE clr_129through256
+ JBE _129through256
// TODO: use branch table and BSR to make this just a single dispatch
-clr_loop:
+loop:
MOVOU X0, 0(DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
@@ -59,40 +59,40 @@ clr_loop:
SUBL $256, BX
ADDL $256, DI
CMPL BX, $256
- JAE clr_loop
- JMP clr_tail
+ JAE loop
+ JMP tail
-clr_1or2:
+_1or2:
MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1)
RET
-clr_0:
+_0:
RET
-clr_3or4:
+_3or4:
MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1)
RET
-clr_5through8:
+_5through8:
MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1)
RET
-clr_9through16:
+_9through16:
MOVL AX, (DI)
MOVL AX, 4(DI)
MOVL AX, -8(DI)(BX*1)
MOVL AX, -4(DI)(BX*1)
RET
-clr_17through32:
+_17through32:
MOVOU X0, (DI)
MOVOU X0, -16(DI)(BX*1)
RET
-clr_33through64:
+_33through64:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
-clr_65through128:
+_65through128:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
@@ -102,7 +102,7 @@ clr_65through128:
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
-clr_129through256:
+_129through256:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
@@ -126,5 +126,5 @@ nosse2:
REP
STOSL
ANDL $3, BX
- JNE clr_tail
+ JNE tail
RET
diff --git a/src/runtime/memclr_amd64.s b/src/runtime/memclr_amd64.s
index 94a2c7f23..ec24f1db2 100644
--- a/src/runtime/memclr_amd64.s
+++ b/src/runtime/memclr_amd64.s
@@ -15,30 +15,30 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-16
XORQ AX, AX
// MOVOU seems always faster than REP STOSQ.
-clr_tail:
+tail:
TESTQ BX, BX
- JEQ clr_0
+ JEQ _0
CMPQ BX, $2
- JBE clr_1or2
+ JBE _1or2
CMPQ BX, $4
- JBE clr_3or4
+ JBE _3or4
CMPQ BX, $8
- JBE clr_5through8
+ JBE _5through8
CMPQ BX, $16
- JBE clr_9through16
+ JBE _9through16
PXOR X0, X0
CMPQ BX, $32
- JBE clr_17through32
+ JBE _17through32
CMPQ BX, $64
- JBE clr_33through64
+ JBE _33through64
CMPQ BX, $128
- JBE clr_65through128
+ JBE _65through128
CMPQ BX, $256
- JBE clr_129through256
+ JBE _129through256
// TODO: use branch table and BSR to make this just a single dispatch
// TODO: for really big clears, use MOVNTDQ.
-clr_loop:
+loop:
MOVOU X0, 0(DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
@@ -58,38 +58,38 @@ clr_loop:
SUBQ $256, BX
ADDQ $256, DI
CMPQ BX, $256
- JAE clr_loop
- JMP clr_tail
+ JAE loop
+ JMP tail
-clr_1or2:
+_1or2:
MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1)
RET
-clr_0:
+_0:
RET
-clr_3or4:
+_3or4:
MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1)
RET
-clr_5through8:
+_5through8:
MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1)
RET
-clr_9through16:
+_9through16:
MOVQ AX, (DI)
MOVQ AX, -8(DI)(BX*1)
RET
-clr_17through32:
+_17through32:
MOVOU X0, (DI)
MOVOU X0, -16(DI)(BX*1)
RET
-clr_33through64:
+_33through64:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
-clr_65through128:
+_65through128:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
@@ -99,7 +99,7 @@ clr_65through128:
MOVOU X0, -32(DI)(BX*1)
MOVOU X0, -16(DI)(BX*1)
RET
-clr_129through256:
+_129through256:
MOVOU X0, (DI)
MOVOU X0, 16(DI)
MOVOU X0, 32(DI)
diff --git a/src/runtime/memclr_plan9_386.s b/src/runtime/memclr_plan9_386.s
index b4b671f77..50f327b4e 100644
--- a/src/runtime/memclr_plan9_386.s
+++ b/src/runtime/memclr_plan9_386.s
@@ -10,40 +10,40 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8
MOVL n+4(FP), BX
XORL AX, AX
-clr_tail:
+tail:
TESTL BX, BX
- JEQ clr_0
+ JEQ _0
CMPL BX, $2
- JBE clr_1or2
+ JBE _1or2
CMPL BX, $4
- JBE clr_3or4
+ JBE _3or4
CMPL BX, $8
- JBE clr_5through8
+ JBE _5through8
CMPL BX, $16
- JBE clr_9through16
+ JBE _9through16
MOVL BX, CX
SHRL $2, CX
REP
STOSL
ANDL $3, BX
- JNE clr_tail
+ JNE tail
RET
-clr_1or2:
+_1or2:
MOVB AX, (DI)
MOVB AX, -1(DI)(BX*1)
RET
-clr_0:
+_0:
RET
-clr_3or4:
+_3or4:
MOVW AX, (DI)
MOVW AX, -2(DI)(BX*1)
RET
-clr_5through8:
+_5through8:
MOVL AX, (DI)
MOVL AX, -4(DI)(BX*1)
RET
-clr_9through16:
+_9through16:
MOVL AX, (DI)
MOVL AX, 4(DI)
MOVL AX, -8(DI)(BX*1)
diff --git a/src/runtime/memclr_power64x.s b/src/runtime/memclr_power64x.s
new file mode 100644
index 000000000..dfad64b6f
--- /dev/null
+++ b/src/runtime/memclr_power64x.s
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build power64 power64le
+
+#include "textflag.h"
+
+// void runtime·memclr(void*, uintptr)
+TEXT runtime·memclr(SB),NOSPLIT,$0-16
+ MOVD ptr+0(FP), R3
+ MOVD n+8(FP), R4
+ CMP R4, $0
+ BEQ done
+ SUB $1, R3
+ MOVD R4, CTR
+ MOVBU R0, 1(R3)
+ BC 25, 0, -1(PC) // bdnz+ $-4
+done:
+ RETURN
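For readers less familiar with power64 assembly: the routine above zeroes n bytes starting at ptr, one byte per iteration, using CTR as the loop counter. A minimal, portable Go sketch of the same contract (illustrative only, not the runtime's implementation):

	package main

	import "fmt"

	// memclr zeroes the first n bytes of buf, mirroring what
	// runtime·memclr(ptr, n) does one byte at a time.
	func memclr(buf []byte, n int) {
		for i := 0; i < n; i++ {
			buf[i] = 0
		}
	}

	func main() {
		b := []byte{1, 2, 3, 4}
		memclr(b, len(b))
		fmt.Println(b) // [0 0 0 0]
	}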
diff --git a/src/runtime/memmove_power64x.s b/src/runtime/memmove_power64x.s
new file mode 100644
index 000000000..2b04d8319
--- /dev/null
+++ b/src/runtime/memmove_power64x.s
@@ -0,0 +1,40 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build power64 power64le
+
+#include "textflag.h"
+
+// void runtime·memmove(void*, void*, uintptr)
+TEXT runtime·memmove(SB), NOSPLIT, $-8-24
+ MOVD to+0(FP), R3
+ MOVD from+8(FP), R4
+ MOVD n+16(FP), R5
+ CMP R5, $0
+ BNE check
+ RETURN
+
+check:
+ CMP R3, R4
+ BGT backward
+
+ SUB $1, R3
+ ADD R3, R5
+ SUB $1, R4
+loop:
+ MOVBU 1(R4), R6
+ MOVBU R6, 1(R3)
+ CMP R3, R5
+ BNE loop
+ RETURN
+
+backward:
+ ADD R5, R4
+ ADD R3, R5
+loop1:
+ MOVBU -1(R4), R6
+ MOVBU R6, -1(R5)
+ CMP R3, R5
+ BNE loop1
+ RETURN
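As a hedged illustration of the direction check above (not the runtime's code, just the standard overlap-safe pattern in Go): copy forward when the destination lies below the source, backward otherwise.

	package memmovedemo

	// memmoveWithin copies n bytes inside buf from offset src to offset dst,
	// choosing the copy direction so that overlapping ranges are handled
	// correctly, mirroring the forward/backward split in the assembly above.
	func memmoveWithin(buf []byte, dst, src, n int) {
		if dst == src || n == 0 {
			return
		}
		if dst < src {
			for i := 0; i < n; i++ { // forward copy
				buf[dst+i] = buf[src+i]
			}
			return
		}
		for i := n - 1; i >= 0; i-- { // backward copy
			buf[dst+i] = buf[src+i]
		}
	}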
diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c
index 7754bad89..3248b0f49 100644
--- a/src/runtime/mgc0.c
+++ b/src/runtime/mgc0.c
@@ -4,22 +4,72 @@
// Garbage collector (GC).
//
-// GC is:
-// - mark&sweep
-// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
-// - parallel (up to MaxGcproc threads)
-// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
-// - non-moving/non-compacting
-// - full (non-partial)
+// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows multiple
+// GC threads to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is
+// non-generational and non-compacting. Allocation is done using size segregated per P allocation
+// areas to minimize fragmentation while eliminating locks in the common case.
//
-// GC rate.
-// Next GC is after we've allocated an extra amount of memory proportional to
-// the amount already in use. The proportion is controlled by GOGC environment variable
-// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
-// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
-// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
-// (and also the amount of extra memory used).
+// The algorithm decomposes into several steps.
+// This is a high level description of the algorithm being used. For an overview of GC a good
+// place to start is Richard Jones' gchandbook.org.
+//
+// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
+// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
+// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 966-975.
+// For journal quality proofs that these steps are complete, correct, and terminate see
+// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
+// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
//
+// 0. Set phase = GCscan from GCoff.
+// 1. Wait for all P's to acknowledge phase change.
+// At this point all goroutines have passed through a GC safepoint and
+// know we are in the GCscan phase.
+// 2. GC scans all goroutine stacks, mark and enqueues all encountered pointers
+// (marking avoids most duplicate enqueuing but races may produce duplication which is benign).
+// Preempted goroutines are scanned before P schedules next goroutine.
+// 3. Set phase = GCmark.
+// 4. Wait for all P's to acknowledge phase change.
+// 5. Now write barrier marks and enqueues black, grey, or white to white pointers.
+// Malloc still allocates white (non-marked) objects.
+// 6. Meanwhile GC transitively walks the heap marking reachable objects.
+// 7. When GC finishes marking heap, it preempts P's one-by-one and
+// retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
+// currently scheduled on the P).
+// 8. Once the GC has exhausted all available marking work it sets phase = marktermination.
+// 9. Wait for all P's to acknowledge phase change.
+// 10. Malloc now allocates black objects, so number of unmarked reachable objects
+// monotonically decreases.
+// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet reachable objects.
+// 12. When GC completes a full cycle over P's and discovers no new grey
+// objects, (which means all reachable objects are marked) set phase = GCsweep.
+// 13. Wait for all P's to acknowledge phase change.
+// 14. Now malloc allocates white (but sweeps spans before use).
+// Write barrier becomes nop.
+// 15. GC does background sweeping, see description below.
+// 16. When sweeping is complete set phase to GCoff.
+// 17. When sufficient allocation has taken place replay the sequence starting at 0 above,
+// see discussion of GC rate below.
+
+// Changing phases.
+// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
+// All phase action must be benign in the presence of a change.
+// Starting with GCoff
+// GCoff to GCscan
+// GCscan scans stacks and globals, greying them, and never marks an object black.
+// Once all the P's are aware of the new phase they will scan gs on preemption.
+// This means that the scanning of preempted gs can't start until all the Ps
+// have acknowledged.
+// GCscan to GCmark
+// GCmark turns on the write barrier which also only greys objects. No scanning
+// of objects (making them black) can happen until all the Ps have acknowledged
+// the phase change.
+// GCmark to GCmarktermination
+// The only change here is that we start allocating black so the Ps must acknowledge
+// the change before we begin the termination algorithm
+// GCmarktermination to GCsweep
+// Objects currently on the freelist must be marked black for this to work.
+// Are things on the free lists black or white? How does the sweep phase work?
+
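For orientation, here is a rough Go sketch of the phase names and the nominal order they are entered in, as listed in the numbered steps above. The acknowledgement machinery is elided and the lower-case names merely mirror the runtime's GCoff/GCscan/GCmark/GCmarktermination/GCsweep constants:

	package gcphasedemo

	type gcPhase int

	const (
		gcOff gcPhase = iota
		gcScan
		gcMark
		gcMarktermination
		gcSweep
	)

	// cycle lists the order in which phases are entered during one GC cycle,
	// as described in the numbered steps above, returning to gcOff at the end.
	var cycle = []gcPhase{gcOff, gcScan, gcMark, gcMarktermination, gcSweep, gcOff}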
// Concurrent sweep.
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
@@ -50,6 +100,14 @@
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).
+// GC rate.
+// Next GC is after we've allocated an extra amount of memory proportional to
+// the amount already in use. The proportion is controlled by GOGC environment variable
+// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
+// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
+// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
+// (and also the amount of extra memory used).
+
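A minimal sketch of the pacing rule described above, assuming GOGC is the only input (the real runtime tracks this in next_gc with more bookkeeping; the function name is illustrative):

	package gcpacedemo

	// nextGC returns the heap size at which the next collection triggers,
	// given the heap in use after the last GC and the GOGC percentage.
	// With gogc=100 and 4MB in use, the next GC fires at 8MB.
	func nextGC(heapInUse, gogc uint64) uint64 {
		return heapInUse + heapInUse*gogc/100
	}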
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
@@ -64,10 +122,8 @@
enum {
Debug = 0,
- DebugPtrs = 0, // if 1, print trace of every pointer load during GC
ConcurrentSweep = 1,
- WorkbufSize = 4*1024,
FinBlockSize = 4*1024,
RootData = 0,
RootBss = 1,
@@ -80,7 +136,7 @@ enum {
// ptrmask for an allocation containing a single pointer.
static byte oneptr[] = {BitsPointer};
-// Initialized from $GOGC. GOGC=off means no gc.
+// Initialized from $GOGC. GOGC=off means no GC.
extern int32 runtime·gcpercent;
// Holding worldsema grants an M the right to try to stop the world.
@@ -98,12 +154,16 @@ extern int32 runtime·gcpercent;
//
uint32 runtime·worldsema = 1;
-typedef struct Workbuf Workbuf;
-struct Workbuf
-{
- LFNode node; // must be first
- uintptr nobj;
- byte* obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize];
+// It is a bug if bits does not have bitBoundary set, but
+// there are still some cases, related to stack spans, where
+// this happens.
+typedef struct Markbits Markbits;
+struct Markbits {
+ byte *bitp; // pointer to the byte holding xbits
+ byte shift; // bits xbits needs to be shifted to get bits
+ byte xbits; // byte holding all the bits from *bitp
+ byte bits; // mark and boundary bits relevant to corresponding slot.
+ byte tbits; // pointer||scalar bits relevant to corresponding slot.
};
extern byte runtime·data[];
@@ -128,26 +188,40 @@ BitVector runtime·gcbssmask;
Mutex runtime·gclock;
-static uintptr badblock[1024];
-static int32 nbadblock;
-
+static Workbuf* getpartialorempty(void);
+static void putpartial(Workbuf*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
+static void putfull(Workbuf*);
static Workbuf* handoff(Workbuf*);
static void gchelperstart(void);
static void flushallmcaches(void);
-static bool scanframe(Stkframe *frame, void *unused);
-static void scanstack(G *gp);
-static BitVector unrollglobgcprog(byte *prog, uintptr size);
+static bool scanframe(Stkframe*, void*);
+static void scanstack(G*);
+static BitVector unrollglobgcprog(byte*, uintptr);
+static void scanblock(byte*, uintptr, byte*);
+static byte* objectstart(byte*, Markbits*);
+static Workbuf* greyobject(byte*, Markbits*, Workbuf*);
+static bool inheap(byte*);
+static bool shaded(byte*);
+static void shade(byte*);
+static void slottombits(byte*, Markbits*);
+static void atomicxor8(byte*, byte);
+static bool ischeckmarked(Markbits*);
+static bool ismarked(Markbits*);
+static void clearcheckmarkbits(void);
+static void clearcheckmarkbitsspan(MSpan*);
void runtime·bgsweep(void);
+void runtime·finishsweep_m(void);
static FuncVal bgsweepv = {runtime·bgsweep};
typedef struct WorkData WorkData;
struct WorkData {
- uint64 full; // lock-free list of full blocks
- uint64 empty; // lock-free list of empty blocks
+ uint64 full; // lock-free list of full blocks
+ uint64 empty; // lock-free list of empty blocks
+ uint64 partial; // lock-free list of partially filled blocks
byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
uint32 nproc;
int64 tstart;
@@ -162,315 +236,422 @@ struct WorkData {
};
WorkData runtime·work;
-// Is _cgo_allocate linked into the binary?
+// To help debug the concurrent GC we remark with the world
+// stopped, ensuring that any object encountered has its normal
+// mark bit set. To do this we use an orthogonal bit
+// pattern to indicate the object is marked. The following pattern
+// uses the upper two bits in the object's boundary nibble.
+// 01: scalar not marked
+// 10: pointer not marked
+// 11: pointer marked
+// 00: scalar marked
+// XORing with 01 will flip the pattern from marked to unmarked and vice versa.
+// The higher bit is 1 for pointers and 0 for scalars, whether the object
+// is marked or not.
+// The first nibble no longer holds the bitsDead pattern indicating that
+// there are no more pointers in the object. This information is held
+// in the second nibble.
+
+// When marking an object if the bool checkmark is true one uses the above
+// encoding, otherwise one uses the bitMarked bit in the lower two bits
+// of the nibble.
+static bool checkmark = false;
+static bool gccheckmarkenable = true;
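A small worked example of the checkmark encoding above; the constant values are illustrative stand-ins for the runtime's BitsScalar/BitsPointer family, but the XOR-with-01 flip is exactly the rule described:

	package checkmarkdemo

	const (
		bitsScalarMarked  = 0x0 // 00
		bitsScalar        = 0x1 // 01
		bitsPointer       = 0x2 // 10
		bitsPointerMarked = 0x3 // 11
		checkMarkXor      = 0x1 // XOR with 01 toggles marked <-> unmarked
	)

	// toggleCheckmark flips a type nibble between its marked and unmarked
	// checkmark encodings: 01 <-> 00 for scalars, 10 <-> 11 for pointers.
	func toggleCheckmark(tbits byte) byte {
		return tbits ^ checkMarkXor
	}

	// isCheckmarked reports whether the type bits use a marked encoding.
	func isCheckmarked(tbits byte) bool {
		return tbits == bitsScalarMarked || tbits == bitsPointerMarked
	}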
+
+// Is address b in the known heap? If it doesn't have a valid gcmap
+// this returns false. For example, pointers into stacks will return false.
static bool
-have_cgo_allocate(void)
+inheap(byte *b)
{
- extern byte go·weak·runtime·_cgo_allocate_internal[1];
- return go·weak·runtime·_cgo_allocate_internal != nil;
+ MSpan *s;
+ pageID k;
+ uintptr x;
+
+ if(b == nil || b < runtime·mheap.arena_start || b >= runtime·mheap.arena_used)
+ return false;
+ // Not a beginning of a block, consult span table to find the block beginning.
+ k = (uintptr)b>>PageShift;
+ x = k;
+ x -= (uintptr)runtime·mheap.arena_start>>PageShift;
+ s = runtime·mheap.spans[x];
+ if(s == nil || k < s->start || b >= s->limit || s->state != MSpanInUse)
+ return false;
+ return true;
}
-// scanblock scans a block of n bytes starting at pointer b for references
-// to other objects, scanning any it finds recursively until there are no
-// unscanned objects left. Instead of using an explicit recursion, it keeps
-// a work list in the Workbuf* structures and loops in the main function
-// body. Keeping an explicit work list is easier on the stack allocator and
-// more efficient.
+// Given an address in the heap return the relevant byte from the gcmap. This routine
+// can be used on addresses to the start of an object or to the interior of an object.
static void
-scanblock(byte *b, uintptr n, byte *ptrmask)
+slottombits(byte *obj, Markbits *mbits)
{
- byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
- uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
- Workbuf *wbuf;
- Iface *iface;
- Eface *eface;
- Type *typ;
+ uintptr off;
+
+ off = (uintptr*)((uintptr)obj&~(PtrSize-1)) - (uintptr*)runtime·mheap.arena_start;
+ mbits->bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+ mbits->shift = (off % wordsPerBitmapByte) * gcBits;
+ mbits->xbits = *mbits->bitp;
+ mbits->bits = (mbits->xbits >> mbits->shift) & bitMask;
+ mbits->tbits = ((mbits->xbits >> mbits->shift) & bitPtrMask) >> 2;
+}
+
+// b is a pointer into the heap.
+// Find the start of the object referred to by b.
+// Set mbits to the associated bits from the bit map.
+// If b is not a valid heap object return nil and
+// undefined values in mbits.
+static byte*
+objectstart(byte *b, Markbits *mbits)
+{
+ byte *obj, *p;
MSpan *s;
pageID k;
- bool keepworking;
+ uintptr x, size, idx;
- // Cache memory arena parameters in local vars.
- arena_start = runtime·mheap.arena_start;
- arena_used = runtime·mheap.arena_used;
+ obj = (byte*)((uintptr)b&~(PtrSize-1));
+ for(;;) {
+ slottombits(obj, mbits);
+ if((mbits->bits&bitBoundary) == bitBoundary)
+ break;
- wbuf = getempty(nil);
- nobj = wbuf->nobj;
- wp = &wbuf->obj[nobj];
- keepworking = b == nil;
- scanbufpos = 0;
- for(i = 0; i < nelem(scanbuf); i++)
- scanbuf[i] = nil;
+ // Not a beginning of a block, consult span table to find the block beginning.
+ k = (uintptr)obj>>PageShift;
+ x = k;
+ x -= (uintptr)runtime·mheap.arena_start>>PageShift;
+ s = runtime·mheap.spans[x];
+ if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse){
+ if(s != nil && s->state == MSpanStack) {
+ return nil; // This is legit.
+ }
+ // The following ensures that we are rigorous about what data
+ // structures hold valid pointers
+ if(0) {
+ // Still happens sometimes. We don't know why.
+ runtime·printf("runtime:objectstart Span weird: obj=%p, k=%p", obj, k);
+ if (s == nil)
+ runtime·printf(" s=nil\n");
+ else
+ runtime·printf(" s->start=%p s->limit=%p, s->state=%d\n", s->start*PageSize, s->limit, s->state);
+ runtime·throw("objectstart: bad pointer in unexpected span");
+ }
+ return nil;
+ }
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass != 0) {
+ size = s->elemsize;
+ idx = ((byte*)obj - p)/size;
+ p = p+idx*size;
+ }
+ if(p == obj) {
+ runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
+ p, s->start*PageSize, s->limit);
+ runtime·throw("failed to find block beginning");
+ }
+ obj = p;
+ }
+ // If size(obj.firstfield) < PtrSize, then &obj.secondfield could map to the boundary bit.
+ // Clear any low bits to get to the start of the object.
+ // greyobject depends on this.
+ return obj;
+}
+
+// Slow for now as we serialize this. Since this is on a debug path,
+// speed is not critical at this point.
+static Mutex andlock;
+static void
+atomicand8(byte *src, byte val)
+{
+ runtime·lock(&andlock);
+ *src = *src&val;
+ runtime·unlock(&andlock);
+}
+
+// Mark using the checkmark scheme.
+void
+docheckmark(Markbits *mbits)
+{
+ // xor 01 moves 01(scalar unmarked) to 00(scalar marked)
+ // and 10(pointer unmarked) to 11(pointer marked)
+ if(mbits->tbits == BitsScalar)
+ atomicand8(mbits->bitp, ~(byte)(BitsCheckMarkXor<<mbits->shift<<2));
+ else if(mbits->tbits == BitsPointer)
+ runtime·atomicor8(mbits->bitp, BitsCheckMarkXor<<mbits->shift<<2);
+
+ // reload bits for ischeckmarked
+ mbits->xbits = *mbits->bitp;
+ mbits->bits = (mbits->xbits >> mbits->shift) & bitMask;
+ mbits->tbits = ((mbits->xbits >> mbits->shift) & bitPtrMask) >> 2;
+
+ return;
+}
+
+// In the default scheme, does mbits refer to a marked object?
+static bool
+ismarked(Markbits *mbits)
+{
+ if((mbits->bits&bitBoundary) != bitBoundary)
+ runtime·throw("ismarked: bits should have boundary bit set");
+ return (mbits->bits&bitMarked) == bitMarked;
+}
+
+// In the checkmark scheme, does mbits refer to a marked object?
+static bool
+ischeckmarked(Markbits *mbits)
+{
+ if((mbits->bits&bitBoundary) != bitBoundary)
+ runtime·printf("runtime:ischeckmarked: bits should have boundary bit set\n");
+ return mbits->tbits==BitsScalarMarked || mbits->tbits==BitsPointerMarked;
+}
+
+// When in the GCmarktermination phase we allocate black.
+void
+runtime·gcmarknewobject_m(void)
+{
+ Markbits mbits;
+ byte *obj;
+
+ if(runtime·gcphase != GCmarktermination)
+ runtime·throw("marking new object while not in mark termination phase");
+ if(checkmark) // The world should be stopped so this should not happen.
+ runtime·throw("gcmarknewobject called while doing checkmark");
+
+ obj = g->m->ptrarg[0];
+ slottombits((byte*)((uintptr)obj & (PtrSize-1)), &mbits);
+
+ if((mbits.bits&bitMarked) != 0)
+ return;
+
+ // Each byte of GC bitmap holds info for two words.
+ // If the current object is larger than two words, or if the object is one word
+ // but the object it shares the byte with is already marked,
+ // then all the possible concurrent updates are trying to set the same bit,
+ // so we can use a non-atomic update.
+ if((mbits.xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) || runtime·work.nproc == 1)
+ *mbits.bitp = mbits.xbits | (bitMarked<<mbits.shift);
+ else
+ runtime·atomicor8(mbits.bitp, bitMarked<<mbits.shift);
+ return;
+}
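The comment above encodes when a plain (non-atomic) store of the mark bit is safe: each bitmap byte covers two words, so only when both halves hold nothing but the boundary bit could another thread be racing on the same byte. A hedged Go sketch of that decision; gcBits/bitBoundary/bitMarked are illustrative stand-ins for the runtime constants:

	package markdemo

	const (
		gcBits      = 4
		bitBoundary = 1
		bitMarked   = 2
		bitMask     = bitBoundary | bitMarked
	)

	// needAtomicMark reports whether setting the mark bit for one of the two
	// words described by the bitmap byte xbits must use an atomic OR: only
	// when both halves still hold just the boundary bit (so the neighbouring
	// word may be marked concurrently) and more than one GC proc is running.
	func needAtomicMark(xbits byte, nproc int) bool {
		bothBoundaryOnly := xbits&(bitMask|bitMask<<gcBits) == bitBoundary|bitBoundary<<gcBits
		return bothBoundaryOnly && nproc > 1
	}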
+
+// obj is the start of an object with mark mbits.
+// If it isn't already marked, mark it and enqueue into workbuf.
+// Return possibly new workbuf to use.
+static Workbuf*
+greyobject(byte *obj, Markbits *mbits, Workbuf *wbuf)
+{
+ // obj should be start of allocation, and so must be at least pointer-aligned.
+ if(((uintptr)obj & (PtrSize-1)) != 0)
+ runtime·throw("greyobject: obj not pointer-aligned");
+
+ if(checkmark) {
+ if(!ismarked(mbits)) {
+ MSpan *s;
+ pageID k;
+ uintptr x, i;
+
+ runtime·printf("runtime:greyobject: checkmarks finds unexpected unmarked object obj=%p, mbits->bits=%x, *mbits->bitp=%x\n", obj, mbits->bits, *mbits->bitp);
+
+ k = (uintptr)obj>>PageShift;
+ x = k;
+ x -= (uintptr)runtime·mheap.arena_start>>PageShift;
+ s = runtime·mheap.spans[x];
+ runtime·printf("runtime:greyobject Span: obj=%p, k=%p", obj, k);
+ if (s == nil) {
+ runtime·printf(" s=nil\n");
+ } else {
+ runtime·printf(" s->start=%p s->limit=%p, s->state=%d, s->sizeclass=%d, s->elemsize=%D \n", s->start*PageSize, s->limit, s->state, s->sizeclass, s->elemsize);
+ for(i=0; i<s->sizeclass; i++) {
+ runtime·printf(" ((uintptr*)obj)[%D]=%p\n", i, ((uintptr*)obj)[i]);
+ }
+ }
+ runtime·throw("checkmark found unmarked object");
+ }
+ if(ischeckmarked(mbits))
+ return wbuf;
+ docheckmark(mbits);
+ if(!ischeckmarked(mbits)) {
+ runtime·printf("mbits xbits=%x bits=%x tbits=%x shift=%d\n", mbits->xbits, mbits->bits, mbits->tbits, mbits->shift);
+ runtime·throw("docheckmark and ischeckmarked disagree");
+ }
+ } else {
+ // If marked we have nothing to do.
+ if((mbits->bits&bitMarked) != 0)
+ return wbuf;
+
+ // Each byte of GC bitmap holds info for two words.
+ // If the current object is larger than two words, or if the object is one word
+ // but the object it shares the byte with is already marked,
+ // then all the possible concurrent updates are trying to set the same bit,
+ // so we can use a non-atomic update.
+ if((mbits->xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) || runtime·work.nproc == 1)
+ *mbits->bitp = mbits->xbits | (bitMarked<<mbits->shift);
+ else
+ runtime·atomicor8(mbits->bitp, bitMarked<<mbits->shift);
+ }
+
+ if (!checkmark && (((mbits->xbits>>(mbits->shift+2))&BitsMask) == BitsDead))
+ return wbuf; // noscan object
+
+ // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
+ // seems like a nice optimization that can be added back in.
+ // There needs to be time between the PREFETCH and the use.
+ // Previously we put the obj in an 8 element buffer that is drained at a rate
+ // to give the PREFETCH time to do its work.
+ // Use of PREFETCHNTA might be more appropriate than PREFETCH
+
+ // If workbuf is full, obtain an empty one.
+ if(wbuf->nobj >= nelem(wbuf->obj)) {
+ wbuf = getempty(wbuf);
+ }
+
+ wbuf->obj[wbuf->nobj] = obj;
+ wbuf->nobj++;
+ return wbuf;
+}
+
+// Scan the object b of size n, adding pointers to wbuf.
+// Return possibly new wbuf to use.
+// If ptrmask != nil, it specifies where pointers are in b.
+// If ptrmask == nil, the GC bitmap should be consulted.
+// In this case, n may be an overestimate of the size; the GC bitmap
+// must also be used to make sure the scan stops at the end of b.
+static Workbuf*
+scanobject(byte *b, uintptr n, byte *ptrmask, Workbuf *wbuf)
+{
+ byte *obj, *arena_start, *arena_used, *ptrbitp;
+ uintptr i, j;
+ int32 bits;
+ Markbits mbits;
+
+ arena_start = (byte*)runtime·mheap.arena_start;
+ arena_used = runtime·mheap.arena_used;
ptrbitp = nil;
+ // Find bits of the beginning of the object.
+ if(ptrmask == nil) {
+ b = objectstart(b, &mbits);
+ if(b == nil)
+ return wbuf;
+ ptrbitp = mbits.bitp; //arena_start - off/wordsPerBitmapByte - 1;
+ }
+ for(i = 0; i < n; i += PtrSize) {
+ // Find bits for this word.
+ if(ptrmask != nil) {
+ // dense mask (stack or data)
+ bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
+ } else {
+ // Check if we have reached end of span.
+ // n is an overestimate of the size of the object.
+ if((((uintptr)b+i)%PageSize) == 0 &&
+ runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
+ break;
+ // Consult GC bitmap.
+ bits = *ptrbitp;
+ if(wordsPerBitmapByte != 2)
+ runtime·throw("alg doesn't work for wordsPerBitmapByte != 2");
+ j = ((uintptr)b+i)/PtrSize & 1; // j indicates upper nibble or lower nibble
+ bits >>= gcBits*j;
+ if(i == 0)
+ bits &= ~bitBoundary;
+ ptrbitp -= j;
+
+ if((bits&bitBoundary) != 0 && i != 0)
+ break; // reached beginning of the next object
+ bits = (bits&bitPtrMask)>>2; // bits refer to the type bits.
+
+ if(i != 0 && bits == BitsDead) // BitsDead in first nibble not valid during checkmark
+ break; // reached no-scan part of the object
+ }
+
+ if(bits <= BitsScalar) // Bits Scalar ||
+ // BitsDead || // default encoding
+ // BitsScalarMarked // checkmark encoding
+ continue;
+
+ if((bits&BitsPointer) != BitsPointer) {
+ runtime·printf("gc checkmark=%d, b=%p ptrmask=%p, mbits.bitp=%p, mbits.xbits=%x, bits=%x\n", checkmark, b, ptrmask, mbits.bitp, mbits.xbits, bits);
+ runtime·throw("unexpected garbage collection bits");
+ }
+
+ obj = *(byte**)(b+i);
+ // At this point we have extracted the next potential pointer.
+ // Check if it points into heap.
+ if(obj == nil || obj < arena_start || obj >= arena_used)
+ continue;
+ // Mark the object, return some important bits.
+ // If we combine the following two routines we don't have to pass mbits or obj around.
+ obj = objectstart(obj, &mbits);
+ // In the case of the span being MSpan_Stack mbits is useless and will not have
+ // the boundary bit set. It does not need to be greyed since it will be
+ // scanned using the scan stack mechanism.
+ if(obj == nil)
+ continue;
+ wbuf = greyobject(obj, &mbits, wbuf);
+ }
+ return wbuf;
+}
+
+// scanblock starts by scanning b as scanobject would.
+// If the gcphase is GCscan, that's all scanblock does.
+// Otherwise it traverses some fraction of the pointers it found in b, recursively.
+// As a special case, scanblock(nil, 0, nil) means to scan previously queued work,
+// stopping only when no work is left in the system.
+static void
+scanblock(byte *b, uintptr n, byte *ptrmask)
+{
+ Workbuf *wbuf;
+ bool keepworking;
+
+ wbuf = getpartialorempty();
+ if(b != nil) {
+ wbuf = scanobject(b, n, ptrmask, wbuf);
+ if(runtime·gcphase == GCscan) {
+ if(inheap(b) && !ptrmask)
+ // b is in heap, we are in GCscan so there should be a ptrmask.
+ runtime·throw("scanblock: In GCscan phase and inheap is true.");
+ // GCscan only goes one level deep since mark wb not turned on.
+ putpartial(wbuf);
+ return;
+ }
+ }
+ if(runtime·gcphase == GCscan) {
+ runtime·throw("scanblock: In GCscan phase but no b passed in.");
+ }
+
+ keepworking = b == nil;
+
// ptrmask can have 2 possible values:
// 1. nil - obtain pointer mask from GC bitmap.
// 2. pointer to a compact mask (for stacks and data).
- if(b != nil)
- goto scanobj;
for(;;) {
- if(nobj == 0) {
- // Out of work in workbuf.
- // First, see is there is any work in scanbuf.
- for(i = 0; i < nelem(scanbuf); i++) {
- b = scanbuf[scanbufpos];
- scanbuf[scanbufpos++] = nil;
- scanbufpos %= nelem(scanbuf);
- if(b != nil) {
- n = arena_used - b; // scan until bitBoundary or BitsDead
- ptrmask = nil; // use GC bitmap for pointer info
- goto scanobj;
- }
- }
+ if(wbuf->nobj == 0) {
if(!keepworking) {
putempty(wbuf);
return;
}
// Refill workbuf from global queue.
wbuf = getfull(wbuf);
- if(wbuf == nil)
+ if(wbuf == nil) // nil means out of work barrier reached
return;
- nobj = wbuf->nobj;
- wp = &wbuf->obj[nobj];
+
+ if(wbuf->nobj<=0) {
+ runtime·throw("runtime:scanblock getfull returns empty buffer");
+ }
+
}
// If another proc wants a pointer, give it some.
- if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
- wbuf->nobj = nobj;
+ if(runtime·work.nwait > 0 && wbuf->nobj > 4 && runtime·work.full == 0) {
wbuf = handoff(wbuf);
- nobj = wbuf->nobj;
- wp = &wbuf->obj[nobj];
}
- wp--;
- nobj--;
- b = *wp;
- n = arena_used - b; // scan until next bitBoundary or BitsDead
- ptrmask = nil; // use GC bitmap for pointer info
-
- scanobj:
- if(DebugPtrs)
- runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
- // Find bits of the beginning of the object.
- if(ptrmask == nil) {
- off = (uintptr*)b - (uintptr*)arena_start;
- ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
- }
- for(i = 0; i < n; i += PtrSize) {
- obj = nil;
- // Find bits for this word.
- if(ptrmask == nil) {
- // Check is we have reached end of span.
- if((((uintptr)b+i)%PageSize) == 0 &&
- runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
- break;
- // Consult GC bitmap.
- bits = *ptrbitp;
-
- if(wordsPerBitmapByte != 2)
- runtime·throw("alg doesn't work for wordsPerBitmapByte != 2");
- j = ((uintptr)b+i)/PtrSize & 1;
- ptrbitp -= j;
- bits >>= gcBits*j;
-
- if((bits&bitBoundary) != 0 && i != 0)
- break; // reached beginning of the next object
- bits = (bits>>2)&BitsMask;
- if(bits == BitsDead)
- break; // reached no-scan part of the object
- } else // dense mask (stack or data)
- bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
-
- if(bits <= BitsScalar) // BitsScalar || BitsDead
- continue;
- if(bits == BitsPointer) {
- obj = *(byte**)(b+i);
- obj0 = obj;
- goto markobj;
- }
-
- // With those three out of the way, must be multi-word.
- if(Debug && bits != BitsMultiWord)
- runtime·throw("unexpected garbage collection bits");
- // Find the next pair of bits.
- if(ptrmask == nil) {
- bits = *ptrbitp;
- j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
- ptrbitp -= j;
- bits >>= gcBits*j;
- bits = (bits>>2)&BitsMask;
- } else
- bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
-
- if(Debug && bits != BitsIface && bits != BitsEface)
- runtime·throw("unexpected garbage collection bits");
-
- if(bits == BitsIface) {
- iface = (Iface*)(b+i);
- if(iface->tab != nil) {
- typ = iface->tab->type;
- if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
- obj = iface->data;
- }
- } else {
- eface = (Eface*)(b+i);
- typ = eface->type;
- if(typ != nil) {
- if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
- obj = eface->data;
- }
- }
-
- i += PtrSize;
-
- obj0 = obj;
- markobj:
- // At this point we have extracted the next potential pointer.
- // Check if it points into heap.
- if(obj == nil)
- continue;
- if(obj < arena_start || obj >= arena_used) {
- if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
- s = nil;
- goto badobj;
- }
- continue;
- }
- // Mark the object.
- obj = (byte*)((uintptr)obj & ~(PtrSize-1));
- off = (uintptr*)obj - (uintptr*)arena_start;
- bitp = arena_start - off/wordsPerBitmapByte - 1;
- shift = (off % wordsPerBitmapByte) * gcBits;
- xbits = *bitp;
- bits = (xbits >> shift) & bitMask;
- if((bits&bitBoundary) == 0) {
- // Not a beginning of a block, consult span table to find the block beginning.
- k = (uintptr)obj>>PageShift;
- x = k;
- x -= (uintptr)arena_start>>PageShift;
- s = runtime·mheap.spans[x];
- if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse) {
- // Stack pointers lie within the arena bounds but are not part of the GC heap.
- // Ignore them.
- if(s != nil && s->state == MSpanStack)
- continue;
-
- badobj:
- // If cgo_allocate is linked into the binary, it can allocate
- // memory as []unsafe.Pointer that may not contain actual
- // pointers and must be scanned conservatively.
- // In this case alone, allow the bad pointer.
- if(have_cgo_allocate() && ptrmask == nil)
- continue;
-
- // Anything else indicates a bug somewhere.
- // If we're in the middle of chasing down a different bad pointer,
- // don't confuse the trace by printing about this one.
- if(nbadblock > 0)
- continue;
-
- runtime·printf("runtime: garbage collector found invalid heap pointer *(%p+%p)=%p", b, i, obj);
- if(s == nil)
- runtime·printf(" s=nil\n");
- else
- runtime·printf(" span=%p-%p-%p state=%d\n", (uintptr)s->start<<PageShift, s->limit, (uintptr)(s->start+s->npages)<<PageShift, s->state);
- if(ptrmask != nil)
- runtime·throw("invalid heap pointer");
- // Add to badblock list, which will cause the garbage collection
- // to keep repeating until it has traced the chain of pointers
- // leading to obj all the way back to a root.
- if(nbadblock == 0)
- badblock[nbadblock++] = (uintptr)b;
- continue;
- }
- p = (byte*)((uintptr)s->start<<PageShift);
- if(s->sizeclass != 0) {
- size = s->elemsize;
- idx = ((byte*)obj - p)/size;
- p = p+idx*size;
- }
- if(p == obj) {
- runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
- p, s->start*PageSize, s->limit);
- runtime·throw("failed to find block beginning");
- }
- obj = p;
- goto markobj;
- }
- if(DebugPtrs)
- runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);
-
- if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
- // Running garbage collection again because
- // we want to find the path from a root to a bad pointer.
- // Found possible next step; extend or finish path.
- for(j=0; j<nbadblock; j++)
- if(badblock[j] == (uintptr)b)
- goto AlreadyBad;
- runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
- if(ptrmask != nil)
- runtime·throw("bad pointer");
- if(nbadblock >= nelem(badblock))
- runtime·throw("badblock trace too long");
- badblock[nbadblock++] = (uintptr)b;
- AlreadyBad:;
- }
-
- // Now we have bits, bitp, and shift correct for
- // obj pointing at the base of the object.
- // Only care about not marked objects.
- if((bits&bitMarked) != 0)
- continue;
- // If obj size is greater than 8, then each byte of GC bitmap
- // contains info for at most one object. In such case we use
- // non-atomic byte store to mark the object. This can lead
- // to double enqueue of the object for scanning, but scanning
- // is an idempotent operation, so it is OK. This cannot lead
- // to bitmap corruption because the single marked bit is the
- // only thing that can change in the byte.
- // For 8-byte objects we use non-atomic store, if the other
- // quadruple is already marked. Otherwise we resort to CAS
- // loop for marking.
- if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
- runtime·work.nproc == 1)
- *bitp = xbits | (bitMarked<<shift);
- else
- runtime·atomicor8(bitp, bitMarked<<shift);
-
- if(((xbits>>(shift+2))&BitsMask) == BitsDead)
- continue; // noscan object
-
- // Queue the obj for scanning.
- PREFETCH(obj);
- p = scanbuf[scanbufpos];
- scanbuf[scanbufpos++] = obj;
- scanbufpos %= nelem(scanbuf);
- if(p == nil)
- continue;
-
- // If workbuf is full, obtain an empty one.
- if(nobj >= nelem(wbuf->obj)) {
- wbuf->nobj = nobj;
- wbuf = getempty(wbuf);
- nobj = wbuf->nobj;
- wp = &wbuf->obj[nobj];
- }
- *wp = p;
- wp++;
- nobj++;
- }
- if(DebugPtrs)
- runtime·printf("end scanblock %p +%p %p\n", b, n, ptrmask);
-
- if(Debug && ptrmask == nil) {
- // For heap objects ensure that we did not overscan.
- n = 0;
- p = nil;
- if(!runtime·mlookup(b, &p, &n, nil) || b != p || i > n) {
- runtime·printf("runtime: scanned (%p,%p), heap object (%p,%p)\n", b, i, p, n);
- runtime·throw("scanblock: scanned invalid object");
- }
- }
+ // This might be a good place to add prefetch code...
+ // if(wbuf->nobj > 4) {
+ //	PREFETCH(wbuf->obj[wbuf->nobj - 3]);
+ // }
+ --wbuf->nobj;
+ b = wbuf->obj[wbuf->nobj];
+ wbuf = scanobject(b, runtime·mheap.arena_used - b, nil, wbuf);
}
}
@@ -484,7 +665,7 @@ markroot(ParFor *desc, uint32 i)
void *p;
uint32 status;
bool restart;
-
+
USED(&desc);
// Note: if you add a case here, please also update heapdump.c:dumproots.
switch(i) {
@@ -511,7 +692,8 @@ markroot(ParFor *desc, uint32 i)
s = runtime·work.spans[spanidx];
if(s->state != MSpanInUse)
continue;
- if(s->sweepgen != sg) {
+ if(!checkmark && s->sweepgen != sg) {
+ // sweepgen was updated (+2) during non-checkmark GC pass
runtime·printf("sweep %d %d\n", s->sweepgen, sg);
runtime·throw("gc: unswept span");
}
@@ -523,14 +705,16 @@ markroot(ParFor *desc, uint32 i)
spf = (SpecialFinalizer*)sp;
// A finalizer can be set for an inner byte of an object, find object beginning.
p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize);
- scanblock(p, s->elemsize, nil);
+ if(runtime·gcphase != GCscan)
+ scanblock(p, s->elemsize, nil); // Scanned during mark phase
scanblock((void*)&spf->fn, PtrSize, oneptr);
}
}
break;
case RootFlushCaches:
- flushallmcaches();
+ if (runtime·gcphase != GCscan) // Do not flush mcaches during GCscan phase.
+ flushallmcaches();
break;
default:
@@ -540,17 +724,37 @@ markroot(ParFor *desc, uint32 i)
gp = runtime·allg[i - RootCount];
// remember when we've first observed the G blocked
// needed only to output in traceback
- status = runtime·readgstatus(gp);
+ status = runtime·readgstatus(gp); // We are not in a scan state
if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
gp->waitsince = runtime·work.tstart;
- // Shrink a stack if not much of it is being used.
- runtime·shrinkstack(gp);
- if(runtime·readgstatus(gp) == Gdead)
+ // Shrink a stack if not much of it is being used but not in the scan phase.
+ if (runtime·gcphase != GCscan) // Do not shrink during GCscan phase.
+ runtime·shrinkstack(gp);
+ if(runtime·readgstatus(gp) == Gdead)
gp->gcworkdone = true;
else
gp->gcworkdone = false;
restart = runtime·stopg(gp);
- scanstack(gp);
+
+ // goroutine will scan its own stack when it stops running.
+ // Wait until it has.
+ while(runtime·readgstatus(gp) == Grunning && !gp->gcworkdone) {
+ }
+
+ // scanstack(gp) is done as part of gcphasework
+ // But to make sure we have finished we need to make sure that
+ // the stack traps have all responded, so drop into
+ // this while loop until they respond.
+ while(!gp->gcworkdone){
+ status = runtime·readgstatus(gp);
+ if(status == Gdead) {
+ gp->gcworkdone = true; // scan is a noop
+ break;
+ // do nothing, scan not needed.
+ }
+ if(status == Gwaiting || status == Grunnable)
+ restart = runtime·stopg(gp);
+ }
if(restart)
runtime·restartg(gp);
break;
@@ -562,53 +766,95 @@ markroot(ParFor *desc, uint32 i)
static Workbuf*
getempty(Workbuf *b)
{
- MCache *c;
-
- if(b != nil)
- runtime·lfstackpush(&runtime·work.full, &b->node);
- b = nil;
- c = g->m->mcache;
- if(c->gcworkbuf != nil) {
- b = c->gcworkbuf;
- c->gcworkbuf = nil;
+ if(b != nil) {
+ putfull(b);
+ b = nil;
}
- if(b == nil)
+ if(runtime·work.empty)
b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty);
- if(b == nil)
+
+ if(b && b->nobj != 0) {
+ runtime·printf("m%d: getempty: popped b=%p with non-zero b->nobj=%d\n", g->m->id, b, (uint32)b->nobj);
+ runtime·throw("getempty: workbuffer not empty, b->nobj not 0");
+ }
+ if(b == nil) {
b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
- b->nobj = 0;
+ b->nobj = 0;
+ }
return b;
}
static void
putempty(Workbuf *b)
{
- MCache *c;
-
- c = g->m->mcache;
- if(c->gcworkbuf == nil) {
- c->gcworkbuf = b;
- return;
+ if(b->nobj != 0) {
+ runtime·throw("putempty: b->nobj not 0\n");
}
runtime·lfstackpush(&runtime·work.empty, &b->node);
}
-void
-runtime·gcworkbuffree(void *b)
+// Put a full or partially full workbuf on the full list.
+static void
+putfull(Workbuf *b)
{
- if(b != nil)
- putempty(b);
+ if(b->nobj <= 0) {
+ runtime·throw("putfull: b->nobj <= 0\n");
+ }
+ runtime·lfstackpush(&runtime·work.full, &b->node);
}
-// Get a full work buffer off the work.full list, or return nil.
+// Get a partially filled work buffer;
+// if none are available, get an empty one.
+static Workbuf*
+getpartialorempty(void)
+{
+ Workbuf *b;
+
+ b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
+ if(b == nil)
+ b = getempty(nil);
+ return b;
+}
+
+static void
+putpartial(Workbuf *b)
+{
+
+ if(b->nobj == 0)
+ runtime·lfstackpush(&runtime·work.empty, &b->node);
+ else if (b->nobj < nelem(b->obj))
+ runtime·lfstackpush(&runtime·work.partial, &b->node);
+ else if (b->nobj == nelem(b->obj))
+ runtime·lfstackpush(&runtime·work.full, &b->node);
+ else {
+ runtime·printf("b=%p, b->nobj=%d, nelem(b->obj)=%d\n", b, (uint32)b->nobj, (uint32)nelem(b->obj));
+ runtime·throw("putpartial: bad Workbuf b->nobj");
+ }
+}
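A minimal sketch of the routing rule putpartial applies, assuming a buffer capacity and three lists named after work.empty/work.partial/work.full (the runtime uses lock-free stacks rather than strings, of course):

	package wbufdemo

	// listFor reports which list a workbuf holding nobj of capacity objects
	// belongs on: empty buffers go on the empty list, completely full ones
	// on the full list, and everything in between on the partial list.
	func listFor(nobj, capacity int) string {
		switch {
		case nobj == 0:
			return "empty"
		case nobj == capacity:
			return "full"
		case nobj > 0 && nobj < capacity:
			return "partial"
		default:
			panic("listFor: bad workbuf nobj")
		}
	}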
+
+// Get a full work buffer off the work.full list, or a partially
+// filled one off the work.partial list. If nothing is available,
+// wait until all the other gc helpers have finished and then
+// return nil.
+// getfull acts as a barrier for work.nproc helpers. As long as one
+// gchelper is actively marking objects it
+// may create a workbuffer that the other helpers can work on.
+// The for loop either exits when a work buffer is found
+// or when _all_ of the work.nproc GC helpers are in the loop
+// looking for work and thus not capable of creating new work.
+// This is in fact the termination condition for the STW mark
+// phase.
static Workbuf*
getfull(Workbuf *b)
{
int32 i;
if(b != nil)
- runtime·lfstackpush(&runtime·work.empty, &b->node);
+ putempty(b);
+
b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
+ if(b==nil)
+ b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
if(b != nil || runtime·work.nproc == 1)
return b;
@@ -617,7 +863,9 @@ getfull(Workbuf *b)
if(runtime·work.full != 0) {
runtime·xadd(&runtime·work.nwait, -1);
b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
- if(b != nil)
+ if(b==nil)
+ b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
+ if(b != nil)
return b;
runtime·xadd(&runtime·work.nwait, +1);
}
@@ -737,7 +985,7 @@ scanframe(Stkframe *frame, void *unused)
}
bv = runtime·stackmapdata(stackmap, pcdata);
}
- scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata);
+ scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata);
}
return true;
}
@@ -760,8 +1008,7 @@ scanstack(G *gp)
case Gdead:
return;
case Grunning:
- runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
- runtime·throw("mark - world not stopped");
+ runtime·throw("scanstack: - goroutine not stopped");
case Grunnable:
case Gsyscall:
case Gwaiting:
@@ -778,8 +1025,117 @@ scanstack(G *gp)
runtime·tracebackdefers(gp, &fn, nil);
}
-// The gp has been moved to a gc safepoint. If there is gcphase specific
-// work it is done here.
+// If the slot is grey or black return true, if white return false.
+// If the slot is not in the known heap and thus does not have a valid GC bitmap then
+// it is considered grey. Globals and stacks can hold such slots.
+// The slot is grey if its mark bit is set and it is enqueued to be scanned.
+// The slot is black if it has already been scanned.
+// It is white if it has a valid mark bit and the bit is not set.
+static bool
+shaded(byte *slot)
+{
+ Markbits mbits;
+ byte *valid;
+
+ if(!inheap(slot)) // non-heap slots considered grey
+ return true;
+
+ valid = objectstart(slot, &mbits);
+ if(valid == nil)
+ return true;
+
+ if(checkmark)
+ return ischeckmarked(&mbits);
+
+ return (mbits.bits&bitMarked) != 0;
+}
+
+// Shade the object if it isn't already.
+// The object is not nil and known to be in the heap.
+static void
+shade(byte *b)
+{
+ byte *obj;
+ Workbuf *wbuf;
+ Markbits mbits;
+
+ if(!inheap(b))
+ runtime·throw("shade: passed an address not in the heap");
+
+ wbuf = getpartialorempty();
+ // Mark the object, return some important bits.
+ // If we combine the following two routines we don't have to pass mbits or obj around.
+ obj = objectstart(b, &mbits);
+ if(obj != nil)
+ wbuf = greyobject(obj, &mbits, wbuf); // augments the wbuf
+
+ putpartial(wbuf);
+ return;
+}
+
+// This is the Dijkstra barrier coarsened to always shade the ptr (dst) object.
+// The original Dijkstra barrier only shaded ptrs being placed in black slots.
+//
+// Shade indicates that it has seen a white pointer by adding the referent
+// to wbuf as well as marking it.
+//
+// slot is the destination (dst) in go code
+// ptr is the value that goes into the slot (src) in the go code
+//
+// Dijkstra pointed out that maintaining the no-black-to-white-pointers
+// invariant means that white to white pointers need not
+// be noted by the write barrier. Furthermore if either
+// white object dies before it is reached by the
+// GC then the object can be collected during this GC cycle
+// instead of waiting for the next cycle. Unfortunately the cost of
+// ensuring that the object holding the slot doesn't concurrently
+// change to black without the mutator noticing seems prohibitive.
+//
+// Consider the following example where the mutator writes into
+// a slot and then loads the slot's mark bit while the GC thread
+// writes to the slot's mark bit and then as part of scanning reads
+// the slot.
+//
+// Initially both [slot] and [slotmark] are 0 (nil)
+// Mutator thread GC thread
+// st [slot], ptr st [slotmark], 1
+//
+// ld r1, [slotmark] ld r2, [slot]
+//
+// This is a classic example of independent reads of independent writes,
+// aka IRIW. The question is if r1==r2==0 is allowed and for most HW the
+// answer is yes without inserting memory barriers between the st and the ld.
+// These barriers are expensive so we have decided that we will
+// always grey the ptr object regardless of the slot's color.
+//
+void
+runtime·gcmarkwb_m()
+{
+ byte *ptr;
+ ptr = (byte*)g->m->scalararg[1];
+
+ switch(runtime·gcphase) {
+ default:
+ runtime·throw("gcphasework in bad gcphase");
+ case GCoff:
+ case GCquiesce:
+ case GCstw:
+ case GCsweep:
+ case GCscan:
+ break;
+ case GCmark:
+ if(ptr != nil && inheap(ptr))
+ shade(ptr);
+ break;
+ case GCmarktermination:
+ if(ptr != nil && inheap(ptr))
+ shade(ptr);
+ break;
+ }
+}
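To make the barrier concrete, here is a hedged Go sketch of the coarsened Dijkstra rule implemented above: during the mark phases every pointer store first greys the incoming pointer, regardless of the destination slot's colour. inHeap and shadePointer are illustrative stand-ins for the runtime's inheap/shade:

	package wbdemo

	import "unsafe"

	// writePointer models "*slot = ptr" under the write barrier: when marking
	// is active the stored value is greyed before the store, so a reachable
	// white object can never hide behind an already-scanned black one.
	func writePointer(slot *unsafe.Pointer, ptr unsafe.Pointer, marking bool,
		inHeap func(unsafe.Pointer) bool, shadePointer func(unsafe.Pointer)) {
		if marking && ptr != nil && inHeap(ptr) {
			shadePointer(ptr) // grey the referent first
		}
		*slot = ptr
	}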
+
+// The gp has been moved to a GC safepoint. GC phase specific
+// work is done here.
void
runtime·gcphasework(G *gp)
{
@@ -790,12 +1146,18 @@ runtime·gcphasework(G *gp)
case GCquiesce:
case GCstw:
case GCsweep:
- // No work for now.
+ // No work.
+ break;
+ case GCscan:
+ // scan the stack, mark the objects, put pointers in work buffers
+ // hanging off the P where this is being run.
+ scanstack(gp);
break;
case GCmark:
- // Disabled until concurrent GC is implemented
- // but indicate the scan has been done.
- // scanstack(gp);
+ break;
+ case GCmarktermination:
+ scanstack(gp);
+ // All available mark work will be emptied before returning.
break;
}
gp->gcworkdone = true;
@@ -885,6 +1247,7 @@ runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*
}
}
+// Returns only when span s has been swept.
void
runtime·MSpan_EnsureSwept(MSpan *s)
{
@@ -899,6 +1262,7 @@ runtime·MSpan_EnsureSwept(MSpan *s)
sg = runtime·mheap.sweepgen;
if(runtime·atomicload(&s->sweepgen) == sg)
return;
+ // The caller must be sure that the span is a MSpanInUse span.
if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
runtime·MSpan_Sweep(s, false);
return;
@@ -926,6 +1290,9 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve)
Special *special, **specialp, *y;
bool res, sweepgenset;
+ if(checkmark)
+ runtime·throw("MSpan_Sweep: checkmark only runs in STW and after the sweep.");
+
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
@@ -1173,6 +1540,7 @@ runtime·gosweepdone(void)
return runtime·mheap.sweepdone;
}
+
void
runtime·gchelper(void)
{
@@ -1181,13 +1549,11 @@ runtime·gchelper(void)
g->m->traceback = 2;
gchelperstart();
- // parallel mark for over gc roots
+ // parallel mark for over GC roots
runtime·parfordo(runtime·work.markfor);
-
- // help other threads scan secondary blocks
- scanblock(nil, 0, nil);
-
- nproc = runtime·work.nproc; // runtime·work.nproc can change right after we increment runtime·work.ndone
+ if(runtime·gcphase != GCscan)
+ scanblock(nil, 0, nil); // blocks in getfull
+ nproc = runtime·work.nproc; // work.nproc can change right after we increment work.ndone
if(runtime·xadd(&runtime·work.ndone, +1) == nproc-1)
runtime·notewakeup(&runtime·work.alldone);
g->m->traceback = 0;
@@ -1353,6 +1719,7 @@ runtime·gcinit(void)
runtime·gcbssmask = unrollglobgcprog(runtime·gcbss, runtime·ebss - runtime·bss);
}
+// Called from malloc.go using onM; stopping and starting the world is handled by the caller.
void
runtime·gc_m(void)
{
@@ -1366,17 +1733,296 @@ runtime·gc_m(void)
a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
a.eagersweep = g->m->scalararg[2];
gc(&a);
+ runtime·casgstatus(gp, Gwaiting, Grunning);
+}
+
+// Similar to clearcheckmarkbits but works on a single span.
+// It performs two tasks.
+// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
+// for nibbles with the BoundaryBit set.
+// 2. When used after the checkmark phase it converts BitsPointerMarked (11) to BitsPointer (10) and
+// BitsScalarMarked (00) to BitsScalar (01), thus clearing the checkmark encoding.
+// For the second case it is possible to restore the BitsDead pattern, but since
+// checkmark is a debug tool, performance has a lower priority than simplicity.
+// The span is MSpanInUse and the world is stopped.
+static void
+clearcheckmarkbitsspan(MSpan *s)
+{
+ int32 cl, n, npages, i;
+ uintptr size, off, step;
+ byte *p, *bitp, *arena_start, b;
+
+ if(s->state != MSpanInUse) {
+ runtime·printf("runtime:clearcheckmarkbitsspan: state=%d\n",
+ s->state);
+ runtime·throw("clearcheckmarkbitsspan: bad span state");
+ }
+ arena_start = runtime·mheap.arena_start;
+ cl = s->sizeclass;
+ size = s->elemsize;
+ if(cl == 0) {
+ n = 1;
+ } else {
+ // Chunk full of small blocks.
+ npages = runtime·class_to_allocnpages[cl];
+ n = (npages << PageShift) / size;
+ }
+
+ // MSpan_Sweep has similar code but instead of overloading and
+ // complicating that routine we do a simpler walk here.
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
+ p = (byte*)(s->start << PageShift);
+ // Find bits for the beginning of the span.
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ step = size/(PtrSize*wordsPerBitmapByte);
+
+ // The type bit values are:
+ // 00 - BitsDead, for us BitsScalarMarked
+ // 01 - BitsScalar
+ // 10 - BitsPointer
+ // 11 - unused, for us BitsPointerMarked
+ //
+ // When called to prepare for the checkmark phase (checkmark==1),
+ // we change BitsDead to BitsScalar, so that there are no BitsScalarMarked
+ // type bits anywhere.
+ //
+ // The checkmark phase marks by changing BitsScalar to BitsScalarMarked
+ // and BitsPointer to BitsPointerMarked.
+ //
+ // When called to clean up after the checkmark phase (checkmark==0),
+ // we unmark by changing BitsScalarMarked back to BitsScalar and
+ // BitsPointerMarked back to BitsPointer.
+ //
+ // There are two problems with the scheme as just described.
+ // First, the setup rewrites BitsDead to BitsScalar, but the type bits
+ // following a BitsDead are uninitialized and must not be used.
+ // Second, objects that are free are expected to have their type
+ // bits zeroed (BitsDead), so in the cleanup we need to restore
+ // any BitsDeads that were there originally.
+ //
+ // In a one-word object (8-byte allocation on 64-bit system),
+ // there is no difference between BitsScalar and BitsDead, because
+ // neither is a pointer and there are no more words in the object,
+ // so using BitsScalar during the checkmark is safe and mapping
+ // both back to BitsDead during cleanup is also safe.
+ //
+ // In a larger object, we need to be more careful. During setup,
+ // if the type of the first word is BitsDead, we change it to BitsScalar
+ // (as we must) but also initialize the type of the second
+ // word to BitsDead, so that a scan during the checkmark phase
+ // will still stop before seeing the uninitialized type bits in the
+ // rest of the object. The sequence 'BitsScalar BitsDead' never
+ // happens in real type bitmaps - BitsDead is always as early
+ // as possible, so immediately after the last BitsPointer.
+ // During cleanup, if we see a BitsScalar, we can check to see if it
+ // is followed by BitsDead. If so, it was originally BitsDead and
+ // we can change it back.
- if(nbadblock > 0) {
- // Work out path from root to bad block.
- for(;;) {
- gc(&a);
- if(nbadblock >= nelem(badblock))
- runtime·throw("cannot find path to bad pointer");
+ if(step == 0) {
+ // updating top and bottom nibbles, all boundaries
+ for(i=0; i<n/2; i++, bitp--) {
+ if((*bitp & bitBoundary) != bitBoundary)
+ runtime·throw("missing bitBoundary");
+ b = (*bitp & bitPtrMask)>>2;
+ if(!checkmark && (b == BitsScalar || b == BitsScalarMarked))
+ *bitp &= ~0x0c; // convert to BitsDead
+ else if(b == BitsScalarMarked || b == BitsPointerMarked)
+ *bitp ^= BitsCheckMarkXor<<2;
+
+ if(((*bitp>>gcBits) & bitBoundary) != bitBoundary)
+ runtime·throw("missing bitBoundary");
+ b = ((*bitp>>gcBits) & bitPtrMask)>>2;
+ if(!checkmark && (b == BitsScalar || b == BitsScalarMarked))
+ *bitp &= ~0xc0; // convert to BitsDead
+ else if(b == BitsScalarMarked || b == BitsPointerMarked)
+ *bitp ^= BitsCheckMarkXor<<(2+gcBits);
+ }
+ } else {
+ // updating bottom nibble for first word of each object
+ for(i=0; i<n; i++, bitp -= step) {
+ if((*bitp & bitBoundary) != bitBoundary)
+ runtime·throw("missing bitBoundary");
+ b = (*bitp & bitPtrMask)>>2;
+
+ if(checkmark && b == BitsDead) {
+ // move BitsDead into second word.
+ // set bits to BitsScalar in preparation for checkmark phase.
+ *bitp &= ~0xc0;
+ *bitp |= BitsScalar<<2;
+ } else if(!checkmark && (b == BitsScalar || b == BitsScalarMarked) && (*bitp & 0xc0) == 0) {
+ // Cleaning up after checkmark phase.
+ // First word is scalar or dead (we forgot)
+ // and second word is dead.
+ // First word might as well be dead too.
+ *bitp &= ~0x0c;
+ } else if(b == BitsScalarMarked || b == BitsPointerMarked)
+ *bitp ^= BitsCheckMarkXor<<2;
}
}
+}
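
A small standalone Go sketch of the two-bit encoding and the XOR toggle described in the comment above, using the values quoted there (BitsDead=00, BitsScalar=01, BitsPointer=10, checkmark XOR=01). The names below are illustrative only.

package main

import "fmt"

const (
	bitsDead         = 0 // 00; doubles as BitsScalarMarked during checkmark
	bitsScalar       = 1 // 01
	bitsPointer      = 2 // 10
	bitsCheckMarkXor = 1 // XOR with 01 toggles Scalar<->ScalarMarked, Pointer<->PointerMarked
)

func main() {
	// Checkmark marking: flip the low bit of the type bits.
	fmt.Printf("scalar  %02b -> %02b\n", bitsScalar, bitsScalar^bitsCheckMarkXor)   // 01 -> 00
	fmt.Printf("pointer %02b -> %02b\n", bitsPointer, bitsPointer^bitsCheckMarkXor) // 10 -> 11
	// Cleanup: the same XOR restores the original encodings.
	fmt.Printf("restore %02b -> %02b\n", bitsPointer^bitsCheckMarkXor, bitsPointer) // 11 -> 10
}
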
- runtime·casgstatus(gp, Gwaiting, Grunning);
+// clearcheckmarkbits performs two tasks.
+// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
+// for nibbles with the BoundaryBit set.
+// 2. When used after the checkmark phase it converts BitsPointerMarked (11) to BitsPointer (10) and
+// BitsScalarMarked (00) to BitsScalar (01), thus clearing the checkmark encoding.
+// This is a bit expensive but preserves the BitsDead encoding during the normal marking.
+// BitsDead remains valid for every nibble except the ones with BitsBoundary set.
+static void
+clearcheckmarkbits(void)
+{
+ uint32 idx;
+ MSpan *s;
+ for(idx=0; idx<runtime·work.nspan; idx++) {
+ s = runtime·work.spans[idx];
+ if(s->state == MSpanInUse) {
+ clearcheckmarkbitsspan(s);
+ }
+ }
+}
+
+// Called from malloc.go using onM.
+// The world is stopped. Rerun the scan and mark phases
+// using the bitMarkedCheck bit instead of the
+// bitMarked bit. If the marking encounters a
+// bitMarked bit that is not set, we throw.
+void
+runtime·gccheckmark_m(void)
+{
+ if(!gccheckmarkenable)
+ return;
+
+ if(checkmark)
+ runtime·throw("gccheckmark_m, entered with checkmark already true.");
+
+ checkmark = true;
+ clearcheckmarkbits(); // Converts BitsDead to BitsScalar.
+ runtime·gc_m(); // turns off checkmark
+ // Work done; fix up the GC bitmap to remove the checkmark bits.
+ clearcheckmarkbits();
+}
+
+// gccheckmarkenable is initially false
+void
+runtime·gccheckmarkenable_m(void)
+{
+ gccheckmarkenable = true;
+}
+
+void
+runtime·gccheckmarkdisable_m(void)
+{
+ gccheckmarkenable = false;
+}
+
+void
+runtime·finishsweep_m(void)
+{
+ uint32 i, sg;
+ MSpan *s;
+
+ // The world is stopped so we should be able to complete the sweeps
+ // quickly.
+ while(runtime·sweepone() != -1)
+ runtime·sweep.npausesweep++;
+
+ // There may be some other spans being swept concurrently that
+ // we need to wait for. If finishsweep_m is done with the world stopped,
+ // this code is not required.
+ sg = runtime·mheap.sweepgen;
+ for(i=0; i<runtime·work.nspan; i++) {
+ s = runtime·work.spans[i];
+ if(s->sweepgen == sg) {
+ continue;
+ }
+ if(s->state != MSpanInUse) // Span is not part of the GCed heap so no need to ensure it is swept.
+ continue;
+ runtime·MSpan_EnsureSwept(s);
+ }
+}
+
+// Scan all of the stacks, greying (or graying if in America) the referents
+// but not blackening them since the mark write barrier isn't installed.
+void
+runtime·gcscan_m(void)
+{
+ uint32 i, allglen, oldphase;
+ G *gp, *mastergp, **allg;
+
+ // Grab the g that called us and potentially allow rescheduling.
+ // This allows it to be scanned like other goroutines.
+ mastergp = g->m->curg;
+
+ runtime·casgstatus(mastergp, Grunning, Gwaiting);
+ mastergp->waitreason = runtime·gostringnocopy((byte*)"garbage collection scan");
+
+ // Span sweeping has been done by finishsweep_m.
+ // Long term we will want to make this goroutine runnable
+ // by placing it onto a scanenqueue state and then calling
+ // runtime·restartg(mastergp) to make it Grunnable.
+ // At the bottom we will want to return this p back to the scheduler.
+
+ oldphase = runtime·gcphase;
+
+ runtime·lock(&runtime·allglock);
+ allglen = runtime·allglen;
+ allg = runtime·allg;
+ // Prepare flag indicating that the scan has not been completed.
+ for(i = 0; i < allglen; i++) {
+ gp = allg[i];
+ gp->gcworkdone = false; // set to true in gcphasework
+ }
+ runtime·unlock(&runtime·allglock);
+
+ runtime·work.nwait = 0;
+ runtime·work.ndone = 0;
+ runtime·work.nproc = 1; // For now do not do this in parallel.
+ runtime·gcphase = GCscan;
+ // ackgcphase is not needed since we are not scanning running goroutines.
+ runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + allglen, nil, false, markroot);
+ runtime·parfordo(runtime·work.markfor);
+
+ runtime·lock(&runtime·allglock);
+
+ allg = runtime·allg;
+ // Check that gc work is done.
+ for(i = 0; i < allglen; i++) {
+ gp = allg[i];
+ if(!gp->gcworkdone) {
+ runtime·throw("scan missed a g");
+ }
+ }
+ runtime·unlock(&runtime·allglock);
+
+ runtime·gcphase = oldphase;
+ runtime·casgstatus(mastergp, Gwaiting, Grunning);
+ // Let the g that called us continue to run.
+}
+
+// Mark all objects that are known about.
+void
+runtime·gcmark_m(void)
+{
+ scanblock(nil, 0, nil);
+}
+
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
+void
+runtime·gcinstallmarkwb_m(void)
+{
+ runtime·gcphase = GCmark;
+}
+
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
+void
+runtime·gcinstalloffwb_m(void)
+{
+ runtime·gcphase = GCoff;
}
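
The two installers above only flip runtime·gcphase; their comments require the caller to bracket the call with a stop and a restart of the world so every goroutine observes the new barrier. A hedged sketch of that calling pattern, with placeholder functions standing in for the runtime's stoptheworld/starttheworld and the _m installers:

package main

// Placeholders; the real runtime would invoke gcinstallmarkwb_m / gcinstalloffwb_m
// between stopping and restarting the world.
func stopTheWorld()  {}
func startTheWorld() {}
func installMarkWB() {} // would set gcphase = GCmark
func installOffWB()  {} // would set gcphase = GCoff

// enableMarkBarrier shows the required bracketing: the phase flip happens while
// all other goroutines are stopped, so none keeps running with a stale barrier.
func enableMarkBarrier() {
	stopTheWorld()
	installMarkWB()
	startTheWorld()
}

func disableMarkBarrier() {
	stopTheWorld()
	installOffWB()
	startTheWorld()
}

func main() {
	enableMarkBarrier()
	disableMarkBarrier()
}
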
static void
@@ -1385,9 +2031,9 @@ gc(struct gc_args *args)
int64 t0, t1, t2, t3, t4;
uint64 heap0, heap1, obj;
GCStats stats;
-
- if(DebugPtrs)
- runtime·printf("GC start\n");
+ uint32 oldphase;
+ uint32 i;
+ G *gp;
if(runtime·debug.allocfreetrace)
runtime·tracegc();
@@ -1400,11 +2046,10 @@ gc(struct gc_args *args)
if(runtime·debug.gctrace)
t1 = runtime·nanotime();
- // Sweep what is not sweeped by bgsweep.
- while(runtime·sweepone() != -1)
- runtime·sweep.npausesweep++;
+ if(!checkmark)
+ runtime·finishsweep_m(); // skip during checkmark debug phase.
- // Cache runtime.mheap.allspans in work.spans to avoid conflicts with
+ // Cache runtime·mheap.allspans in work.spans to avoid conflicts with
// resizing/freeing allspans.
// New spans can be created while GC progresses, but they are not garbage for
// this round:
@@ -1421,10 +2066,19 @@ gc(struct gc_args *args)
runtime·work.spans = runtime·mheap.allspans;
runtime·work.nspan = runtime·mheap.nspan;
runtime·unlock(&runtime·mheap.lock);
+ oldphase = runtime·gcphase;
runtime·work.nwait = 0;
runtime·work.ndone = 0;
- runtime·work.nproc = runtime·gcprocs();
+ runtime·work.nproc = runtime·gcprocs();
+ runtime·gcphase = GCmarktermination;
+
+ // World is stopped so allglen will not change.
+ for(i = 0; i < runtime·allglen; i++) {
+ gp = runtime·allg[i];
+ gp->gcworkdone = false; // set to true in gcphasework
+ }
+
runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + runtime·allglen, nil, false, markroot);
if(runtime·work.nproc > 1) {
runtime·noteclear(&runtime·work.alldone);
@@ -1437,8 +2091,15 @@ gc(struct gc_args *args)
gchelperstart();
runtime·parfordo(runtime·work.markfor);
+
scanblock(nil, 0, nil);
+ if(runtime·work.full)
+ runtime·throw("runtime·work.full != nil");
+ if(runtime·work.partial)
+ runtime·throw("runtime·work.partial != nil");
+
+ runtime·gcphase = oldphase;
t3 = 0;
if(runtime·debug.gctrace)
t3 = runtime·nanotime();
@@ -1499,6 +2160,16 @@ gc(struct gc_args *args)
// Free the old cached mark array if necessary.
if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
+
+ if(gccheckmarkenable) {
+ if(!checkmark) {
+ // first half of two-pass; don't set up sweep
+ runtime·unlock(&runtime·mheap.lock);
+ return;
+ }
+ checkmark = false; // done checking marks
+ }
+
// Cache the current array for sweeping.
runtime·mheap.gcspans = runtime·mheap.allspans;
runtime·mheap.sweepgen += 2;
@@ -1508,6 +2179,7 @@ gc(struct gc_args *args)
runtime·sweep.spanidx = 0;
runtime·unlock(&runtime·mheap.lock);
+
if(ConcurrentSweep && !args->eagersweep) {
runtime·lock(&runtime·gclock);
if(runtime·sweep.g == nil)
@@ -1527,9 +2199,6 @@ gc(struct gc_args *args)
runtime·mProf_GC();
g->m->traceback = 0;
-
- if(DebugPtrs)
- runtime·printf("GC end\n");
}
extern uintptr runtime·sizeof_C_MStats;
@@ -1784,7 +2453,7 @@ runtime·unrollgcprog_m(void)
Type *typ;
byte *mask, *prog;
uintptr pos;
- uint32 x;
+ uintptr x;
typ = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
@@ -1802,9 +2471,11 @@ runtime·unrollgcprog_m(void)
prog = (byte*)typ->gc[1];
unrollgcprog1(mask, prog, &pos, false, true);
}
+
// atomic way to say mask[0] = 1
- x = ((uint32*)mask)[0];
- runtime·atomicstore((uint32*)mask, x|1);
+ x = *(uintptr*)mask;
+ ((byte*)&x)[0] = 1;
+ runtime·atomicstorep((void**)mask, (void*)x);
}
runtime·unlock(&lock);
}
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go
index 3a7204b54..dc4eec519 100644
--- a/src/runtime/mgc0.go
+++ b/src/runtime/mgc0.go
@@ -83,54 +83,139 @@ func bgsweep() {
}
}
+const (
+ _PoisonGC = 0xf969696969696969 & (1<<(8*ptrSize) - 1)
+ _PoisonStack = 0x6868686868686868 & (1<<(8*ptrSize) - 1)
+)
+
// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer,
// but if we do that, Go inserts a write barrier on *dst = src.
//go:nosplit
func writebarrierptr(dst *uintptr, src uintptr) {
*dst = src
+ writebarrierptr_nostore(dst, src)
+}
+
+// Like writebarrierptr, but the store has already been applied.
+// Do not reapply.
+//go:nosplit
+func writebarrierptr_nostore(dst *uintptr, src uintptr) {
+ if getg() == nil { // very low-level startup
+ return
+ }
+
+ if src != 0 && (src < _PageSize || src == _PoisonGC || src == _PoisonStack) {
+ onM(func() { gothrow("bad pointer in write barrier") })
+ }
+
+ mp := acquirem()
+ if mp.inwb || mp.dying > 0 {
+ releasem(mp)
+ return
+ }
+ mp.inwb = true
+ oldscalar0 := mp.scalararg[0]
+ oldscalar1 := mp.scalararg[1]
+ mp.scalararg[0] = uintptr(unsafe.Pointer(dst))
+ mp.scalararg[1] = src
+ onM_signalok(gcmarkwb_m)
+ mp.scalararg[0] = oldscalar0
+ mp.scalararg[1] = oldscalar1
+ mp.inwb = false
+ releasem(mp)
}
//go:nosplit
func writebarrierstring(dst *[2]uintptr, src [2]uintptr) {
- dst[0] = src[0]
+ writebarrierptr(&dst[0], src[0])
dst[1] = src[1]
}
//go:nosplit
func writebarrierslice(dst *[3]uintptr, src [3]uintptr) {
- dst[0] = src[0]
+ writebarrierptr(&dst[0], src[0])
dst[1] = src[1]
dst[2] = src[2]
}
//go:nosplit
func writebarrieriface(dst *[2]uintptr, src [2]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
-}
-
-//go:nosplit
-func writebarrierfat2(dst *[2]uintptr, _ *byte, src [2]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
}
-//go:nosplit
-func writebarrierfat3(dst *[3]uintptr, _ *byte, src [3]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
- dst[2] = src[2]
-}
+//go:generate go run wbfat_gen.go -- wbfat.go
+//
+// The above line generates multiword write barriers for
+// all the combinations of ptr+scalar up to four words.
+// The implementations are written to wbfat.go.
//go:nosplit
-func writebarrierfat4(dst *[4]uintptr, _ *byte, src [4]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
- dst[2] = src[2]
- dst[3] = src[3]
+func writebarrierfat(typ *_type, dst, src unsafe.Pointer) {
+ mask := loadPtrMask(typ)
+ nptr := typ.size / ptrSize
+ for i := uintptr(0); i < nptr; i += 2 {
+ bits := mask[i/2]
+ if (bits>>2)&_BitsMask == _BitsPointer {
+ writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
+ } else {
+ *(*uintptr)(dst) = *(*uintptr)(src)
+ }
+ dst = add(dst, ptrSize)
+ src = add(src, ptrSize)
+ if i+1 == nptr {
+ break
+ }
+ bits >>= 4
+ if (bits>>2)&_BitsMask == _BitsPointer {
+ writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
+ } else {
+ *(*uintptr)(dst) = *(*uintptr)(src)
+ }
+ dst = add(dst, ptrSize)
+ src = add(src, ptrSize)
+ }
}
//go:nosplit
-func writebarrierfat(typ *_type, dst, src unsafe.Pointer) {
- memmove(dst, src, typ.size)
+func writebarriercopy(typ *_type, dst, src slice) int {
+ n := dst.len
+ if n > src.len {
+ n = src.len
+ }
+ if n == 0 {
+ return 0
+ }
+ dstp := unsafe.Pointer(dst.array)
+ srcp := unsafe.Pointer(src.array)
+
+ if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) {
+ // Overlap with src before dst.
+ // Copy backward, being careful not to move dstp/srcp
+ // out of the array they point into.
+ dstp = add(dstp, uintptr(n-1)*typ.size)
+ srcp = add(srcp, uintptr(n-1)*typ.size)
+ i := uint(0)
+ for {
+ writebarrierfat(typ, dstp, srcp)
+ if i++; i >= n {
+ break
+ }
+ dstp = add(dstp, -typ.size)
+ srcp = add(srcp, -typ.size)
+ }
+ } else {
+ // Copy forward, being careful not to move dstp/srcp
+ // out of the array they point into.
+ i := uint(0)
+ for {
+ writebarrierfat(typ, dstp, srcp)
+ if i++; i >= n {
+ break
+ }
+ dstp = add(dstp, typ.size)
+ srcp = add(srcp, typ.size)
+ }
+ }
+ return int(n)
}
diff --git a/src/runtime/mgc0.h b/src/runtime/mgc0.h
index 64f818914..519d7206e 100644
--- a/src/runtime/mgc0.h
+++ b/src/runtime/mgc0.h
@@ -45,8 +45,12 @@ enum {
// If you change these, also change scanblock.
// scanblock does "if(bits == BitsScalar || bits == BitsDead)" as "if(bits <= BitsScalar)".
BitsDead = 0,
- BitsScalar = 1,
- BitsPointer = 2,
+ BitsScalar = 1, // 01
+ BitsPointer = 2, // 10
+ BitsCheckMarkXor = 1, // 01
+ BitsScalarMarked = BitsScalar ^ BitsCheckMarkXor, // 00
+ BitsPointerMarked = BitsPointer ^ BitsCheckMarkXor, // 11
+
BitsMultiWord = 3,
// BitsMultiWord will be set for the first word of a multi-word item.
// When it is set, one of the following will be set for the second word.
@@ -56,7 +60,7 @@ enum {
BitsEface = 3,
// 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
- MaxGCMask = 64,
+ MaxGCMask = 65536, // TODO(rsc): change back to 64
};
// Bits in per-word bitmap.
diff --git a/src/runtime/noasm_arm.go b/src/runtime/noasm.go
index dd3ef8267..43c16860b 100644
--- a/src/runtime/noasm_arm.go
+++ b/src/runtime/noasm.go
@@ -5,6 +5,8 @@
// Routines that are implemented in assembly in asm_{amd64,386}.s
// but are implemented in Go for arm.
+// +build arm power64 power64le
+
package runtime
func cmpstring(s1, s2 string) int {
diff --git a/src/runtime/os_darwin.c b/src/runtime/os_darwin.c
index bbd29282b..b866863d0 100644
--- a/src/runtime/os_darwin.c
+++ b/src/runtime/os_darwin.c
@@ -135,7 +135,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/os_dragonfly.c b/src/runtime/os_dragonfly.c
index e372205ec..051192ad3 100644
--- a/src/runtime/os_dragonfly.c
+++ b/src/runtime/os_dragonfly.c
@@ -195,7 +195,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024);
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/os_freebsd.c b/src/runtime/os_freebsd.c
index a513cb604..1c126547a 100644
--- a/src/runtime/os_freebsd.c
+++ b/src/runtime/os_freebsd.c
@@ -203,7 +203,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024);
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/os_linux.c b/src/runtime/os_linux.c
index 0d8ffc995..cc23774e3 100644
--- a/src/runtime/os_linux.c
+++ b/src/runtime/os_linux.c
@@ -49,9 +49,22 @@ runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
runtime·futex(addr, FUTEX_WAIT, val, nil, nil, 0);
return;
}
- // NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system.
+
+ // It's difficult to live within the no-split stack limits here.
+ // On ARM and 386, a 64-bit divide invokes a general software routine
+ // that needs more stack than we can afford. So we use timediv instead.
+ // But on real 64-bit systems, where words are larger but the stack limit
+ // is not, even timediv is too heavy, and we really need to use just an
+ // ordinary machine instruction.
+ // Sorry for the #ifdef.
+ // For what it's worth, the #ifdef eliminated an implicit little-endian assumption.
+#ifdef _64BIT
+ ts.tv_sec = ns / 1000000000LL;
+ ts.tv_nsec = ns % 1000000000LL;
+#else
ts.tv_nsec = 0;
ts.tv_sec = runtime·timediv(ns, 1000000000LL, (int32*)&ts.tv_nsec);
+#endif
runtime·futex(addr, FUTEX_WAIT, val, &ts, nil, 0);
}
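
For reference, the 64-bit branch above splits the nanosecond count with plain integer division and remainder; a tiny standalone Go illustration of the arithmetic (values are an example only):

package main

import "fmt"

func main() {
	var ns int64 = 3500000000 // 3.5 seconds, expressed in nanoseconds
	sec := ns / 1000000000    // 3
	nsec := ns % 1000000000   // 500000000
	fmt.Println(sec, nsec)
}
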
@@ -98,19 +111,22 @@ static int32
getproccount(void)
{
uintptr buf[16], t;
- int32 r, cnt, i;
+ int32 r, n, i;
- cnt = 0;
r = runtime·sched_getaffinity(0, sizeof(buf), buf);
- if(r > 0)
+ if(r <= 0)
+ return 1;
+ n = 0;
for(i = 0; i < r/sizeof(buf[0]); i++) {
t = buf[i];
- t = t - ((t >> 1) & 0x5555555555555555ULL);
- t = (t & 0x3333333333333333ULL) + ((t >> 2) & 0x3333333333333333ULL);
- cnt += (int32)((((t + (t >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
+ while(t != 0) {
+ n += t&1;
+ t >>= 1;
+ }
}
-
- return cnt ? cnt : 1;
+ if(n < 1)
+ n = 1;
+ return n;
}
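
The rewritten loop above counts CPUs by peeling one bit at a time off each word of the affinity mask, replacing the earlier branch-free popcount. A standalone Go equivalent of that per-word loop, applied to an assumed mask value:

package main

import "fmt"

// countBits mirrors the loop in getproccount: shift each mask word right one
// bit at a time, adding the low bit to the running count, and never report
// fewer than one CPU.
func countBits(words []uint64) int {
	n := 0
	for _, t := range words {
		for t != 0 {
			n += int(t & 1)
			t >>= 1
		}
	}
	if n < 1 {
		n = 1
	}
	return n
}

func main() {
	fmt.Println(countBits([]uint64{0xF0})) // 4 CPUs set in this assumed mask
}
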
// Clone, the Linux rfork.
@@ -217,7 +233,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
@@ -298,7 +317,8 @@ runtime·setsig(int32 i, GoSighandler *fn, bool restart)
if(fn == runtime·sighandler)
fn = (void*)runtime·sigtramp;
sa.sa_handler = fn;
- if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0)
+ // Qemu rejects rt_sigaction of SIGRTMAX (64).
+ if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0 && i != 64)
runtime·throw("rt_sigaction failure");
}
diff --git a/src/runtime/os_nacl.c b/src/runtime/os_nacl.c
index 14b558303..ad72cc7c6 100644
--- a/src/runtime/os_nacl.c
+++ b/src/runtime/os_nacl.c
@@ -20,7 +20,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/os_netbsd.c b/src/runtime/os_netbsd.c
index 58e5bedf2..28929ea57 100644
--- a/src/runtime/os_netbsd.c
+++ b/src/runtime/os_netbsd.c
@@ -271,7 +271,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024);
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/os_openbsd.c b/src/runtime/os_openbsd.c
index eebaa13ee..960aaffff 100644
--- a/src/runtime/os_openbsd.c
+++ b/src/runtime/os_openbsd.c
@@ -217,7 +217,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024);
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/os_plan9.c b/src/runtime/os_plan9.c
index f8c543f6f..18460fc12 100644
--- a/src/runtime/os_plan9.c
+++ b/src/runtime/os_plan9.c
@@ -20,12 +20,18 @@ runtime·mpreinit(M *mp)
{
// Initialize stack and goroutine for note handling.
mp->gsignal = runtime·malg(32*1024);
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
+
mp->notesig = (int8*)runtime·mallocgc(ERRMAX*sizeof(int8), nil, FlagNoScan);
+ runtime·writebarrierptr_nostore(&mp->notesig, mp->notesig);
// Initialize stack for handling strings from the
// errstr system call, as used in package syscall.
mp->errstr = (byte*)runtime·mallocgc(ERRMAX*sizeof(byte), nil, FlagNoScan);
+ runtime·writebarrierptr_nostore(&mp->errstr, mp->errstr);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/os_solaris.c b/src/runtime/os_solaris.c
index e16b8e637..bee91d8e6 100644
--- a/src/runtime/os_solaris.c
+++ b/src/runtime/os_solaris.c
@@ -176,7 +176,10 @@ void
runtime·mpreinit(M *mp)
{
mp->gsignal = runtime·malg(32*1024);
+ runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
+
mp->gsignal->m = mp;
+ runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
}
// Called to initialize a new m (including the bootstrap m).
diff --git a/src/runtime/panic.c b/src/runtime/panic.c
index 24eb6dbfe..46683b2b0 100644
--- a/src/runtime/panic.c
+++ b/src/runtime/panic.c
@@ -70,7 +70,7 @@ runtime·recovery_m(G *gp)
// (The pc we're returning to does pop pop
// before it tests the return value.)
// On the arm there are 2 saved LRs mixed in too.
- if(thechar == '5')
+ if(thechar == '5' || thechar == '9')
gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr);
else
gp->sched.sp = (uintptr)argp - 2*sizeof(uintptr);
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 685ff5ca0..91b5da294 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -61,7 +61,7 @@ func deferproc(siz int32, fn *funcval) { // arguments of fn follow fn
// we can only call nosplit routines.
argp := uintptr(unsafe.Pointer(&fn))
argp += unsafe.Sizeof(fn)
- if GOARCH == "arm" {
+ if GOARCH == "arm" || GOARCH == "power64" || GOARCH == "power64le" {
argp += ptrSize // skip caller's saved link register
}
mp := acquirem()
@@ -494,12 +494,12 @@ func throw(s *byte) {
//go:nosplit
func gothrow(s string) {
+ print("fatal error: ", s, "\n")
gp := getg()
if gp.m.throwing == 0 {
gp.m.throwing = 1
}
startpanic()
- print("fatal error: ", s, "\n")
dopanic(0)
*(*int)(nil) = 0 // not reached
}
diff --git a/src/runtime/print1.go b/src/runtime/print1.go
index 8f8268873..3d812bd04 100644
--- a/src/runtime/print1.go
+++ b/src/runtime/print1.go
@@ -41,7 +41,31 @@ func snprintf(dst *byte, n int32, s *byte) {
gp.writebuf = nil
}
-//var debuglock mutex
+var debuglock mutex
+
+// The compiler emits calls to printlock and printunlock around
+// the multiple calls that implement a single Go print or println
+// statement. Some of the print helpers (printsp, for example)
+// call print recursively. There is also the problem of a crash
+// happening during the print routines and needing to acquire
+// the print lock to print information about the crash.
+// For both these reasons, let a thread acquire the printlock 'recursively'.
+
+func printlock() {
+ mp := getg().m
+ mp.printlock++
+ if mp.printlock == 1 {
+ lock(&debuglock)
+ }
+}
+
+func printunlock() {
+ mp := getg().m
+ mp.printlock--
+ if mp.printlock == 0 {
+ unlock(&debuglock)
+ }
+}
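
A minimal model of the recursive acquisition described above: a per-thread counter lets nested printlock calls succeed, and only the outermost call touches the underlying mutex. The sketch uses sync.Mutex and a plain struct in place of the runtime's m and debuglock.

package main

import (
	"fmt"
	"sync"
)

var debugLock sync.Mutex

// printerState stands in for the per-M printlock counter.
type printerState struct{ depth int8 }

func (p *printerState) lock() {
	p.depth++
	if p.depth == 1 { // only the outermost caller takes the real lock
		debugLock.Lock()
	}
}

func (p *printerState) unlock() {
	p.depth--
	if p.depth == 0 {
		debugLock.Unlock()
	}
}

func main() {
	var p printerState
	p.lock()
	p.lock() // nested call, e.g. a print helper calling print again
	fmt.Println("nested print ok")
	p.unlock()
	p.unlock()
}
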
// write to goroutine-local buffer if diverting output,
// or else standard error.
@@ -80,7 +104,7 @@ func printnl() {
// Very simple printf. Only for debugging prints.
// Do not add to this without checking with Rob.
func vprintf(str string, arg unsafe.Pointer) {
- //lock(&debuglock);
+ printlock()
s := bytes(str)
start := 0
@@ -160,7 +184,7 @@ func vprintf(str string, arg unsafe.Pointer) {
gwrite(s[start:i])
}
- //unlock(&debuglock);
+ printunlock()
}
func printpc(p unsafe.Pointer) {
diff --git a/src/runtime/proc.c b/src/runtime/proc.c
index 91e3fe16d..ce39db4ab 100644
--- a/src/runtime/proc.c
+++ b/src/runtime/proc.c
@@ -423,13 +423,7 @@ runtime·casgstatus(G *gp, uint32 oldval, uint32 newval)
// loop if gp->atomicstatus is in a scan state giving
// GC time to finish and change the state to oldval.
while(!runtime·cas(&gp->atomicstatus, oldval, newval)) {
- // Help GC if needed.
- if(gp->preemptscan && !gp->gcworkdone && (oldval == Grunning || oldval == Gsyscall)) {
- gp->preemptscan = false;
- g->m->ptrarg[0] = gp;
- fn = helpcasgstatus;
- runtime·onM(&fn);
- }
+
}
}
@@ -504,6 +498,13 @@ runtime·stopg(G *gp)
return false;
case Grunning:
+ if(runtime·gcphase == GCscan) {
+ gp->gcworkdone = true;
+ return false;
+ // Running routines are not scanned during the GCscan phase;
+ // we only scan non-running routines.
+ }
+
// Claim goroutine, so we aren't racing with a status
// transition away from Grunning.
if(!runtime·castogscanstatus(gp, Grunning, Gscanrunning))
@@ -581,9 +582,10 @@ mquiesce(G *gpmaster)
uint32 status;
uint32 activeglen;
- activeglen = runtime·allglen;
// enqueue the calling goroutine.
runtime·restartg(gpmaster);
+
+ activeglen = runtime·allglen;
for(i = 0; i < activeglen; i++) {
gp = runtime·allg[i];
if(runtime·readgstatus(gp) == Gdead)
@@ -874,7 +876,9 @@ runtime·allocm(P *p)
mp->g0 = runtime·malg(-1);
else
mp->g0 = runtime·malg(8192);
+ runtime·writebarrierptr_nostore(&mp->g0, mp->g0);
mp->g0->m = mp;
+ runtime·writebarrierptr_nostore(&mp->g0->m, mp->g0->m);
if(p == g->m->p)
releasep();
@@ -1058,7 +1062,7 @@ runtime·dropm(void)
unlockextra(mp);
}
-#define MLOCKED ((M*)1)
+#define MLOCKED 1
// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
@@ -1069,28 +1073,28 @@ runtime·dropm(void)
static M*
lockextra(bool nilokay)
{
- M *mp;
+ uintptr mpx;
void (*yield)(void);
for(;;) {
- mp = runtime·atomicloadp(&runtime·extram);
- if(mp == MLOCKED) {
+ mpx = runtime·atomicloaduintptr((uintptr*)&runtime·extram);
+ if(mpx == MLOCKED) {
yield = runtime·osyield;
yield();
continue;
}
- if(mp == nil && !nilokay) {
+ if(mpx == 0 && !nilokay) {
runtime·usleep(1);
continue;
}
- if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
+ if(!runtime·casuintptr((uintptr*)&runtime·extram, mpx, MLOCKED)) {
yield = runtime·osyield;
yield();
continue;
}
break;
}
- return mp;
+ return (M*)mpx;
}
#pragma textflag NOSPLIT
@@ -1915,6 +1919,7 @@ exitsyscallfast(void)
// Freezetheworld sets stopwait but does not retake P's.
if(runtime·sched.stopwait) {
+ g->m->mcache = nil;
g->m->p = nil;
return false;
}
@@ -1927,6 +1932,7 @@ exitsyscallfast(void)
return true;
}
// Try to get any other idle P.
+ g->m->mcache = nil;
g->m->p = nil;
if(runtime·sched.pidle) {
fn = exitsyscallfast_pidle;
@@ -2122,7 +2128,7 @@ runtime·newproc(int32 siz, FuncVal* fn, ...)
byte *argp;
void (*mfn)(void);
- if(thechar == '5')
+ if(thechar == '5' || thechar == '9')
argp = (byte*)(&fn+2); // skip caller's saved LR
else
argp = (byte*)(&fn+1);
@@ -2182,7 +2188,7 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp
sp -= 4*sizeof(uintreg); // extra space in case of reads slightly beyond frame
sp -= siz;
runtime·memmove(sp, argp, narg);
- if(thechar == '5') {
+ if(thechar == '5' || thechar == '9') {
// caller's LR
sp -= sizeof(void*);
*(void**)sp = nil;
@@ -2615,6 +2621,8 @@ runtime·setcpuprofilerate_m(void)
P *runtime·newP(void);
// Change number of processors. The world is stopped, sched is locked.
+// gcworkbufs are not being modified by either the GC or
+// the write barrier code.
static void
procresize(int32 new)
{
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 5b8c7d8ae..f41ffbff3 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -165,6 +165,9 @@ func acquireSudog() *sudog {
// which keeps the garbage collector from being invoked.
mp := acquirem()
p := new(sudog)
+ if p.elem != nil {
+ gothrow("acquireSudog: found p.elem != nil after new")
+ }
releasem(mp)
return p
}
diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s
index bdea28c7c..15b18ff8f 100644
--- a/src/runtime/race_amd64.s
+++ b/src/runtime/race_amd64.s
@@ -140,20 +140,20 @@ TEXT racecalladdr<>(SB), NOSPLIT, $0-0
MOVQ g_racectx(R14), RARG0 // goroutine context
// Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss).
CMPQ RARG1, runtime·racearenastart(SB)
- JB racecalladdr_data
+ JB data
CMPQ RARG1, runtime·racearenaend(SB)
- JB racecalladdr_call
-racecalladdr_data:
+ JB call
+data:
MOVQ $runtime·noptrdata(SB), R13
CMPQ RARG1, R13
- JB racecalladdr_ret
+ JB ret
MOVQ $runtime·enoptrbss(SB), R13
CMPQ RARG1, R13
- JAE racecalladdr_ret
-racecalladdr_call:
+ JAE ret
+call:
MOVQ AX, AX // w/o this 6a miscompiles this function
JMP racecall<>(SB)
-racecalladdr_ret:
+ret:
RET
// func runtime·racefuncenter(pc uintptr)
@@ -335,9 +335,9 @@ TEXT racecall<>(SB), NOSPLIT, $0-0
MOVQ SP, R12 // callee-saved, preserved across the CALL
MOVQ m_g0(R13), R10
CMPQ R10, R14
- JE racecall_cont // already on g0
+ JE call // already on g0
MOVQ (g_sched+gobuf_sp)(R10), SP
-racecall_cont:
+call:
ANDQ $~15, SP // alignment for gcc ABI
CALL AX
MOVQ R12, SP
diff --git a/src/runtime/rt0_linux_power64.s b/src/runtime/rt0_linux_power64.s
new file mode 100644
index 000000000..970b6a673
--- /dev/null
+++ b/src/runtime/rt0_linux_power64.s
@@ -0,0 +1,17 @@
+#include "textflag.h"
+
+// actually a function descriptor for _main<>(SB)
+TEXT _rt0_power64_linux(SB),NOSPLIT,$0
+ DWORD $_main<>(SB)
+ DWORD $0
+ DWORD $0
+
+TEXT _main<>(SB),NOSPLIT,$-8
+ MOVD 0(R1), R3 // argc
+ ADD $8, R1, R4 // argv
+ BR main(SB)
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVD $runtime·rt0_go(SB), R31
+ MOVD R31, CTR
+ BR (CTR)
diff --git a/src/runtime/rt0_linux_power64le.s b/src/runtime/rt0_linux_power64le.s
new file mode 100644
index 000000000..85ce84733
--- /dev/null
+++ b/src/runtime/rt0_linux_power64le.s
@@ -0,0 +1,14 @@
+#include "textflag.h"
+
+TEXT _rt0_power64le_linux(SB),NOSPLIT,$0
+ BR _main<>(SB)
+
+TEXT _main<>(SB),NOSPLIT,$-8
+ MOVD 0(R1), R3 // argc
+ ADD $8, R1, R4 // argv
+ BR main(SB)
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVD $runtime·rt0_go(SB), R31
+ MOVD R31, CTR
+ BR (CTR)
diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c
index c823691ec..f19f8e4be 100644
--- a/src/runtime/runtime.c
+++ b/src/runtime/runtime.c
@@ -185,6 +185,7 @@ runtime·check(void)
float64 j, j1;
byte *k, *k1;
uint16* l;
+ byte m[4];
struct x1 {
byte x;
};
@@ -236,6 +237,11 @@ runtime·check(void)
if(k != k1)
runtime·throw("casp3");
+ m[0] = m[1] = m[2] = m[3] = 0x1;
+ runtime·atomicor8(&m[1], 0xf0);
+ if (m[0] != 0x1 || m[1] != 0xf1 || m[2] != 0x1 || m[3] != 0x1)
+ runtime·throw("atomicor8");
+
*(uint64*)&j = ~0ULL;
if(j == j)
runtime·throw("float64nan");
diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h
index 977c4547d..330ed429b 100644
--- a/src/runtime/runtime.h
+++ b/src/runtime/runtime.h
@@ -94,6 +94,7 @@ typedef struct PollDesc PollDesc;
typedef struct DebugVars DebugVars;
typedef struct ForceGCState ForceGCState;
typedef struct Stack Stack;
+typedef struct Workbuf Workbuf;
/*
* Per-CPU declaration.
@@ -304,7 +305,7 @@ struct G
bool paniconfault; // panic (instead of crash) on unexpected fault address
bool preemptscan; // preempted g does scan for GC
bool gcworkdone; // debug: cleared at begining of gc work phase cycle, set by gcphasework, tested at end of cycle
- bool throwsplit; // must not split stack
+ bool throwsplit; // must not split stack
int8 raceignore; // ignore race detection events
M* m; // for debuggers, but offset not hard-coded
M* lockedm;
@@ -344,6 +345,8 @@ struct M
int32 helpgc;
bool spinning; // M is out of work and is actively looking for work
bool blocked; // M is blocked on a Note
+ bool inwb; // M is executing a write barrier
+ int8 printlock;
uint32 fastrand;
uint64 ncgocall; // number of cgo calls in total
int32 ncgo; // number of cgo calls currently in progress
@@ -570,9 +573,10 @@ enum {
#endif
// Lock-free stack node.
+// Also known to export_test.go.
struct LFNode
{
- LFNode *next;
+ uint64 next;
uintptr pushcnt;
};
@@ -598,6 +602,16 @@ struct ParFor
uint64 nsleep;
};
+enum {
+ WorkbufSize = 4*1024,
+};
+struct Workbuf
+{
+ LFNode node; // must be first
+ uintptr nobj;
+ byte* obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize];
+};
+
// Track memory allocated by code not written in Go during a cgo call,
// so that the garbage collector can see them.
struct CgoMal
@@ -620,12 +634,14 @@ struct DebugVars
// Indicates the write barrier and synchronization task to perform.
enum
-{ // Synchronization Write barrier
- GCoff, // stop and start nop
- GCquiesce, // stop and start nop
- GCstw, // stop the ps nop
- GCmark, // scan the stacks and start no white to black
- GCsweep, // stop and start nop
+{ // Action WB installation
+ GCoff = 0, // stop and start no wb
+ GCquiesce, // stop and start no wb
+ GCstw, // stop the ps nop
+ GCscan, // scan the stacks prior to marking
+ GCmark, // mark using wbufs from GCscan and globals, scan the stacks, then go to GCmarktermination
+ GCmarktermination, // mark termination detection. Allocate black, Ps help out GC
+ GCsweep, // stop and start nop
};
struct ForceGCState
@@ -636,6 +652,7 @@ struct ForceGCState
};
extern uint32 runtime·gcphase;
+extern Mutex runtime·allglock;
/*
* defined macros
@@ -666,6 +683,7 @@ enum {
uint32 runtime·readgstatus(G*);
void runtime·casgstatus(G*, uint32, uint32);
+bool runtime·castogscanstatus(G*, uint32, uint32);
void runtime·quiesce(G*);
bool runtime·stopg(G*);
void runtime·restartg(G*);
@@ -882,6 +900,7 @@ int32 runtime·round2(int32 x); // round x up to a power of 2.
bool runtime·cas(uint32*, uint32, uint32);
bool runtime·cas64(uint64*, uint64, uint64);
bool runtime·casp(void**, void*, void*);
+bool runtime·casuintptr(uintptr*, uintptr, uintptr);
// Don't confuse with XADD x86 instruction,
// this one is actually 'addx', that is, add-and-fetch.
uint32 runtime·xadd(uint32 volatile*, int32);
@@ -1108,6 +1127,8 @@ void runtime·osyield(void);
void runtime·lockOSThread(void);
void runtime·unlockOSThread(void);
+void runtime·writebarrierptr_nostore(void*, void*);
+
bool runtime·showframe(Func*, G*);
void runtime·printcreatedby(G*);
diff --git a/src/runtime/select.go b/src/runtime/select.go
index efe68c1f5..d703e1d79 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -377,12 +377,7 @@ loop:
// iterating through the linked list they are in reverse order.
cas = nil
sglist = gp.waiting
- // Clear all selectdone and elem before unlinking from gp.waiting.
- // They must be cleared before being put back into the sudog cache.
- // Clear before unlinking, because if a stack copy happens after the unlink,
- // they will not be updated, they will be left pointing to the old stack,
- // which creates dangling pointers, which may be detected by the
- // garbage collector.
+ // Clear all elem before unlinking from gp.waiting.
for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink {
sg1.selectdone = nil
sg1.elem = nil
diff --git a/src/runtime/signal_linux_power64.h b/src/runtime/signal_linux_power64.h
new file mode 100644
index 000000000..840648920
--- /dev/null
+++ b/src/runtime/signal_linux_power64.h
@@ -0,0 +1,49 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext)->regs)
+
+#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).gpr[0])
+#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).gpr[1])
+#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).gpr[2])
+#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).gpr[3])
+#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).gpr[4])
+#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).gpr[5])
+#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).gpr[6])
+#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).gpr[7])
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).gpr[8])
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).gpr[9])
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).gpr[10])
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).gpr[11])
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).gpr[12])
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).gpr[13])
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).gpr[14])
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).gpr[15])
+#define SIG_R16(info, ctxt) (SIG_REGS(ctxt).gpr[16])
+#define SIG_R17(info, ctxt) (SIG_REGS(ctxt).gpr[17])
+#define SIG_R18(info, ctxt) (SIG_REGS(ctxt).gpr[18])
+#define SIG_R19(info, ctxt) (SIG_REGS(ctxt).gpr[19])
+#define SIG_R20(info, ctxt) (SIG_REGS(ctxt).gpr[20])
+#define SIG_R21(info, ctxt) (SIG_REGS(ctxt).gpr[21])
+#define SIG_R22(info, ctxt) (SIG_REGS(ctxt).gpr[22])
+#define SIG_R23(info, ctxt) (SIG_REGS(ctxt).gpr[23])
+#define SIG_R24(info, ctxt) (SIG_REGS(ctxt).gpr[24])
+#define SIG_R25(info, ctxt) (SIG_REGS(ctxt).gpr[25])
+#define SIG_R26(info, ctxt) (SIG_REGS(ctxt).gpr[26])
+#define SIG_R27(info, ctxt) (SIG_REGS(ctxt).gpr[27])
+#define SIG_R28(info, ctxt) (SIG_REGS(ctxt).gpr[28])
+#define SIG_R29(info, ctxt) (SIG_REGS(ctxt).gpr[29])
+#define SIG_R30(info, ctxt) (SIG_REGS(ctxt).gpr[30])
+#define SIG_R31(info, ctxt) (SIG_REGS(ctxt).gpr[31])
+
+#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).gpr[1])
+#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).nip)
+#define SIG_TRAP(info, ctxt) (SIG_REGS(ctxt).trap)
+#define SIG_CTR(info, ctxt) (SIG_REGS(ctxt).ctr)
+#define SIG_LINK(info, ctxt) (SIG_REGS(ctxt).link)
+#define SIG_XER(info, ctxt) (SIG_REGS(ctxt).xer)
+#define SIG_CCR(info, ctxt) (SIG_REGS(ctxt).ccr)
+
+#define SIG_CODE0(info, ctxt) ((uintptr)(info)->si_code)
+#define SIG_FAULT(info, ctxt) (SIG_REGS(ctxt).dar)
diff --git a/src/runtime/signal_linux_power64le.h b/src/runtime/signal_linux_power64le.h
new file mode 100644
index 000000000..840648920
--- /dev/null
+++ b/src/runtime/signal_linux_power64le.h
@@ -0,0 +1,49 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext)->regs)
+
+#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).gpr[0])
+#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).gpr[1])
+#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).gpr[2])
+#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).gpr[3])
+#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).gpr[4])
+#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).gpr[5])
+#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).gpr[6])
+#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).gpr[7])
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).gpr[8])
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).gpr[9])
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).gpr[10])
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).gpr[11])
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).gpr[12])
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).gpr[13])
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).gpr[14])
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).gpr[15])
+#define SIG_R16(info, ctxt) (SIG_REGS(ctxt).gpr[16])
+#define SIG_R17(info, ctxt) (SIG_REGS(ctxt).gpr[17])
+#define SIG_R18(info, ctxt) (SIG_REGS(ctxt).gpr[18])
+#define SIG_R19(info, ctxt) (SIG_REGS(ctxt).gpr[19])
+#define SIG_R20(info, ctxt) (SIG_REGS(ctxt).gpr[20])
+#define SIG_R21(info, ctxt) (SIG_REGS(ctxt).gpr[21])
+#define SIG_R22(info, ctxt) (SIG_REGS(ctxt).gpr[22])
+#define SIG_R23(info, ctxt) (SIG_REGS(ctxt).gpr[23])
+#define SIG_R24(info, ctxt) (SIG_REGS(ctxt).gpr[24])
+#define SIG_R25(info, ctxt) (SIG_REGS(ctxt).gpr[25])
+#define SIG_R26(info, ctxt) (SIG_REGS(ctxt).gpr[26])
+#define SIG_R27(info, ctxt) (SIG_REGS(ctxt).gpr[27])
+#define SIG_R28(info, ctxt) (SIG_REGS(ctxt).gpr[28])
+#define SIG_R29(info, ctxt) (SIG_REGS(ctxt).gpr[29])
+#define SIG_R30(info, ctxt) (SIG_REGS(ctxt).gpr[30])
+#define SIG_R31(info, ctxt) (SIG_REGS(ctxt).gpr[31])
+
+#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).gpr[1])
+#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).nip)
+#define SIG_TRAP(info, ctxt) (SIG_REGS(ctxt).trap)
+#define SIG_CTR(info, ctxt) (SIG_REGS(ctxt).ctr)
+#define SIG_LINK(info, ctxt) (SIG_REGS(ctxt).link)
+#define SIG_XER(info, ctxt) (SIG_REGS(ctxt).xer)
+#define SIG_CCR(info, ctxt) (SIG_REGS(ctxt).ccr)
+
+#define SIG_CODE0(info, ctxt) ((uintptr)(info)->si_code)
+#define SIG_FAULT(info, ctxt) (SIG_REGS(ctxt).dar)
diff --git a/src/runtime/signal_power64x.c b/src/runtime/signal_power64x.c
new file mode 100644
index 000000000..89c5c7848
--- /dev/null
+++ b/src/runtime/signal_power64x.c
@@ -0,0 +1,137 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+// +build power64 power64le
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_GOOS_GOARCH.h"
+#include "signals_GOOS.h"
+
+void
+runtime·dumpregs(Siginfo *info, void *ctxt)
+{
+ USED(info); USED(ctxt);
+ runtime·printf("r0 %X\t", SIG_R0(info, ctxt));
+ runtime·printf("r1 %X\n", SIG_R1(info, ctxt));
+ runtime·printf("r2 %X\t", SIG_R2(info, ctxt));
+ runtime·printf("r3 %X\n", SIG_R3(info, ctxt));
+ runtime·printf("r4 %X\t", SIG_R4(info, ctxt));
+ runtime·printf("r5 %X\n", SIG_R5(info, ctxt));
+ runtime·printf("r6 %X\t", SIG_R6(info, ctxt));
+ runtime·printf("r7 %X\n", SIG_R7(info, ctxt));
+ runtime·printf("r8 %X\t", SIG_R8(info, ctxt));
+ runtime·printf("r9 %X\n", SIG_R9(info, ctxt));
+ runtime·printf("r10 %X\t", SIG_R10(info, ctxt));
+ runtime·printf("r11 %X\n", SIG_R11(info, ctxt));
+ runtime·printf("r12 %X\t", SIG_R12(info, ctxt));
+ runtime·printf("r13 %X\n", SIG_R13(info, ctxt));
+ runtime·printf("r14 %X\t", SIG_R14(info, ctxt));
+ runtime·printf("r15 %X\n", SIG_R15(info, ctxt));
+ runtime·printf("r16 %X\t", SIG_R16(info, ctxt));
+ runtime·printf("r17 %X\n", SIG_R17(info, ctxt));
+ runtime·printf("r18 %X\t", SIG_R18(info, ctxt));
+ runtime·printf("r19 %X\n", SIG_R19(info, ctxt));
+ runtime·printf("r20 %X\t", SIG_R20(info, ctxt));
+ runtime·printf("r21 %X\n", SIG_R21(info, ctxt));
+ runtime·printf("r22 %X\t", SIG_R22(info, ctxt));
+ runtime·printf("r23 %X\n", SIG_R23(info, ctxt));
+ runtime·printf("r24 %X\t", SIG_R24(info, ctxt));
+ runtime·printf("r25 %X\n", SIG_R25(info, ctxt));
+ runtime·printf("r26 %X\t", SIG_R26(info, ctxt));
+ runtime·printf("r27 %X\n", SIG_R27(info, ctxt));
+ runtime·printf("r28 %X\t", SIG_R28(info, ctxt));
+ runtime·printf("r29 %X\n", SIG_R29(info, ctxt));
+ runtime·printf("r30 %X\t", SIG_R30(info, ctxt));
+ runtime·printf("r31 %X\n", SIG_R31(info, ctxt));
+ runtime·printf("pc %X\t", SIG_PC(info, ctxt));
+ runtime·printf("ctr %X\n", SIG_CTR(info, ctxt));
+ runtime·printf("link %X\t", SIG_LINK(info, ctxt));
+ runtime·printf("xer %X\n", SIG_XER(info, ctxt));
+ runtime·printf("ccr %X\t", SIG_CCR(info, ctxt));
+ runtime·printf("trap %X\n", SIG_TRAP(info, ctxt));
+}
+
+void
+runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
+{
+ SigTab *t;
+ bool crash;
+
+ if(sig == SIGPROF) {
+ runtime·sigprof((uint8*)SIG_PC(info, ctxt), (uint8*)SIG_SP(info, ctxt), (uint8*)SIG_LINK(info, ctxt), gp, g->m);
+ return;
+ }
+ t = &runtime·sigtab[sig];
+ if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
+ // Make it look like a call to the signal func.
+ // Have to pass arguments out of band since
+ // augmenting the stack frame would break
+ // the unwinding code.
+ gp->sig = sig;
+ gp->sigcode0 = SIG_CODE0(info, ctxt);
+ gp->sigcode1 = SIG_FAULT(info, ctxt);
+ gp->sigpc = SIG_PC(info, ctxt);
+
+ // We arrange link, and pc to pretend the panicking
+ // function calls sigpanic directly.
+ // Always save LINK to stack so that panics in leaf
+ // functions are correctly handled. This smashes
+ // the stack frame but we're not going back there
+ // anyway.
+ SIG_SP(info, ctxt) -= sizeof(uintptr);
+ *(uintptr*)SIG_SP(info, ctxt) = SIG_LINK(info, ctxt);
+ // Don't bother saving PC if it's zero, which is
+ // probably a call to a nil func: the old link register
+ // is more useful in the stack trace.
+ if(gp->sigpc != 0)
+ SIG_LINK(info, ctxt) = gp->sigpc;
+ // In case we are panicking from external C code
+ SIG_R0(info, ctxt) = 0;
+ SIG_R30(info, ctxt) = (uintptr)gp;
+ SIG_PC(info, ctxt) = (uintptr)runtime·sigpanic;
+ return;
+ }
+
+ if(SIG_CODE0(info, ctxt) == SI_USER || (t->flags & SigNotify))
+ if(runtime·sigsend(sig))
+ return;
+ if(t->flags & SigKill)
+ runtime·exit(2);
+ if(!(t->flags & SigThrow))
+ return;
+
+ g->m->throwing = 1;
+ g->m->caughtsig = gp;
+ if(runtime·panicking) // traceback already printed
+ runtime·exit(2);
+ runtime·panicking = 1;
+
+ if(sig < 0 || sig >= NSIG)
+ runtime·printf("Signal %d\n", sig);
+ else
+ runtime·printf("%s\n", runtime·sigtab[sig].name);
+
+ runtime·printf("PC=%x\n", SIG_PC(info, ctxt));
+ if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
+ runtime·printf("signal arrived during cgo execution\n");
+ gp = g->m->lockedg;
+ }
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)){
+ runtime·goroutineheader(gp);
+ runtime·traceback(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LINK(info, ctxt), gp);
+ runtime·tracebackothers(gp);
+ runtime·printf("\n");
+ runtime·dumpregs(info, ctxt);
+ }
+
+ if(crash)
+ runtime·crash();
+
+ runtime·exit(2);
+}
diff --git a/src/runtime/stack.c b/src/runtime/stack.c
index 072bc242b..ffae73a2a 100644
--- a/src/runtime/stack.c
+++ b/src/runtime/stack.c
@@ -382,8 +382,6 @@ adjustpointers(byte **scanp, BitVector *bv, AdjustInfo *adjinfo, Func *f)
uintptr delta;
int32 num, i;
byte *p, *minp, *maxp;
- Type *t;
- Itab *tab;
minp = (byte*)adjinfo->old.lo;
maxp = (byte*)adjinfo->old.hi;
@@ -415,43 +413,7 @@ adjustpointers(byte **scanp, BitVector *bv, AdjustInfo *adjinfo, Func *f)
}
break;
case BitsMultiWord:
- switch(bv->bytedata[(i+1) / (8 / BitsPerPointer)] >> ((i+1) * BitsPerPointer & 7) & 3) {
- default:
- runtime·throw("unexpected garbage collection bits");
- case BitsEface:
- t = (Type*)scanp[i];
- if(t != nil && ((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0)) {
- p = scanp[i+1];
- if(minp <= p && p < maxp) {
- if(StackDebug >= 3)
- runtime·printf("adjust eface %p\n", p);
- if(t->size > PtrSize) // currently we always allocate such objects on the heap
- runtime·throw("large interface value found on stack");
- scanp[i+1] = p + delta;
- }
- }
- i++;
- break;
- case BitsIface:
- tab = (Itab*)scanp[i];
- if(tab != nil) {
- t = tab->type;
- //runtime·printf(" type=%p\n", t);
- if((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0) {
- p = scanp[i+1];
- if(minp <= p && p < maxp) {
- if(StackDebug >= 3)
- runtime·printf("adjust iface %p\n", p);
- if(t->size > PtrSize) // currently we always allocate such objects on the heap
- runtime·throw("large interface value found on stack");
- scanp[i+1] = p + delta;
- }
- }
- }
- i++;
- break;
- }
- break;
+ runtime·throw("adjustpointers: unexpected garbage collection bits");
}
}
}
@@ -587,13 +549,13 @@ adjustsudogs(G *gp, AdjustInfo *adjinfo)
}
// Copies gp's stack to a new stack of a different size.
+// Caller must have changed gp status to Gcopystack.
static void
copystack(G *gp, uintptr newsize)
{
Stack old, new;
uintptr used;
AdjustInfo adjinfo;
- uint32 oldstatus;
bool (*cb)(Stkframe*, void*);
byte *p, *ep;
@@ -637,20 +599,11 @@ copystack(G *gp, uintptr newsize)
}
runtime·memmove((byte*)new.hi - used, (byte*)old.hi - used, used);
- oldstatus = runtime·readgstatus(gp);
- oldstatus &= ~Gscan;
- if(oldstatus == Gwaiting || oldstatus == Grunnable)
- runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
- else
- runtime·throw("copystack: bad status, not Gwaiting or Grunnable");
-
// Swap out old stack for new one
gp->stack = new;
gp->stackguard0 = new.lo + StackGuard; // NOTE: might clobber a preempt request
gp->sched.sp = new.hi - used;
- runtime·casgstatus(gp, Gcopystack, oldstatus); // oldstatus is Gwaiting or Grunnable
-
// free old stack
if(StackPoisonCopy) {
p = (byte*)old.lo;
@@ -700,6 +653,7 @@ void
runtime·newstack(void)
{
int32 oldsize, newsize;
+ uint32 oldstatus;
uintptr sp;
G *gp;
Gobuf morebuf;
@@ -752,6 +706,14 @@ runtime·newstack(void)
runtime·printf("runtime: split stack overflow: %p < %p\n", sp, gp->stack.lo);
runtime·throw("runtime: split stack overflow");
}
+
+ if(gp->sched.ctxt != nil) {
+ // morestack wrote sched.ctxt on its way in here,
+ // without a write barrier. Run the write barrier now.
+ // It is not possible to be preempted between then
+ // and now, so it's okay.
+ runtime·writebarrierptr_nostore(&gp->sched.ctxt, gp->sched.ctxt);
+ }
if(gp->stackguard0 == (uintptr)StackPreempt) {
if(gp == g->m->g0)
@@ -789,12 +751,15 @@ runtime·newstack(void)
runtime·throw("stack overflow");
}
- // Note that the concurrent GC might be scanning the stack as we try to replace it.
- // copystack takes care of the appropriate coordination with the stack scanner.
+ oldstatus = runtime·readgstatus(gp);
+ oldstatus &= ~Gscan;
+ runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
+ // The concurrent GC will not scan the stack while we are doing the copy since
+ // the gp is in a Gcopystack status.
copystack(gp, newsize);
if(StackDebug >= 1)
runtime·printf("stack grow done\n");
- runtime·casgstatus(gp, Gwaiting, Grunning);
+ runtime·casgstatus(gp, Gcopystack, Grunning);
runtime·gogo(&gp->sched);
}
@@ -825,6 +790,7 @@ void
runtime·shrinkstack(G *gp)
{
uintptr used, oldsize, newsize;
+ uint32 oldstatus;
if(runtime·readgstatus(gp) == Gdead) {
if(gp->stack.lo != 0) {
@@ -858,8 +824,19 @@ runtime·shrinkstack(G *gp)
#endif
if(StackDebug > 0)
runtime·printf("shrinking stack %D->%D\n", (uint64)oldsize, (uint64)newsize);
+ // This is being done in a Gscan state and was initiated by the GC, so there is no need to move to
+ // the Gcopystack state.
+ // The world is stopped, so the goroutine must be Gwaiting or Grunnable,
+ // and its status is not changing underfoot.
+
+ oldstatus = runtime·readgstatus(gp);
+ oldstatus &= ~Gscan;
+ if(oldstatus != Gwaiting && oldstatus != Grunnable)
+ runtime·throw("status is not Gwaiting or Grunnable");
+ runtime·casgstatus(gp, oldstatus, Gcopystack);
copystack(gp, newsize);
-}
+ runtime·casgstatus(gp, Gcopystack, oldstatus);
+ }
// Do any delayed stack freeing that was queued up during GC.
void
diff --git a/src/runtime/string.c b/src/runtime/string.c
index ed5debc33..475ea2de6 100644
--- a/src/runtime/string.c
+++ b/src/runtime/string.c
@@ -48,7 +48,7 @@ runtime·gostringnocopy(byte *str)
s.len = runtime·findnull(str);
while(true) {
ms = runtime·maxstring;
- if(s.len <= ms || runtime·casp((void**)&runtime·maxstring, (void*)ms, (void*)s.len))
+ if(s.len <= ms || runtime·casuintptr(&runtime·maxstring, ms, s.len))
return s;
}
}
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 0809f89bc..882281605 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -39,22 +39,18 @@ func concatstrings(a []string) string {
return s
}
-//go:nosplit
func concatstring2(a [2]string) string {
return concatstrings(a[:])
}
-//go:nosplit
func concatstring3(a [3]string) string {
return concatstrings(a[:])
}
-//go:nosplit
func concatstring4(a [4]string) string {
return concatstrings(a[:])
}
-//go:nosplit
func concatstring5(a [5]string) string {
return concatstrings(a[:])
}
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index fe8f9c922..9889567d6 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -106,6 +106,16 @@ func recovery_m(*g)
func mcacheRefill_m()
func largeAlloc_m()
func gc_m()
+func gcscan_m()
+func gcmark_m()
+func gccheckmark_m()
+func gccheckmarkenable_m()
+func gccheckmarkdisable_m()
+func gcinstallmarkwb_m()
+func gcinstalloffwb_m()
+func gcmarknewobject_m()
+func gcmarkwb_m()
+func finishsweep_m()
func scavenge_m()
func setFinalizer_m()
func removeFinalizer_m()
@@ -204,9 +214,6 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32
func cas(ptr *uint32, old, new uint32) bool
//go:noescape
-func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
-
-//go:noescape
func casuintptr(ptr *uintptr, old, new uintptr) bool
//go:noescape
diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s
index a961c71a8..3bf8b1d41 100644
--- a/src/runtime/sys_darwin_386.s
+++ b/src/runtime/sys_darwin_386.s
@@ -248,7 +248,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40
MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
- JMP sigtramp_ret
+ JMP ret
// save g
MOVL DI, 20(SP)
@@ -275,7 +275,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$40
MOVL 20(SP), DI
MOVL DI, g(CX)
-sigtramp_ret:
+ret:
// call sigreturn
MOVL context+16(FP), CX
MOVL style+4(FP), BX
diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s
index bd397d72a..8a8928e06 100644
--- a/src/runtime/sys_darwin_amd64.s
+++ b/src/runtime/sys_darwin_amd64.s
@@ -211,7 +211,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVL DX, 0(SP)
MOVQ $runtime·badsignal(SB), AX
CALL AX
- JMP sigtramp_ret
+ JMP ret
// save g
MOVQ R10, 48(SP)
@@ -233,7 +233,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$64
MOVQ 48(SP), R10
MOVQ R10, g(BX)
-sigtramp_ret:
+ret:
// call sigreturn
MOVL $(0x2000000+184), AX // sigreturn(ucontext, infostyle)
MOVQ 32(SP), DI // saved ucontext
diff --git a/src/runtime/sys_dragonfly_386.s b/src/runtime/sys_dragonfly_386.s
index 161eaec19..71ece9ecb 100644
--- a/src/runtime/sys_dragonfly_386.s
+++ b/src/runtime/sys_dragonfly_386.s
@@ -217,7 +217,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
- JMP sigtramp_ret
+ JMP ret
// save g
MOVL DI, 20(SP)
@@ -243,7 +243,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL 20(SP), BX
MOVL BX, g(CX)
-sigtramp_ret:
+ret:
// call sigreturn
MOVL context+8(FP), AX
MOVL $0, 0(SP) // syscall gap
diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s
index 2c40fc433..66d03c27d 100644
--- a/src/runtime/sys_freebsd_386.s
+++ b/src/runtime/sys_freebsd_386.s
@@ -197,7 +197,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
- JMP sigtramp_ret
+ JMP ret
// save g
MOVL DI, 20(SP)
@@ -223,7 +223,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL 20(SP), BX
MOVL BX, g(CX)
-sigtramp_ret:
+ret:
// call sigreturn
MOVL context+8(FP), AX
MOVL $0, 0(SP) // syscall gap
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index 33b91e872..d8d86ffad 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -115,7 +115,7 @@ TEXT time·now(SB),NOSPLIT,$16
// That leaves 104 for the gettime code to use. Hope that's enough!
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
- JEQ fallback_gtod
+ JEQ fallback
MOVL $0, DI // CLOCK_REALTIME
LEAQ 0(SP), SI
CALL AX
@@ -124,7 +124,7 @@ TEXT time·now(SB),NOSPLIT,$16
MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP)
RET
-fallback_gtod:
+fallback:
LEAQ 0(SP), DI
MOVQ $0, SI
MOVQ runtime·__vdso_gettimeofday_sym(SB), AX
@@ -141,7 +141,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16
// See comment above in time.now.
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
- JEQ fallback_gtod_nt
+ JEQ fallback
MOVL $1, DI // CLOCK_MONOTONIC
LEAQ 0(SP), SI
CALL AX
@@ -153,7 +153,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16
ADDQ DX, AX
MOVQ AX, ret+0(FP)
RET
-fallback_gtod_nt:
+fallback:
LEAQ 0(SP), DI
MOVQ $0, SI
MOVQ runtime·__vdso_gettimeofday_sym(SB), AX
diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s
index bd285f399..033a03642 100644
--- a/src/runtime/sys_linux_arm.s
+++ b/src/runtime/sys_linux_arm.s
@@ -373,20 +373,20 @@ TEXT cas<>(SB),NOSPLIT,$0
TEXT runtime·cas(SB),NOSPLIT,$0
MOVW ptr+0(FP), R2
MOVW old+4(FP), R0
-casagain:
+loop:
MOVW new+8(FP), R1
BL cas<>(SB)
- BCC cascheck
+ BCC check
MOVW $1, R0
MOVB R0, ret+12(FP)
RET
-cascheck:
+check:
// Kernel lies; double-check.
MOVW ptr+0(FP), R2
MOVW old+4(FP), R0
MOVW 0(R2), R3
CMP R0, R3
- BEQ casagain
+ BEQ loop
MOVW $0, R0
MOVB R0, ret+12(FP)
RET
diff --git a/src/runtime/sys_linux_power64x.s b/src/runtime/sys_linux_power64x.s
new file mode 100644
index 000000000..fb24d3e79
--- /dev/null
+++ b/src/runtime/sys_linux_power64x.s
@@ -0,0 +1,383 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+// +build power64 power64le
+
+//
+// System calls and other sys.stuff for Power64, Linux
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+#define SYS_exit 1
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_fcntl 55
+#define SYS_gettimeofday 78
+#define SYS_select		82	// always returns -ENOSYS
+#define SYS_mmap 90
+#define SYS_munmap 91
+#define SYS_setitimer 104
+#define SYS_clone 120
+#define SYS_newselect 142
+#define SYS_sched_yield 158
+#define SYS_rt_sigreturn 172
+#define SYS_rt_sigaction 173
+#define SYS_rt_sigprocmask 174
+#define SYS_sigaltstack 185
+#define SYS_ugetrlimit 190
+#define SYS_madvise 205
+#define SYS_mincore 206
+#define SYS_gettid 207
+#define SYS_tkill 208
+#define SYS_futex 221
+#define SYS_sched_getaffinity 223
+#define SYS_exit_group 234
+#define SYS_epoll_create 236
+#define SYS_epoll_ctl 237
+#define SYS_epoll_wait 238
+#define SYS_clock_gettime 246
+#define SYS_epoll_create1 315
+
+TEXT runtime·exit(SB),NOSPLIT,$-8-4
+ MOVW code+0(FP), R3
+ SYSCALL $SYS_exit_group
+ RETURN
+
+TEXT runtime·exit1(SB),NOSPLIT,$-8-4
+ MOVW code+0(FP), R3
+ SYSCALL $SYS_exit
+ RETURN
+
+TEXT runtime·open(SB),NOSPLIT,$-8-20
+ MOVD name+0(FP), R3
+ MOVW mode+8(FP), R4
+ MOVW perm+12(FP), R5
+ SYSCALL $SYS_open
+ MOVW R3, ret+16(FP)
+ RETURN
+
+TEXT runtime·close(SB),NOSPLIT,$-8-12
+ MOVW fd+0(FP), R3
+ SYSCALL $SYS_close
+ MOVW R3, ret+8(FP)
+ RETURN
+
+TEXT runtime·write(SB),NOSPLIT,$-8-28
+ MOVD fd+0(FP), R3
+ MOVD p+8(FP), R4
+ MOVW n+16(FP), R5
+ SYSCALL $SYS_write
+ MOVW R3, ret+24(FP)
+ RETURN
+
+TEXT runtime·read(SB),NOSPLIT,$-8-28
+ MOVW fd+0(FP), R3
+ MOVD p+8(FP), R4
+ MOVW n+16(FP), R5
+ SYSCALL $SYS_read
+ MOVW R3, ret+24(FP)
+ RETURN
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$-8-20
+ MOVW kind+0(FP), R3
+ MOVD limit+8(FP), R4
+ SYSCALL $SYS_ugetrlimit
+ MOVW R3, ret+16(FP)
+ RETURN
+
+TEXT runtime·usleep(SB),NOSPLIT,$16-4
+ MOVW usec+0(FP), R3
+ MOVD R3, R5
+ MOVW $1000000, R4
+ DIVD R4, R3
+ MOVD R3, 8(R1)
+ MULLD R3, R4
+ SUB R4, R5
+ MOVD R5, 16(R1)
+
+ // select(0, 0, 0, 0, &tv)
+ MOVW $0, R3
+ MOVW $0, R4
+ MOVW $0, R5
+ MOVW $0, R6
+ ADD $8, R1, R7
+ SYSCALL $SYS_newselect
+ RETURN
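The usleep wrapper above splits the microsecond argument into the seconds/microseconds pair it stores on the stack for the timeout-only select call. The arithmetic, as a runnable sketch:

	package main

	import "fmt"

	// splitUsec mirrors the DIVD/MULLD/SUB sequence above:
	// sec = usec/1e6, rem = usec - sec*1e6.
	func splitUsec(usec uint32) (sec, rem uint32) {
		sec = usec / 1000000
		rem = usec - sec*1000000
		return
	}

	func main() {
		fmt.Println(splitUsec(2500000)) // 2 500000
	}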
+
+TEXT runtime·raise(SB),NOSPLIT,$-8
+ SYSCALL $SYS_gettid
+ MOVW R3, R3 // arg 1 tid
+ MOVW sig+0(FP), R4 // arg 2
+ SYSCALL $SYS_tkill
+ RETURN
+
+TEXT runtime·setitimer(SB),NOSPLIT,$-8-24
+ MOVW mode+0(FP), R3
+ MOVD new+8(FP), R4
+ MOVD old+16(FP), R5
+ SYSCALL $SYS_setitimer
+ RETURN
+
+TEXT runtime·mincore(SB),NOSPLIT,$-8-28
+ MOVD addr+0(FP), R3
+ MOVD n+8(FP), R4
+ MOVD dst+16(FP), R5
+ SYSCALL $SYS_mincore
+ MOVW R3, ret+24(FP)
+ RETURN
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$16
+ MOVD $0(R1), R3
+ MOVD $0, R4
+ SYSCALL $SYS_gettimeofday
+ MOVD 0(R1), R3 // sec
+ MOVD 8(R1), R5 // usec
+ MOVD $1000, R4
+ MULLD R4, R5
+ MOVD R3, sec+0(FP)
+ MOVW R5, nsec+8(FP)
+ RETURN
+
+TEXT runtime·nanotime(SB),NOSPLIT,$16
+ MOVW $1, R3 // CLOCK_MONOTONIC
+ MOVD $0(R1), R4
+ SYSCALL $SYS_clock_gettime
+ MOVD 0(R1), R3 // sec
+ MOVD 8(R1), R5 // nsec
+	// sec is in R3, nsec in R5
+	// return sec*1e9 + nsec, in R3
+ MOVD $1000000000, R4
+ MULLD R4, R3
+ ADD R5, R3
+ MOVD R3, ret+0(FP)
+ RETURN
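time·now and runtime·nanotime above do the usual unit conversions: gettimeofday yields (sec, usec) and now reports usec*1000 as the nanosecond part, while clock_gettime yields (sec, nsec) and nanotime folds both into a single count. A runnable sketch of just the arithmetic:

	package main

	import "fmt"

	func usecToNsec(usec int64) int64    { return usec * 1000 }
	func nanotime(sec, nsec int64) int64 { return sec*1000000000 + nsec }

	func main() {
		fmt.Println(usecToNsec(250))     // 250000
		fmt.Println(nanotime(3, 250000)) // 3000250000
	}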
+
+TEXT runtime·rtsigprocmask(SB),NOSPLIT,$-8-28
+ MOVW sig+0(FP), R3
+ MOVD new+8(FP), R4
+ MOVD old+16(FP), R5
+ MOVW size+24(FP), R6
+ SYSCALL $SYS_rt_sigprocmask
+ BVC 2(PC)
+ MOVD R0, 0xf1(R0) // crash
+ RETURN
+
+TEXT runtime·rt_sigaction(SB),NOSPLIT,$-8-36
+ MOVD sig+0(FP), R3
+ MOVD new+8(FP), R4
+ MOVD old+16(FP), R5
+ MOVD size+24(FP), R6
+ SYSCALL $SYS_rt_sigaction
+ MOVW R3, ret+32(FP)
+ RETURN
+
+#ifdef GOARCH_power64le
+// power64le doesn't need function descriptors
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+#else
+// function descriptor for the real sigtramp
+TEXT runtime·sigtramp(SB),NOSPLIT,$-8
+ DWORD $runtime·_sigtramp(SB)
+ DWORD $0
+ DWORD $0
+TEXT runtime·_sigtramp(SB),NOSPLIT,$64
+#endif
+ // initialize essential registers (just in case)
+ BL runtime·reginit(SB)
+
+ // check that g exists
+ CMP g, $0
+ BNE 6(PC)
+ MOVD R3, 8(R1)
+ MOVD $runtime·badsignal(SB), R31
+ MOVD R31, CTR
+ BL (CTR)
+ RETURN
+
+ // save g
+ MOVD g, 40(R1)
+ MOVD g, R6
+
+ // g = m->gsignal
+ MOVD g_m(g), R7
+ MOVD m_gsignal(R7), g
+
+ MOVW R3, 8(R1)
+ MOVD R4, 16(R1)
+ MOVD R5, 24(R1)
+ MOVD R6, 32(R1)
+
+ BL runtime·sighandler(SB)
+
+ // restore g
+ MOVD 40(R1), g
+
+ RETURN
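The power64 sigtramp follows the same shape as the other ports touched in this CL: bail out to badsignal when there is no g, otherwise stash the current g, run the handler on m->gsignal, and restore g afterwards. In rough Go-flavored pseudocode (not compilable runtime code; names taken from the assembly above):

	// pseudocode only
	func sigtramp(sig uint32, info, ctx pointer) {
		if g == nil {
			badsignal(sig)
			return
		}
		saved := g      // MOVD g, 40(R1)
		g = g.m.gsignal // run the handler on the signal goroutine
		sighandler(sig, info, ctx, saved)
		g = saved       // MOVD 40(R1), g
	}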
+
+TEXT runtime·mmap(SB),NOSPLIT,$-8
+ MOVD addr+0(FP), R3
+ MOVD n+8(FP), R4
+ MOVW prot+16(FP), R5
+ MOVW flags+20(FP), R6
+ MOVW fd+24(FP), R7
+ MOVW off+28(FP), R8
+
+ SYSCALL $SYS_mmap
+ MOVD R3, ret+32(FP)
+ RETURN
+
+TEXT runtime·munmap(SB),NOSPLIT,$-8
+ MOVD addr+0(FP), R3
+ MOVD n+8(FP), R4
+ SYSCALL $SYS_munmap
+ BVC 2(PC)
+ MOVD R0, 0xf3(R0)
+ RETURN
+
+TEXT runtime·madvise(SB),NOSPLIT,$-8
+ MOVD addr+0(FP), R3
+ MOVD n+8(FP), R4
+ MOVW flags+16(FP), R5
+ SYSCALL $SYS_madvise
+ // ignore failure - maybe pages are locked
+ RETURN
+
+// int64 futex(int32 *uaddr, int32 op, int32 val,
+// struct timespec *timeout, int32 *uaddr2, int32 val2);
+TEXT runtime·futex(SB),NOSPLIT,$-8
+ MOVD addr+0(FP), R3
+ MOVW op+8(FP), R4
+ MOVW val+12(FP), R5
+ MOVD ts+16(FP), R6
+ MOVD addr2+24(FP), R7
+ MOVW val3+32(FP), R8
+ SYSCALL $SYS_futex
+ MOVW R3, ret+40(FP)
+ RETURN
+
+// int64 clone(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void));
+TEXT runtime·clone(SB),NOSPLIT,$-8
+ MOVW flags+0(FP), R3
+ MOVD stk+8(FP), R4
+
+ // Copy mp, gp, fn off parent stack for use by child.
+ // Careful: Linux system call clobbers ???.
+ MOVD mm+16(FP), R7
+ MOVD gg+24(FP), R8
+ MOVD fn+32(FP), R12
+
+ MOVD R7, -8(R4)
+ MOVD R8, -16(R4)
+ MOVD R12, -24(R4)
+ MOVD $1234, R7
+ MOVD R7, -32(R4)
+
+ SYSCALL $SYS_clone
+
+ // In parent, return.
+ CMP R3, $0
+ BEQ 3(PC)
+ MOVW R3, ret+40(FP)
+ RETURN
+
+ // In child, on new stack.
+ // initialize essential registers
+ BL runtime·reginit(SB)
+ MOVD -32(R1), R7
+ CMP R7, $1234
+ BEQ 2(PC)
+ MOVD R0, 0(R0)
+
+ // Initialize m->procid to Linux tid
+ SYSCALL $SYS_gettid
+
+ MOVD -24(R1), R12
+ MOVD -16(R1), R8
+ MOVD -8(R1), R7
+
+ MOVD R3, m_procid(R7)
+
+ // TODO: setup TLS.
+
+ // In child, set up new stack
+ MOVD R7, g_m(R8)
+ MOVD R8, g
+ //CALL runtime·stackcheck(SB)
+
+ // Call fn
+ MOVD R12, CTR
+ BL (CTR)
+
+ // It shouldn't return. If it does, exit
+ MOVW $111, R3
+ SYSCALL $SYS_exit_group
+ BR -2(PC) // keep exiting
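The clone wrapper cannot rely on registers surviving into the child, so it parks mp, gp, fn, and a 1234 sentinel just below the child's stack pointer and reads them back on the other side; anything other than 1234 means the stack handoff went wrong. Roughly, in Go-flavored pseudocode (not compilable; names as in the assembly):

	// parent
	stash(childSP, mp, gp, fn, 1234) // MOVD ..., -8(R4) .. -32(R4)
	if ret := clone(flags, childSP); ret != 0 {
		return ret // parent: child id (or error)
	}

	// child, now running on childSP
	if unstash(sentinel) != 1234 {
		crash() // MOVD R0, 0(R0)
	}
	mp, gp, fn := unstashArgs()
	mp.procid = gettid()
	gp.m = mp
	g = gp
	fn()            // should not return
	exit_group(111) // but if it does, keep exiting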
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVD new+0(FP), R3
+ MOVD old+8(FP), R4
+ SYSCALL $SYS_sigaltstack
+ BVC 2(PC)
+ MOVD R0, 0xf1(R0) // crash
+ RETURN
+
+TEXT runtime·osyield(SB),NOSPLIT,$-8
+ SYSCALL $SYS_sched_yield
+ RETURN
+
+TEXT runtime·sched_getaffinity(SB),NOSPLIT,$-8
+ MOVD pid+0(FP), R3
+ MOVD len+8(FP), R4
+ MOVD buf+16(FP), R5
+ SYSCALL $SYS_sched_getaffinity
+ MOVW R3, ret+24(FP)
+ RETURN
+
+// int32 runtime·epollcreate(int32 size);
+TEXT runtime·epollcreate(SB),NOSPLIT,$-8
+ MOVW size+0(FP), R3
+ SYSCALL $SYS_epoll_create
+ MOVW R3, ret+8(FP)
+ RETURN
+
+// int32 runtime·epollcreate1(int32 flags);
+TEXT runtime·epollcreate1(SB),NOSPLIT,$-8
+ MOVW flags+0(FP), R3
+ SYSCALL $SYS_epoll_create1
+ MOVW R3, ret+8(FP)
+ RETURN
+
+// func epollctl(epfd, op, fd int32, ev *epollEvent) int
+TEXT runtime·epollctl(SB),NOSPLIT,$-8
+ MOVW epfd+0(FP), R3
+ MOVW op+4(FP), R4
+ MOVW fd+8(FP), R5
+ MOVD ev+16(FP), R6
+ SYSCALL $SYS_epoll_ctl
+ MOVW R3, ret+24(FP)
+ RETURN
+
+// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
+TEXT runtime·epollwait(SB),NOSPLIT,$-8
+ MOVW epfd+0(FP), R3
+ MOVD ev+8(FP), R4
+ MOVW nev+16(FP), R5
+ MOVW timeout+20(FP), R6
+ SYSCALL $SYS_epoll_wait
+ MOVW R3, ret+24(FP)
+ RETURN
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$-8
+ MOVW fd+0(FP), R3 // fd
+ MOVD $2, R4 // F_SETFD
+ MOVD $1, R5 // FD_CLOEXEC
+ SYSCALL $SYS_fcntl
+ RETURN
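closeonexec above issues fcntl(fd, F_SETFD, FD_CLOEXEC) directly. A runnable, Linux-only sketch of the same request via package syscall:

	package main

	import (
		"fmt"
		"os"
		"syscall"
	)

	// closeonexec marks fd to be closed across exec, as the assembly does
	// with F_SETFD (2) and FD_CLOEXEC (1).
	func closeonexec(fd int) error {
		_, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd),
			syscall.F_SETFD, syscall.FD_CLOEXEC)
		if errno != 0 {
			return errno
		}
		return nil
	}

	func main() {
		f, err := os.Open("/dev/null")
		if err != nil {
			panic(err)
		}
		defer f.Close()
		fmt.Println(closeonexec(int(f.Fd()))) // <nil>
	}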
diff --git a/src/runtime/sys_nacl_386.s b/src/runtime/sys_nacl_386.s
index 47985f31f..16cd721d9 100644
--- a/src/runtime/sys_nacl_386.s
+++ b/src/runtime/sys_nacl_386.s
@@ -293,7 +293,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0
MOVL $0, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
- JMP sigtramp_ret
+ JMP ret
// save g
MOVL DI, 20(SP)
@@ -317,7 +317,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0
MOVL 20(SP), BX
MOVL BX, g(CX)
-sigtramp_ret:
+ret:
// Enable exceptions again.
NACL_SYSCALL(SYS_exception_clear_flag)
diff --git a/src/runtime/sys_nacl_amd64p32.s b/src/runtime/sys_nacl_amd64p32.s
index 4eb4aacdd..9cfbef6ef 100644
--- a/src/runtime/sys_nacl_amd64p32.s
+++ b/src/runtime/sys_nacl_amd64p32.s
@@ -338,7 +338,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$80
MOVL 20(SP), BX
MOVL BX, g(CX)
-sigtramp_ret:
// Enable exceptions again.
NACL_SYSCALL(SYS_exception_clear_flag)
diff --git a/src/runtime/sys_nacl_arm.s b/src/runtime/sys_nacl_arm.s
index d354ab483..432deadf4 100644
--- a/src/runtime/sys_nacl_arm.s
+++ b/src/runtime/sys_nacl_arm.s
@@ -269,7 +269,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$80
// restore g
MOVW 20(R13), g
-sigtramp_ret:
// Enable exceptions again.
NACL_SYSCALL(SYS_exception_clear_flag)
diff --git a/src/runtime/sys_openbsd_386.s b/src/runtime/sys_openbsd_386.s
index 5cda7768a..b1ae5ecee 100644
--- a/src/runtime/sys_openbsd_386.s
+++ b/src/runtime/sys_openbsd_386.s
@@ -186,7 +186,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL BX, 0(SP)
MOVL $runtime·badsignal(SB), AX
CALL AX
- JMP sigtramp_ret
+ JMP ret
// save g
MOVL DI, 20(SP)
@@ -212,7 +212,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$44
MOVL 20(SP), BX
MOVL BX, g(CX)
-sigtramp_ret:
+ret:
// call sigreturn
MOVL context+8(FP), AX
MOVL $0, 0(SP) // syscall gap
diff --git a/src/runtime/sys_power64x.c b/src/runtime/sys_power64x.c
new file mode 100644
index 000000000..79d976255
--- /dev/null
+++ b/src/runtime/sys_power64x.c
@@ -0,0 +1,38 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build power64 power64le
+
+#include "runtime.h"
+
+// adjust Gobuf as if it executed a call to fn with context ctxt
+// and then did an immediate Gosave.
+void
+runtime·gostartcall(Gobuf *gobuf, void (*fn)(void), void *ctxt)
+{
+ if(gobuf->lr != 0)
+ runtime·throw("invalid use of gostartcall");
+ gobuf->lr = gobuf->pc;
+ gobuf->pc = (uintptr)fn;
+ gobuf->ctxt = ctxt;
+}
+
+// Called to rewind context saved during morestack back to beginning of function.
+// To help us, the linker emits a jmp back to the beginning right after the
+// call to morestack. We just have to decode and apply that jump.
+void
+runtime·rewindmorestack(Gobuf *gobuf)
+{
+ uint32 inst;
+
+ inst = *(uint32*)gobuf->pc;
+ if((gobuf->pc&3) == 0 && (inst>>24) == 0x4b && (inst&3) == 0) {
+ //runtime·printf("runtime: rewind pc=%p to pc=%p\n", gobuf->pc, gobuf->pc + ((int32)(inst<<8)>>8));
+ gobuf->pc += (int32)(inst<<8)>>8;
+ return;
+ }
+ runtime·printf("runtime: pc=%p %x\n", gobuf->pc, inst);
+ runtime·throw("runtime: misuse of rewindmorestack");
+}
+
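rewindmorestack only accepts the one instruction the linker promises to emit right after the morestack call: an unconditional backwards branch with the AA and LK bits clear, whose top byte is 0x4b. The low 24 bits are then sign-extended to recover the byte offset, which is exactly what (int32)(inst<<8)>>8 does. A runnable sketch of the decode:

	package main

	import "fmt"

	// rewindOffset returns the branch displacement encoded in inst, or
	// false if inst is not the kind of branch rewindmorestack accepts.
	func rewindOffset(inst uint32) (int32, bool) {
		if inst>>24 != 0x4b || inst&3 != 0 {
			return 0, false
		}
		// Shift the offset up to the sign bit and back down to sign-extend
		// the low 24 bits, as the C code does.
		return int32(inst<<8) >> 8, true
	}

	func main() {
		off, ok := rewindOffset(0x4bffff80)
		fmt.Println(off, ok) // -128 true: branch back 128 bytes
	}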
diff --git a/src/runtime/sys_solaris_amd64.s b/src/runtime/sys_solaris_amd64.s
index 0ebdab6ee..3981893b0 100644
--- a/src/runtime/sys_solaris_amd64.s
+++ b/src/runtime/sys_solaris_amd64.s
@@ -287,24 +287,24 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0
// Execute call on m->g0.
get_tls(R15)
CMPQ R15, $0
- JE usleep1_noswitch
+ JE noswitch
MOVQ g(R15), R13
CMPQ R13, $0
- JE usleep1_noswitch
+ JE noswitch
MOVQ g_m(R13), R13
CMPQ R13, $0
- JE usleep1_noswitch
+ JE noswitch
// TODO(aram): do something about the cpu profiler here.
MOVQ m_g0(R13), R14
CMPQ g(R15), R14
- JNE usleep1_switch
+ JNE switch
// executing on m->g0 already
CALL AX
RET
-usleep1_switch:
+switch:
// Switch to m->g0 stack and back.
MOVQ (g_sched+gobuf_sp)(R14), R14
MOVQ SP, -8(R14)
@@ -313,7 +313,7 @@ usleep1_switch:
MOVQ 0(SP), SP
RET
-usleep1_noswitch:
+noswitch:
// Not a Go-managed thread. Do not switch stack.
CALL AX
RET
diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s
index 932fe9dd2..13fb5bdc9 100644
--- a/src/runtime/sys_windows_386.s
+++ b/src/runtime/sys_windows_386.s
@@ -106,7 +106,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0
MOVL g_m(DX), BX
MOVL m_g0(BX), BX
CMPL DX, BX
- JEQ sigtramp_g0
+ JEQ g0
// switch to the g0 stack
get_tls(BP)
@@ -123,7 +123,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0
MOVL SP, 36(DI)
MOVL DI, SP
-sigtramp_g0:
+g0:
MOVL 0(CX), BX // ExceptionRecord*
MOVL 4(CX), CX // Context*
MOVL BX, 0(SP)
@@ -383,12 +383,12 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0
MOVL m_g0(BP), SI
CMPL g(CX), SI
- JNE usleep1_switch
+ JNE switch
// executing on m->g0 already
CALL AX
- JMP usleep1_ret
+ JMP ret
-usleep1_switch:
+switch:
// Switch to m->g0 stack and back.
MOVL (g_sched+gobuf_sp)(SI), SI
MOVL SP, -4(SI)
@@ -396,7 +396,7 @@ usleep1_switch:
CALL AX
MOVL 0(SP), SP
-usleep1_ret:
+ret:
get_tls(CX)
MOVL g(CX), BP
MOVL g_m(BP), BP
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
index e6190ce68..8b95f6d6c 100644
--- a/src/runtime/sys_windows_amd64.s
+++ b/src/runtime/sys_windows_amd64.s
@@ -138,7 +138,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0
MOVQ g_m(DX), BX
MOVQ m_g0(BX), BX
CMPQ DX, BX
- JEQ sigtramp_g0
+ JEQ g0
// switch to g0 stack
get_tls(BP)
@@ -157,7 +157,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$0-0
MOVQ SP, 104(DI)
MOVQ DI, SP
-sigtramp_g0:
+g0:
MOVQ 0(CX), BX // ExceptionRecord*
MOVQ 8(CX), CX // Context*
MOVQ BX, 0(SP)
@@ -407,12 +407,12 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0
MOVQ m_g0(R13), R14
CMPQ g(R15), R14
- JNE usleep1_switch
+ JNE switch
// executing on m->g0 already
CALL AX
- JMP usleep1_ret
+ JMP ret
-usleep1_switch:
+switch:
// Switch to m->g0 stack and back.
MOVQ (g_sched+gobuf_sp)(R14), R14
MOVQ SP, -8(R14)
@@ -420,7 +420,7 @@ usleep1_switch:
CALL AX
MOVQ 0(SP), SP
-usleep1_ret:
+ret:
MOVQ $0, m_libcallsp(R13)
RET
diff --git a/src/runtime/sys_x86.c b/src/runtime/sys_x86.c
index a450b3e58..edbe47ff4 100644
--- a/src/runtime/sys_x86.c
+++ b/src/runtime/sys_x86.c
@@ -20,6 +20,7 @@ runtime·gostartcall(Gobuf *gobuf, void (*fn)(void), void *ctxt)
gobuf->sp = (uintptr)sp;
gobuf->pc = (uintptr)fn;
gobuf->ctxt = ctxt;
+ runtime·writebarrierptr_nostore(&gobuf->ctxt, ctxt);
}
// Called to rewind context saved during morestack back to beginning of function.
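Like the newstack hunk above, this adds a writebarrierptr_nostore call after a pointer store that has already happened. As far as these hunks show it, the split is: writebarrierptr performs the store plus the collector's bookkeeping (wbfat.go below uses it in place of a plain assignment), while the _nostore variant does only the bookkeeping. A toy, runnable model of that contract (the map is merely a stand-in for the GC's real records, not how the runtime implements it):

	package main

	import "fmt"

	var shaded = map[*int]bool{} // stand-in for the collector's records

	// writebarrierptr stores src into *dst and records src.
	func writebarrierptr(dst **int, src *int) {
		*dst = src
		shaded[src] = true
	}

	// writebarrierptrNostore records src only; *dst must already hold it.
	func writebarrierptrNostore(dst **int, src *int) {
		shaded[src] = true
	}

	func main() {
		x, y := new(int), new(int)
		var p *int
		writebarrierptr(&p, x)           // store plus bookkeeping
		p = y                            // store already performed elsewhere...
		writebarrierptrNostore(&p, y)    // ...so only the bookkeeping remains
		fmt.Println(p == y, len(shaded)) // true 2
	}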
diff --git a/src/runtime/thunk.s b/src/runtime/thunk.s
index 0a0f147c4..1a5b65502 100644
--- a/src/runtime/thunk.s
+++ b/src/runtime/thunk.s
@@ -10,6 +10,12 @@
#ifdef GOARCH_arm
#define JMP B
#endif
+#ifdef GOARCH_power64
+#define JMP BR
+#endif
+#ifdef GOARCH_power64le
+#define JMP BR
+#endif
TEXT net·runtimeNano(SB),NOSPLIT,$0-0
JMP runtime·nanotime(SB)
diff --git a/src/runtime/wbfat.go b/src/runtime/wbfat.go
new file mode 100644
index 000000000..75c58b26b
--- /dev/null
+++ b/src/runtime/wbfat.go
@@ -0,0 +1,190 @@
+// generated by wbfat_gen.go; use go generate
+
+package runtime
+
+//go:nosplit
+func writebarrierfat01(dst *[2]uintptr, _ *byte, src [2]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+}
+
+//go:nosplit
+func writebarrierfat10(dst *[2]uintptr, _ *byte, src [2]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+}
+
+//go:nosplit
+func writebarrierfat11(dst *[2]uintptr, _ *byte, src [2]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+}
+
+//go:nosplit
+func writebarrierfat001(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat010(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+}
+
+//go:nosplit
+func writebarrierfat011(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat100(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ dst[2] = src[2]
+}
+
+//go:nosplit
+func writebarrierfat101(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat110(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+}
+
+//go:nosplit
+func writebarrierfat111(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat0001(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat0010(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat0011(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat0100(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat0101(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat0110(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat0111(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1000(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ dst[2] = src[2]
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1001(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1010(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1011(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1100(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1101(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1110(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1111(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
diff --git a/src/runtime/wbfat_gen.go b/src/runtime/wbfat_gen.go
new file mode 100644
index 000000000..78d5b6271
--- /dev/null
+++ b/src/runtime/wbfat_gen.go
@@ -0,0 +1,41 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "os"
+)
+
+func main() {
+ flag.Parse()
+ if flag.NArg() > 0 {
+ f, err := os.Create(flag.Arg(0))
+ if err != nil {
+ log.Fatal(err)
+ }
+ os.Stdout = f
+ }
+ fmt.Printf("// generated by wbfat_gen.go; use go generate\n\n")
+ fmt.Printf("package runtime\n")
+ for i := uint(2); i <= 4; i++ {
+ for j := 1; j < 1<<i; j++ {
+ fmt.Printf("\n//go:nosplit\n")
+ fmt.Printf("func writebarrierfat%0*b(dst *[%d]uintptr, _ *byte, src [%d]uintptr) {\n", int(i), j, i, i)
+ for k := uint(0); k < i; k++ {
+ if j&(1<<(i-1-k)) != 0 {
+ fmt.Printf("\twritebarrierptr(&dst[%d], src[%d])\n", k, k)
+ } else {
+ fmt.Printf("\tdst[%d] = src[%d]\n", k, k)
+ }
+ }
+ fmt.Printf("}\n")
+ }
+ }
+}
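wbfat_gen.go writes one helper per bit pattern: for word counts 2 through 4, the zero-padded binary suffix reads left to right across the words, 1 meaning writebarrierptr and 0 meaning a plain copy, which is exactly the shape of the bodies in wbfat.go above. A plausible regeneration command (the go:generate hookup lives elsewhere in the tree and is not part of this diff, so treat the exact invocation as an assumption):

	go run wbfat_gen.go -- wbfat.go

The "--" keeps go run from treating wbfat.go as a second source file, so it reaches the generator as flag.Arg(0) and becomes the output file.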