summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Anthony Knyszek <mknyszek@google.com>2023-05-12 18:55:25 +0000
committerGopher Robot <gobot@golang.org>2023-05-17 15:50:17 +0000
commit865179164ec4ce5125df3e7378b8353975224216 (patch)
tree24f4108ae4ebd94d3b3e00aa3e7fbc3cfbfd73c1
parente3ada56537e0fdf04111de01a336902b1e0fb9ab (diff)
downloadgo-git-865179164ec4ce5125df3e7378b8353975224216.tar.gz
runtime: replace sysBlockTraced with tracedSyscallEnter
sysBlockTraced is a subtle and confusing flag. Currently, it's only used in one place: a condition around whether to traceGoSysExit when a goroutine is about to start running. That condition looks like "gp.syscallsp != 0 && gp.trace.sysBlockTraced". In every case but one, "gp.syscallsp != 0" is equivalent to "gp.trace.sysBlockTraced." That one case is where a goroutine is running without a P and racing with trace start (world is stopped). It switches itself back to _Grunnable from _Gsyscall before the trace start goroutine notices, such that the trace start goroutine fails to emit a EvGoInSyscall event for it (EvGoInSyscall or EvGoSysBlock must precede any EvGoSysExit event). sysBlockTraced is set unconditionally on every syscall entry and the trace start goroutine clears it if there was no EvGoInSyscall event emitted (i.e. did not observe _Gsyscall on the goroutine). That way when the goroutine-without-a-P wakes up and gets scheduled, it only emits EvGoSysExit if the flag is set, i.e. trace start didn't _clear_ the flag. What makes this confusing is the fact that the flag is set unconditionally and the code relies on it being *cleared*. Really, all it's trying to communicate is whether the tracer is aware of a goroutine's syscall at the point where a goroutine that lost its P during a syscall is trying to run again. Therefore, we can replace this flag with a less subtle one: tracedSyscallEnter. It is set when GoSysCall is traced, indicating on the goroutine that the tracer is aware of the syscall. Later, if traceGoSysExit is called, the tracer knows its safe to emit an event because the tracer is aware of the syscall. This flag is then also set at trace start, when it emits EvGoInSyscall, which again, lets the goroutine know the tracer is aware of its syscall. The flag is cleared by GoSysExit to indicate that the tracer is no longer aware of any syscalls on the goroutine. It's also cleared by trace start. This is necessary because a syscall may have been started while a trace was stopping. If the GoSysExit isn't emitted (because it races with the trace end STW) then the flag will be left set at the start of the next trace period, which will result in an erroneous GoSysExit. Instead, the flag is cleared in the same way sysBlockTraced is today: if the tracer doesn't notice the goroutine is in a syscall, it makes that explicit to the goroutine. A more direct flag to use here would be one that explicitly indicates whether EvGoInSyscall or EvGoSysBlock specifically were already emitted for a goroutine. The reason why we don't just do this is because setting the flag when EvGoSysBlock is emitted would be racy: EvGoSysBlock is emitted by whatever thread is stealing the P out from under the syscalling goroutine, so it would need to synchronize with the goroutine its emitting the event for. The end result of all this is that the new flag can be managed entirely within trace.go, hiding another implementation detail about the tracer. Tested with `stress ./trace.test -test.run="TestTraceStressStartStop"` which was occasionally failing before the CL in which sysBlockTraced was added (CL 9132). I also confirmed also that this test is still sensitive to `EvGoSysExit` by removing the one use of sysBlockTraced. The result is about a 5% error rate. If there is something very subtly wrong about how this CL emits `EvGoSysExit`, I would expect to see it as a test failure. Instead: 53m55s: 200434 runs so far, 0 failures Change-Id: If1d24ee6b6926eec7e90cdb66039a5abac819d9b Reviewed-on: https://go-review.googlesource.com/c/go/+/494715 TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Knyszek <mknyszek@google.com> Auto-Submit: Michael Knyszek <mknyszek@google.com>
-rw-r--r--src/runtime/proc.go5
-rw-r--r--src/runtime/trace.go25
2 files changed, 21 insertions, 9 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 363e8befe6..fd892115bf 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1999,7 +1999,6 @@ func oneNewExtraM() {
mp.lockedg.set(gp)
gp.lockedm.set(mp)
gp.goid = sched.goidgen.Add(1)
- gp.trace.sysBlockTraced = true
if raceenabled {
gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum)
}
@@ -2705,7 +2704,7 @@ func execute(gp *g, inheritTime bool) {
if traceEnabled() {
// GoSysExit has to happen when we have a P, but before GoStart.
// So we emit it here.
- if gp.syscallsp != 0 && gp.trace.sysBlockTraced {
+ if gp.syscallsp != 0 {
traceGoSysExit()
}
traceGoStart()
@@ -3856,7 +3855,6 @@ func reentersyscall(pc, sp uintptr) {
}
gp.m.syscalltick = gp.m.p.ptr().syscalltick
- gp.trace.sysBlockTraced = true
pp := gp.m.p.ptr()
pp.m = 0
gp.m.oldp.set(pp)
@@ -3917,7 +3915,6 @@ func entersyscallblock() {
gp.throwsplit = true
gp.stackguard0 = stackPreempt // see comment in entersyscall
gp.m.syscalltick = gp.m.p.ptr().syscalltick
- gp.trace.sysBlockTraced = true
gp.m.p.ptr().syscalltick++
// Leave SP around for GC and traceback.
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 9c7792d42b..45a066e7a2 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -163,10 +163,10 @@ var trace struct {
// gTraceState is per-G state for the tracer.
type gTraceState struct {
- sysExitTicks int64 // cputicks when syscall has returned
- sysBlockTraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
- seq uint64 // trace event sequencer
- lastP puintptr // last P emitted an event for this goroutine
+ sysExitTicks int64 // cputicks when syscall has returned
+ tracedSyscallEnter bool // syscall or cgo was entered while trace was enabled or StartTrace has emitted EvGoInSyscall about this goroutine
+ seq uint64 // trace event sequencer
+ lastP puintptr // last P emitted an event for this goroutine
}
// mTraceState is per-M state for the tracer.
@@ -309,6 +309,7 @@ func StartTrace() error {
}
if status == _Gsyscall {
gp.trace.seq++
+ gp.trace.tracedSyscallEnter = true
traceEvent(traceEvGoInSyscall, -1, gp.goid)
} else if status == _Gdead && gp.m != nil && gp.m.isextra {
// Trigger two trace events for the dead g in the extra m,
@@ -320,9 +321,16 @@ func StartTrace() error {
id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(0) + sys.PCQuantum}) // no start pc
traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
gp.trace.seq++
+ gp.trace.tracedSyscallEnter = true
traceEvent(traceEvGoInSyscall, -1, gp.goid)
} else {
- gp.trace.sysBlockTraced = false
+ // We need to explicitly clear the flag. A previous trace might have ended with a goroutine
+ // not emitting a GoSysExit and clearing the flag, leaving it in a stale state. Clearing
+ // it here makes it unambiguous to any goroutine exiting a syscall racing with us that
+ // no EvGoInSyscall event was emitted for it. (It's not racy to set this flag here, because
+ // it'll only get checked when the goroutine runs again, which will be after the world starts
+ // again.)
+ gp.trace.tracedSyscallEnter = false
}
})
traceProcStart()
@@ -1603,11 +1611,18 @@ func traceGoSysCall() {
// Skip the extra trampoline frame used on most systems.
skip = 4
}
+ getg().m.curg.trace.tracedSyscallEnter = true
traceEvent(traceEvGoSysCall, skip)
}
func traceGoSysExit() {
gp := getg().m.curg
+ if !gp.trace.tracedSyscallEnter {
+ // There was no syscall entry traced for us at all, so there's definitely
+ // no EvGoSysBlock or EvGoInSyscall before us, which EvGoSysExit requires.
+ return
+ }
+ gp.trace.tracedSyscallEnter = false
ts := gp.trace.sysExitTicks
if ts != 0 && ts < trace.ticksStart {
// There is a race between the code that initializes sysExitTicks