Diffstat (limited to 'libgo/go/runtime/mgcscavenge.go')
-rw-r--r--  libgo/go/runtime/mgcscavenge.go  395
1 file changed, 205 insertions(+), 190 deletions(-)
diff --git a/libgo/go/runtime/mgcscavenge.go b/libgo/go/runtime/mgcscavenge.go
index 3fa1c4604d7..adf2b058af4 100644
--- a/libgo/go/runtime/mgcscavenge.go
+++ b/libgo/go/runtime/mgcscavenge.go
@@ -56,6 +56,7 @@
package runtime
import (
+ "internal/goos"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -90,7 +91,7 @@ const (
//
// This ratio is used as part of a multiplicative factor to help the scavenger account
// for the additional costs of using scavenged memory in its pacing.
- scavengeCostRatio = 0.7 * (sys.GoosDarwin + sys.GoosIos)
+ scavengeCostRatio = 0.7 * (goos.IsDarwin + goos.IsIos)
// scavengeReservationShards determines the amount of memory the scavenger
// should reserve for scavenging at a time. Specifically, the amount of
@@ -104,7 +105,8 @@ func heapRetained() uint64 {
}
// gcPaceScavenger updates the scavenger's pacing, particularly
-// its rate and RSS goal.
+// its rate and RSS goal. For this, it requires the current heapGoal,
+// and the heapGoal for the previous GC cycle.
//
// The RSS goal is based on the current heap goal with a small overhead
// to accommodate non-determinism in the allocator.
@@ -112,18 +114,22 @@ func heapRetained() uint64 {
// The pacing is based on scavengePageRate, which applies to both regular and
// huge pages. See that constant for more information.
//
+// Must be called whenever GC pacing is updated.
+//
// mheap_.lock must be held or the world must be stopped.
-func gcPaceScavenger() {
+func gcPaceScavenger(heapGoal, lastHeapGoal uint64) {
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+
// If we're called before the first GC completed, disable scavenging.
// We never scavenge before the 2nd GC cycle anyway (we don't have enough
// information about the heap yet) so this is fine, and avoids a fault
// or garbage data later.
- if gcController.lastHeapGoal == 0 {
- mheap_.scavengeGoal = ^uint64(0)
+ if lastHeapGoal == 0 {
+ atomic.Store64(&mheap_.scavengeGoal, ^uint64(0))
return
}
// Compute our scavenging goal.
- goalRatio := float64(atomic.Load64(&gcController.heapGoal)) / float64(gcController.lastHeapGoal)
+ goalRatio := float64(heapGoal) / float64(lastHeapGoal)
retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio)
// Add retainExtraPercent overhead to retainedGoal. This calculation
// looks strange but the purpose is to arrive at an integer division
@@ -151,10 +157,10 @@ func gcPaceScavenger() {
// the background scavenger. We disable the background scavenger if there's
// less than one physical page of work to do because it's not worth it.
if retainedNow <= retainedGoal || retainedNow-retainedGoal < uint64(physPageSize) {
- mheap_.scavengeGoal = ^uint64(0)
+ atomic.Store64(&mheap_.scavengeGoal, ^uint64(0))
return
}
- mheap_.scavengeGoal = retainedGoal
+ atomic.Store64(&mheap_.scavengeGoal, retainedGoal)
}
// Sleep/wait state of the background scavenger.
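For reference, the pacing computation in the gcPaceScavenger hunks above reduces to: scale the previous cycle's heap-in-use by heapGoal/lastHeapGoal, pad the result by a small overhead, round it up to a physical page, and disable the scavenger if retained memory is already within one page of that target. A minimal standalone sketch of that arithmetic follows; the 10% value for retainExtraPercent and the helper name are assumptions for illustration, not quoted from this patch.

// scavengeGoalSketch mirrors the pacing logic of gcPaceScavenger in isolation.
// All arguments are byte counts; pageSize stands in for physPageSize.
func scavengeGoalSketch(heapGoal, lastHeapGoal, heapInUse, retained, pageSize uint64) (goal uint64, enabled bool) {
	const retainExtraPercent = 10 // assumed overhead percentage
	if lastHeapGoal == 0 {
		return ^uint64(0), false // before the first GC: scavenging disabled
	}
	goalRatio := float64(heapGoal) / float64(lastHeapGoal)
	goal = uint64(float64(heapInUse) * goalRatio)
	goal += goal / (100 / retainExtraPercent)      // add ~10% overhead
	goal = (goal + pageSize - 1) &^ (pageSize - 1) // round up to a physical page
	if retained <= goal || retained-goal < pageSize {
		return ^uint64(0), false // less than a page of work: not worth running
	}
	return goal, true
}

For example, with heapGoal = 64 MiB, lastHeapGoal = 60 MiB, heapInUse = 58 MiB and 70 MiB currently retained, this yields a goal of roughly 68 MiB and leaves the scavenger enabled.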
@@ -249,7 +255,7 @@ func scavengeSleep(ns int64) int64 {
// The background scavenger maintains the RSS of the application below
// the line described by the proportional scavenging statistics in
// the mheap struct.
-func bgscavenge() {
+func bgscavenge(c chan int) {
setSystemGoroutine()
scavenge.g = getg()
@@ -259,56 +265,93 @@ func bgscavenge() {
scavenge.parked = true
scavenge.timer = new(timer)
- scavenge.timer.f = func(_ interface{}, _ uintptr) {
+ scavenge.timer.f = func(_ any, _ uintptr) {
wakeScavenger()
}
- gcenable_setup <- 1
+ c <- 1
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
- // Exponentially-weighted moving average of the fraction of time this
- // goroutine spends scavenging (that is, percent of a single CPU).
- // It represents a measure of scheduling overheads which might extend
- // the sleep or the critical time beyond what's expected. Assume no
- // overhead to begin with.
- //
- // TODO(mknyszek): Consider making this based on total CPU time of the
- // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
- // feasible now because the scavenger acquires the heap lock over the
- // scavenging operation, which means scavenging effectively blocks
- // allocators and isn't scalable. However, given a scalable allocator,
- // it makes sense to also make the scavenger scale with it; if you're
- // allocating more frequently, then presumably you're also generating
- // more work for the scavenger.
- const idealFraction = scavengePercent / 100.0
- scavengeEWMA := float64(idealFraction)
+ // idealFraction is the ideal % of overall application CPU time that we
+ // spend scavenging.
+ idealFraction := float64(scavengePercent) / 100.0
+ // Input: fraction of CPU time used.
+ // Setpoint: idealFraction.
+ // Output: ratio of critical time to sleep time (determines sleep time).
+ //
+ // The output of this controller is somewhat indirect to what we actually
+ // want to achieve: how much time to sleep for. The reason for this definition
+ // is to ensure that the controller's outputs have a direct relationship with
+ // its inputs (as opposed to an inverse relationship), making it somewhat
+ // easier to reason about for tuning purposes.
+ critSleepController := piController{
+ // Tuned loosely via Ziegler-Nichols process.
+ kp: 0.3375,
+ ti: 3.2e6,
+ tt: 1e9, // 1 second reset time.
+
+ // These ranges seem wide, but we want to give the controller plenty of
+ // room to hunt for the optimal value.
+ min: 0.001, // 1:1000
+ max: 1000.0, // 1000:1
+ }
+ // It doesn't really matter what value we start at, but it can't be zero, because
+ // that'll cause divide-by-zero issues.
+ critSleepRatio := 0.001
for {
released := uintptr(0)
-
- // Time in scavenging critical section.
crit := float64(0)
- // Run on the system stack since we grab the heap lock,
- // and a stack growth with the heap lock means a deadlock.
- systemstack(func() {
- lock(&mheap_.lock)
-
+ // Spend at least 1 ms scavenging, otherwise the corresponding
+ // sleep time to maintain our desired utilization is too low to
+ // be reliable.
+ const minCritTime = 1e6
+ for crit < minCritTime {
// If background scavenging is disabled or if there's no work to do just park.
- retained, goal := heapRetained(), mheap_.scavengeGoal
+ retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal)
if retained <= goal {
- unlock(&mheap_.lock)
- return
+ break
}
- // Scavenge one page, and measure the amount of time spent scavenging.
+ // scavengeQuantum is the amount of memory we try to scavenge
+ // in one go. A smaller value means the scavenger is more responsive
+ // to the scheduler in case of e.g. preemption. A larger value means
+ // that the overheads of scavenging are better amortized, so better
+ // scavenging throughput.
+ //
+ // The current value is chosen assuming a cost of ~10µs/physical page
+ // (this is somewhat pessimistic), which implies a worst-case latency of
+ // about 160µs for 4 KiB physical pages. The current value is biased
+ // toward latency over throughput.
+ const scavengeQuantum = 64 << 10
+
+ // Accumulate the amount of time spent scavenging.
start := nanotime()
- released = mheap_.pages.scavenge(physPageSize, true)
- mheap_.pages.scav.released += released
- crit = float64(nanotime() - start)
+ r := mheap_.pages.scavenge(scavengeQuantum)
+ atomic.Xadduintptr(&mheap_.pages.scav.released, r)
+ end := nanotime()
- unlock(&mheap_.lock)
- })
+ // On some platforms we may see end <= start if the time it takes to scavenge
+ // memory is less than the minimum granularity of its clock (e.g. Windows) or
+ // due to clock bugs.
+ //
+ // In this case, just assume scavenging takes 10 µs per regular physical page
+ // (determined empirically), and conservatively ignore the impact of huge pages
+ // on timing.
+ const approxCritNSPerPhysicalPage = 10e3
+ if end <= start {
+ crit += approxCritNSPerPhysicalPage * float64(r/physPageSize)
+ } else {
+ crit += float64(end - start)
+ }
+ released += r
+
+ // When using fake time just do one loop.
+ if faketime != 0 {
+ break
+ }
+ }
if released == 0 {
lock(&scavenge.lock)
@@ -325,18 +368,13 @@ func bgscavenge() {
throw("released less than one physical page of memory")
}
- // On some platforms we may see crit as zero if the time it takes to scavenge
- // memory is less than the minimum granularity of its clock (e.g. Windows).
- // In this case, just assume scavenging takes 10 µs per regular physical page
- // (determined empirically), and conservatively ignore the impact of huge pages
- // on timing.
- //
- // We shouldn't ever see a crit value less than zero unless there's a bug of
- // some kind, either on our side or in the platform we're running on, but be
- // defensive in that case as well.
- const approxCritNSPerPhysicalPage = 10e3
- if crit <= 0 {
- crit = approxCritNSPerPhysicalPage * float64(released/physPageSize)
+ if crit < minCritTime {
+ // This means there wasn't enough work to actually fill up minCritTime.
+ // That's fine; we shouldn't try to do anything with this information
+ // because it's going to result in a short enough sleep request that things
+ // will get messy. Just assume we did at least this much work.
+ // All this means is that we'll sleep longer than we otherwise would have.
+ crit = minCritTime
}
// Multiply the critical time by 1 + the ratio of the costs of using
@@ -347,41 +385,19 @@ func bgscavenge() {
// because of the additional overheads of using scavenged memory.
crit *= 1 + scavengeCostRatio
- // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
- // put their machine to sleep) in the critical section, bound the time we use to
- // calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
- const maxCrit = 10e6
- if crit > maxCrit {
- crit = maxCrit
- }
+ // Go to sleep for our current sleep target of crit/critSleepRatio nanoseconds.
+ slept := scavengeSleep(int64(crit / critSleepRatio))
- // Compute the amount of time to sleep, assuming we want to use at most
- // scavengePercent of CPU time. Take into account scheduling overheads
- // that may extend the length of our sleep by multiplying by how far
- // off we are from the ideal ratio. For example, if we're sleeping too
- // much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time
- // down.
- adjust := scavengeEWMA / idealFraction
- sleepTime := int64(adjust * crit / (scavengePercent / 100.0))
-
- // Go to sleep.
- slept := scavengeSleep(sleepTime)
-
- // Compute the new ratio.
- fraction := crit / (crit + float64(slept))
-
- // Set a lower bound on the fraction.
- // Due to OS-related anomalies we may "sleep" for an inordinate amount
- // of time. Let's avoid letting the ratio get out of hand by bounding
- // the sleep time we use in our EWMA.
- const minFraction = 1.0 / 1000.0
- if fraction < minFraction {
- fraction = minFraction
- }
+ // Calculate the CPU time spent.
+ //
+ // This may be slightly inaccurate with respect to GOMAXPROCS, but we're
+ // recomputing this often enough relative to GOMAXPROCS changes in general
+ // (it only changes when the world is stopped, and not during a GC) that
+ // that small inaccuracy is in the noise.
+ cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs))
- // Update scavengeEWMA by merging in the new crit/slept ratio.
- const alpha = 0.5
- scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
+ // Update the critSleepRatio, adjusting until we reach our ideal fraction.
+ critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit)
}
}
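The critSleepController configured above is a piController, which is defined elsewhere in the runtime and not shown in this diff. As a rough illustration of what such a proportional-integral controller with output clamping and anti-windup looks like, here is a sketch; the type name, errIntegral field, and the exact integral bookkeeping are assumptions, while kp, ti, tt, min, max and the next(input, setpoint, period) shape come from the hunks above.

// piSketch is an illustrative PI controller with output clamping and
// back-calculation anti-windup, shaped like the critSleepController above.
type piSketch struct {
	kp, ti, tt  float64 // proportional gain, integral time, anti-windup reset time
	min, max    float64 // bounds on the output
	errIntegral float64 // accumulated integral term
}

// next takes the measured input, the desired setpoint, and the length of the
// interval being controlled, and returns the new output.
func (c *piSketch) next(input, setpoint, period float64) float64 {
	prop := c.kp * (setpoint - input)
	raw := prop + c.errIntegral

	// Clamp the output into [min, max].
	out := raw
	if out < c.min {
		out = c.min
	} else if out > c.max {
		out = c.max
	}

	// Integrate the error; the (out - raw) term bleeds off windup
	// whenever the output saturates against its bounds.
	if c.ti != 0 && c.tt != 0 {
		c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(out-raw)
	}
	return out
}

In the loop above, input is the measured cpuFraction, setpoint is idealFraction, and period is slept + crit; the returned value becomes the new critSleepRatio, so the next sleep lasts crit / critSleepRatio nanoseconds.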
@@ -391,16 +407,7 @@ func bgscavenge() {
// back to the top of the heap.
//
// Returns the amount of memory scavenged in bytes.
-//
-// p.mheapLock must be held, but may be temporarily released if
-// mayUnlock == true.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
-func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
- assertLockHeld(p.mheapLock)
-
+func (p *pageAlloc) scavenge(nbytes uintptr) uintptr {
var (
addrs addrRange
gen uint32
@@ -412,9 +419,11 @@ func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
break
}
}
- r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock)
- released += r
- addrs = a
+ systemstack(func() {
+ r, a := p.scavengeOne(addrs, nbytes-released)
+ released += r
+ addrs = a
+ })
}
// Only unreserve the space which hasn't been scavenged or searched
// to ensure we always make progress.
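Because the hunk above only shows fragments of the reworked pageAlloc.scavenge, it may help to see the new control flow in one piece. The following is a reconstruction from the visible lines; the reservation check inside the loop is filled in as an assumption rather than quoted from the unshown context.

// scavengeSketch outlines pageAlloc.scavenge after this patch: reserve a shard
// of the address space, repeatedly call scavengeOne (which now takes the heap
// lock itself) on the system stack, and finally give back whatever part of the
// reservation was neither scavenged nor searched.
func (p *pageAlloc) scavengeSketch(nbytes uintptr) uintptr {
	var (
		addrs addrRange
		gen   uint32
	)
	released := uintptr(0)
	for released < nbytes {
		if addrs.size() == 0 {
			// Reserve a fresh shard to work on; stop if nothing is
			// left in this scavenge generation.
			if addrs, gen = p.scavengeReserve(); addrs.size() == 0 {
				break
			}
		}
		systemstack(func() {
			r, a := p.scavengeOne(addrs, nbytes-released)
			released += r
			addrs = a
		})
	}
	// Only unreserve the space which hasn't been scavenged or searched
	// to ensure we always make progress.
	p.scavengeUnreserve(addrs, gen)
	return released
}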
@@ -452,8 +461,9 @@ func printScavTrace(gen uint32, released uintptr, forced bool) {
func (p *pageAlloc) scavengeStartGen() {
assertLockHeld(p.mheapLock)
+ lock(&p.scav.lock)
if debug.scavtrace > 0 {
- printScavTrace(p.scav.gen, p.scav.released, false)
+ printScavTrace(p.scav.gen, atomic.Loaduintptr(&p.scav.released), false)
}
p.inUse.cloneInto(&p.scav.inUse)
@@ -483,9 +493,10 @@ func (p *pageAlloc) scavengeStartGen() {
// arena in size, so virtually every heap has the scavenger on.
p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
p.scav.gen++
- p.scav.released = 0
+ atomic.Storeuintptr(&p.scav.released, 0)
p.scav.freeHWM = minOffAddr
p.scav.scavLWM = maxOffAddr
+ unlock(&p.scav.lock)
}
// scavengeReserve reserves a contiguous range of the address space
@@ -494,14 +505,9 @@ func (p *pageAlloc) scavengeStartGen() {
// first.
//
// Returns the reserved range and the scavenge generation number for it.
-//
-// p.mheapLock must be held.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
- assertLockHeld(p.mheapLock)
+ lock(&p.scav.lock)
+ gen := p.scav.gen
// Start by reserving the minimum.
r := p.scav.inUse.removeLast(p.scav.reservationBytes)
@@ -509,7 +515,8 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
// Return early if the size is zero; we don't want to use
// the bogus address below.
if r.size() == 0 {
- return r, p.scav.gen
+ unlock(&p.scav.lock)
+ return r, gen
}
// The scavenger requires that base be aligned to a
@@ -520,28 +527,26 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
// Remove from inUse however much extra we just pulled out.
p.scav.inUse.removeGreaterEqual(newBase)
+ unlock(&p.scav.lock)
+
r.base = offAddr{newBase}
- return r, p.scav.gen
+ return r, gen
}
// scavengeUnreserve returns an unscavenged portion of a range that was
// previously reserved with scavengeReserve.
-//
-// p.mheapLock must be held.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
- assertLockHeld(p.mheapLock)
-
- if r.size() == 0 || gen != p.scav.gen {
+ if r.size() == 0 {
return
}
if r.base.addr()%pallocChunkBytes != 0 {
throw("unreserving unaligned region")
}
- p.scav.inUse.add(r)
+ lock(&p.scav.lock)
+ if gen == p.scav.gen {
+ p.scav.inUse.add(r)
+ }
+ unlock(&p.scav.lock)
}
// scavengeOne walks over address range work until it finds
@@ -555,15 +560,10 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
//
// work's base address must be aligned to pallocChunkBytes.
//
-// p.mheapLock must be held, but may be temporarily released if
-// mayUnlock == true.
-//
-// Must run on the system stack because p.mheapLock must be held.
+// Must run on the systemstack because it acquires p.mheapLock.
//
//go:systemstack
-func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
- assertLockHeld(p.mheapLock)
-
+func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange) {
// Defensively check if we've received an empty address range.
// If so, just return.
if work.size() == 0 {
@@ -595,40 +595,12 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
minPages = 1
}
- // Helpers for locking and unlocking only if mayUnlock == true.
- lockHeap := func() {
- if mayUnlock {
- lock(p.mheapLock)
- }
- }
- unlockHeap := func() {
- if mayUnlock {
- unlock(p.mheapLock)
- }
- }
-
- // Fast path: check the chunk containing the top-most address in work,
- // starting at that address's page index in the chunk.
- //
- // Note that work.end() is exclusive, so get the chunk we care about
- // by subtracting 1.
- maxAddr := work.limit.addr() - 1
- maxChunk := chunkIndex(maxAddr)
- if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
- // We only bother looking for a candidate if there at least
- // minPages free pages at all.
- base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
-
- // If we found something, scavenge it and return!
- if npages != 0 {
- work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
-
- assertLockHeld(p.mheapLock) // Must be locked on return.
- return uintptr(npages) * pageSize, work
- }
+ // Fast path: check the chunk containing the top-most address in work.
+ if r, w := p.scavengeOneFast(work, minPages, maxPages); r != 0 {
+ return r, w
+ } else {
+ work = w
}
- // Update the limit to reflect the fact that we checked maxChunk already.
- work.limit = offAddr{chunkBase(maxChunk)}
// findCandidate finds the next scavenge candidate in work optimistically.
//
@@ -667,37 +639,61 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
// looking for any free and unscavenged page. If we think we see something,
// lock and verify it!
for work.size() != 0 {
- unlockHeap()
// Search for the candidate.
candidateChunkIdx, ok := findCandidate(work)
-
- // Lock the heap. We need to do this now if we found a candidate or not.
- // If we did, we'll verify it. If not, we need to lock before returning
- // anyway.
- lockHeap()
-
if !ok {
// We didn't find a candidate, so we're done.
work.limit = work.base
break
}
+ // Lock, so we can verify what we found.
+ lock(p.mheapLock)
+
// Find, verify, and scavenge if we can.
chunk := p.chunkOf(candidateChunkIdx)
base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
if npages > 0 {
work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
-
- assertLockHeld(p.mheapLock) // Must be locked on return.
+ unlock(p.mheapLock)
return uintptr(npages) * pageSize, work
}
+ unlock(p.mheapLock)
// We were fooled, so let's continue from where we left off.
work.limit = offAddr{chunkBase(candidateChunkIdx)}
}
+ return 0, work
+}
- assertLockHeld(p.mheapLock) // Must be locked on return.
+// scavengeOneFast is the fast path for scavengeOne, which just checks the top
+// chunk of work for some pages to scavenge.
+//
+// Must run on the system stack because it acquires the heap lock.
+//
+//go:systemstack
+func (p *pageAlloc) scavengeOneFast(work addrRange, minPages, maxPages uintptr) (uintptr, addrRange) {
+ maxAddr := work.limit.addr() - 1
+ maxChunk := chunkIndex(maxAddr)
+
+ lock(p.mheapLock)
+ if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
+ // We only bother looking for a candidate if there are at least
+ // minPages free pages at all.
+ base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
+
+ // If we found something, scavenge it and return!
+ if npages != 0 {
+ work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
+ unlock(p.mheapLock)
+ return uintptr(npages) * pageSize, work
+ }
+ }
+ unlock(p.mheapLock)
+
+ // Update the limit to reflect the fact that we checked maxChunk already.
+ work.limit = offAddr{chunkBase(maxChunk)}
return 0, work
}
@@ -708,38 +704,57 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
//
// Returns the base address of the scavenged region.
//
-// p.mheapLock must be held.
+// p.mheapLock must be held. Unlocks p.mheapLock but reacquires
+// it before returning. Must be run on the systemstack as a result.
+//
+//go:systemstack
func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
assertLockHeld(p.mheapLock)
- p.chunkOf(ci).scavenged.setRange(base, npages)
-
// Compute the full address for the start of the range.
addr := chunkBase(ci) + uintptr(base)*pageSize
+ // Mark the range we're about to scavenge as allocated, because
+ // we don't want any allocating goroutines to grab it while
+ // the scavenging is in progress.
+ if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
+ throw("double scavenge")
+ }
+
+ // With that done, it's safe to unlock.
+ unlock(p.mheapLock)
+
// Update the scavenge low watermark.
+ lock(&p.scav.lock)
if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
p.scav.scavLWM = oAddr
}
+ unlock(&p.scav.lock)
- // Only perform the actual scavenging if we're not in a test.
- // It's dangerous to do so otherwise.
- if p.test {
- return addr
- }
- sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
+ if !p.test {
+ // Only perform the actual scavenging if we're not in a test.
+ // It's dangerous to do so otherwise.
+ sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
- // Update global accounting only when not in test, otherwise
- // the runtime's accounting will be wrong.
- nbytes := int64(npages) * pageSize
- atomic.Xadd64(&memstats.heap_released, nbytes)
+ // Update global accounting only when not in test, otherwise
+ // the runtime's accounting will be wrong.
+ nbytes := int64(npages) * pageSize
+ atomic.Xadd64(&memstats.heap_released, nbytes)
- // Update consistent accounting too.
- stats := memstats.heapStats.acquire()
- atomic.Xaddint64(&stats.committed, -nbytes)
- atomic.Xaddint64(&stats.released, nbytes)
- memstats.heapStats.release()
+ // Update consistent accounting too.
+ stats := memstats.heapStats.acquire()
+ atomic.Xaddint64(&stats.committed, -nbytes)
+ atomic.Xaddint64(&stats.released, nbytes)
+ memstats.heapStats.release()
+ }
+
+ // Relock the heap, because now we need to make these pages
+ // available for allocation. Free them back to the page allocator.
+ lock(p.mheapLock)
+ p.free(addr, uintptr(npages), true)
+ // Mark the range as scavenged.
+ p.chunkOf(ci).scavenged.setRange(base, npages)
return addr
}
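To summarize the locking protocol that the final hunk spreads across interleaved additions and removals: scavengeRangeLocked now enters with the heap lock held, takes the pages out of circulation, drops every lock before the expensive sysUnused call, and only then reacquires the heap lock to hand the pages back, returning with p.mheapLock held again as its callers expect. A condensed sketch of that sequence, with the stats updates and the test-only path omitted:

// scavengeRangeSketch condenses the new scavengeRangeLocked flow.
// Entered and exited with p.mheapLock held.
func (p *pageAlloc) scavengeRangeSketch(ci chunkIdx, base, npages uint) uintptr {
	addr := chunkBase(ci) + uintptr(base)*pageSize

	// 1. Under p.mheapLock: mark the range allocated so no allocating
	//    goroutine can grab it mid-scavenge, then drop the heap lock.
	if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
		throw("double scavenge")
	}
	unlock(p.mheapLock)

	// 2. Under the scavenger's own lock: update the scavenge low watermark.
	lock(&p.scav.lock)
	if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
		p.scav.scavLWM = oAddr
	}
	unlock(&p.scav.lock)

	// 3. With no locks held: return the memory to the OS. This is the slow
	//    part, and it no longer blocks allocators on p.mheapLock.
	sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)

	// 4. Reacquire the heap lock, free the pages back to the page
	//    allocator, and mark them scavenged.
	lock(p.mheapLock)
	p.free(addr, uintptr(npages), true)
	p.chunkOf(ci).scavenged.setRange(base, npages)
	return addr
}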