diff options
Diffstat (limited to 'libgo/go/runtime/mgcsweep.go')
-rw-r--r-- | libgo/go/runtime/mgcsweep.go | 433 |
1 files changed, 430 insertions, 3 deletions
diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go index 1e959a4ad2d..6877649edbb 100644 --- a/libgo/go/runtime/mgcsweep.go +++ b/libgo/go/runtime/mgcsweep.go @@ -10,7 +10,7 @@ // can free a whole span if none of the objects are marked, but that // isn't its goal. This can be driven either synchronously by // mcentral.cacheSpan for mcentral spans, or asynchronously by -// sweepone from the list of all in-use spans in mheap_.sweepSpans. +// sweepone, which looks at all the mcentral lists. // // * The span reclaimer looks for spans that contain no marked objects // and frees whole spans. This is a separate algorithm because @@ -40,6 +40,80 @@ type sweepdata struct { nbgsweep uint32 npausesweep uint32 + + // centralIndex is the current unswept span class. + // It represents an index into the mcentral span + // sets. Accessed and updated via its load and + // update methods. Not protected by a lock. + // + // Reset at mark termination. + // Used by mheap.nextSpanForSweep. + centralIndex sweepClass +} + +// sweepClass is a spanClass and one bit to represent whether we're currently +// sweeping partial or full spans. +type sweepClass uint32 + +const ( + numSweepClasses = numSpanClasses * 2 + sweepClassDone sweepClass = sweepClass(^uint32(0)) +) + +func (s *sweepClass) load() sweepClass { + return sweepClass(atomic.Load((*uint32)(s))) +} + +func (s *sweepClass) update(sNew sweepClass) { + // Only update *s if its current value is less than sNew, + // since *s increases monotonically. + sOld := s.load() + for sOld < sNew && !atomic.Cas((*uint32)(s), uint32(sOld), uint32(sNew)) { + sOld = s.load() + } + // TODO(mknyszek): This isn't the only place we have + // an atomic monotonically increasing counter. It would + // be nice to have an "atomic max" which is just implemented + // as the above on most architectures. Some architectures + // like RISC-V however have native support for an atomic max. +} + +func (s *sweepClass) clear() { + atomic.Store((*uint32)(s), 0) +} + +// split returns the underlying span class as well as +// whether we're interested in the full or partial +// unswept lists for that class, indicated as a boolean +// (true means "full"). +func (s sweepClass) split() (spc spanClass, full bool) { + return spanClass(s >> 1), s&1 == 0 +} + +// nextSpanForSweep finds and pops the next span for sweeping from the +// central sweep buffers. It returns ownership of the span to the caller. +// Returns nil if no such span exists. +func (h *mheap) nextSpanForSweep() *mspan { + sg := h.sweepgen + for sc := sweep.centralIndex.load(); sc < numSweepClasses; sc++ { + spc, full := sc.split() + c := &h.central[spc].mcentral + var s *mspan + if full { + s = c.fullUnswept(sg).pop() + } else { + s = c.partialUnswept(sg).pop() + } + if s != nil { + // Write down that we found something so future sweepers + // can start from here. + sweep.centralIndex.update(sc) + return s + } + } + // Write down that we found nothing. + sweep.centralIndex.update(sweepClassDone) + return nil } // finishsweep_m ensures that all spans are swept. @@ -58,6 +132,24 @@ func finishsweep_m() { sweep.npausesweep++ } + if go115NewMCentralImpl { + // Reset all the unswept buffers, which should be empty. + // Do this in sweep termination as opposed to mark termination + // so that we can catch unswept spans and reclaim blocks as + // soon as possible. + sg := mheap_.sweepgen + for i := range mheap_.central { + c := &mheap_.central[i].mcentral + c.partialUnswept(sg).reset() + c.fullUnswept(sg).reset() + } + } + + // Sweeping is done, so if the scavenger isn't already awake, + // wake it up. There's definitely work for it to do at this + // point. + wakeScavenger() + nextMarkBitArenaEpoch() } @@ -66,6 +158,7 @@ func bgsweep(c chan int) { sweep.g = getg() + lockInit(&sweep.lock, lockRankSweep) lock(&sweep.lock) sweep.parked = true c <- 1 @@ -111,7 +204,11 @@ func sweepone() uintptr { var s *mspan sg := mheap_.sweepgen for { - s = mheap_.sweepSpans[1-sg/2%2].pop() + if go115NewMCentralImpl { + s = mheap_.nextSpanForSweep() + } else { + s = mheap_.sweepSpans[1-sg/2%2].pop() + } if s == nil { atomic.Store(&mheap_.sweepdone, 1) break @@ -151,6 +248,27 @@ func sweepone() uintptr { // Decrement the number of active sweepers and if this is the // last one print trace information. if atomic.Xadd(&mheap_.sweepers, -1) == 0 && atomic.Load(&mheap_.sweepdone) != 0 { + // Since the sweeper is done, move the scavenge gen forward (signalling + // that there's new work to do) and wake the scavenger. + // + // The scavenger is signaled by the last sweeper because once + // sweeping is done, we will definitely have useful work for + // the scavenger to do, since the scavenger only runs over the + // heap once per GC cyle. This update is not done during sweep + // termination because in some cases there may be a long delay + // between sweep done and sweep termination (e.g. not enough + // allocations to trigger a GC) which would be nice to fill in + // with scavenging work. + systemstack(func() { + lock(&mheap_.lock) + mheap_.pages.scavengeStartGen() + unlock(&mheap_.lock) + }) + // Since we might sweep in an allocation path, it's not possible + // for us to wake the scavenger directly via wakeScavenger, since + // it could allocate. Ask sysmon to do it for us instead. + readyForScavenger() + if debug.gcpacertrace > 0 { print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", (memstats.heap_live-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept, " pages at ", sweepRatio, " pages/byte\n") } @@ -206,6 +324,260 @@ func (s *mspan) ensureSwept() { // If preserve=true, don't return it to heap nor relink in mcentral lists; // caller takes care of it. func (s *mspan) sweep(preserve bool) bool { + if !go115NewMCentralImpl { + return s.oldSweep(preserve) + } + // It's critical that we enter this function with preemption disabled, + // GC must not start while we are in the middle of this function. + _g_ := getg() + if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { + throw("mspan.sweep: m is not locked") + } + sweepgen := mheap_.sweepgen + if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 { + print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("mspan.sweep: bad span state") + } + + if trace.enabled { + traceGCSweepSpan(s.npages * _PageSize) + } + + atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages)) + + spc := s.spanclass + size := s.elemsize + + c := _g_.m.p.ptr().mcache + + // The allocBits indicate which unmarked objects don't need to be + // processed since they were free at the end of the last GC cycle + // and were not allocated since then. + // If the allocBits index is >= s.freeindex and the bit + // is not marked then the object remains unallocated + // since the last GC. + // This situation is analogous to being on a freelist. + + // Unlink & free special records for any objects we're about to free. + // Two complications here: + // 1. An object can have both finalizer and profile special records. + // In such case we need to queue finalizer for execution, + // mark the object as live and preserve the profile special. + // 2. A tiny object can have several finalizers setup for different offsets. + // If such object is not marked, we need to queue all finalizers at once. + // Both 1 and 2 are possible at the same time. + hadSpecials := s.specials != nil + specialp := &s.specials + special := *specialp + for special != nil { + // A finalizer can be set for an inner byte of an object, find object beginning. + objIndex := uintptr(special.offset) / size + p := s.base() + objIndex*size + mbits := s.markBitsForIndex(objIndex) + if !mbits.isMarked() { + // This object is not marked and has at least one special record. + // Pass 1: see if it has at least one finalizer. + hasFin := false + endOffset := p - s.base() + size + for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next { + if tmp.kind == _KindSpecialFinalizer { + // Stop freeing of object if it has a finalizer. + mbits.setMarkedNonAtomic() + hasFin = true + break + } + } + // Pass 2: queue all finalizers _or_ handle profile record. + for special != nil && uintptr(special.offset) < endOffset { + // Find the exact byte for which the special was setup + // (as opposed to object beginning). + p := s.base() + uintptr(special.offset) + if special.kind == _KindSpecialFinalizer || !hasFin { + // Splice out special record. + y := special + special = special.next + *specialp = special + freespecial(y, unsafe.Pointer(p), size) + } else { + // This is profile record, but the object has finalizers (so kept alive). + // Keep special record. + specialp = &special.next + special = *specialp + } + } + } else { + // object is still live: keep special record + specialp = &special.next + special = *specialp + } + } + if hadSpecials && s.specials == nil { + spanHasNoSpecials(s) + } + + if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled { + // Find all newly freed objects. This doesn't have to + // efficient; allocfreetrace has massive overhead. + mbits := s.markBitsForBase() + abits := s.allocBitsForIndex(0) + for i := uintptr(0); i < s.nelems; i++ { + if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) { + x := s.base() + i*s.elemsize + if debug.allocfreetrace != 0 { + tracefree(unsafe.Pointer(x), size) + } + if debug.clobberfree != 0 { + clobberfree(unsafe.Pointer(x), size) + } + if raceenabled { + racefree(unsafe.Pointer(x), size) + } + if msanenabled { + msanfree(unsafe.Pointer(x), size) + } + } + mbits.advance() + abits.advance() + } + } + + // Check for zombie objects. + if s.freeindex < s.nelems { + // Everything < freeindex is allocated and hence + // cannot be zombies. + // + // Check the first bitmap byte, where we have to be + // careful with freeindex. + obj := s.freeindex + if (*s.gcmarkBits.bytep(obj / 8)&^*s.allocBits.bytep(obj / 8))>>(obj%8) != 0 { + s.reportZombies() + } + // Check remaining bytes. + for i := obj/8 + 1; i < divRoundUp(s.nelems, 8); i++ { + if *s.gcmarkBits.bytep(i)&^*s.allocBits.bytep(i) != 0 { + s.reportZombies() + } + } + } + + // Count the number of free objects in this span. + nalloc := uint16(s.countAlloc()) + nfreed := s.allocCount - nalloc + if nalloc > s.allocCount { + // The zombie check above should have caught this in + // more detail. + print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") + throw("sweep increased allocation count") + } + + s.allocCount = nalloc + s.freeindex = 0 // reset allocation index to start of span. + if trace.enabled { + getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize + } + + // gcmarkBits becomes the allocBits. + // get a fresh cleared gcmarkBits in preparation for next GC + s.allocBits = s.gcmarkBits + s.gcmarkBits = newMarkBits(s.nelems) + + // Initialize alloc bits cache. + s.refillAllocCache(0) + + // The span must be in our exclusive ownership until we update sweepgen, + // check for potential races. + if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 { + print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("mspan.sweep: bad span state after sweep") + } + if s.sweepgen == sweepgen+1 || s.sweepgen == sweepgen+3 { + throw("swept cached span") + } + + // We need to set s.sweepgen = h.sweepgen only when all blocks are swept, + // because of the potential for a concurrent free/SetFinalizer. + // + // But we need to set it before we make the span available for allocation + // (return it to heap or mcentral), because allocation code assumes that a + // span is already swept if available for allocation. + // + // Serialization point. + // At this point the mark bits are cleared and allocation ready + // to go so release the span. + atomic.Store(&s.sweepgen, sweepgen) + + if spc.sizeclass() != 0 { + // Handle spans for small objects. + if nfreed > 0 { + // Only mark the span as needing zeroing if we've freed any + // objects, because a fresh span that had been allocated into, + // wasn't totally filled, but then swept, still has all of its + // free slots zeroed. + s.needzero = 1 + c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) + } + if !preserve { + // The caller may not have removed this span from whatever + // unswept set its on but taken ownership of the span for + // sweeping by updating sweepgen. If this span still is in + // an unswept set, then the mcentral will pop it off the + // set, check its sweepgen, and ignore it. + if nalloc == 0 { + // Free totally free span directly back to the heap. + mheap_.freeSpan(s) + return true + } + // Return span back to the right mcentral list. + if uintptr(nalloc) == s.nelems { + mheap_.central[spc].mcentral.fullSwept(sweepgen).push(s) + } else { + mheap_.central[spc].mcentral.partialSwept(sweepgen).push(s) + } + } + } else if !preserve { + // Handle spans for large objects. + if nfreed != 0 { + // Free large object span to heap. + + // NOTE(rsc,dvyukov): The original implementation of efence + // in CL 22060046 used sysFree instead of sysFault, so that + // the operating system would eventually give the memory + // back to us again, so that an efence program could run + // longer without running out of memory. Unfortunately, + // calling sysFree here without any kind of adjustment of the + // heap data structures means that when the memory does + // come back to us, we have the wrong metadata for it, either in + // the mspan structures or in the garbage collection bitmap. + // Using sysFault here means that the program will run out of + // memory fairly quickly in efence mode, but at least it won't + // have mysterious crashes due to confused memory reuse. + // It should be possible to switch back to sysFree if we also + // implement and then call some kind of mheap.deleteSpan. + if debug.efence > 0 { + s.limit = 0 // prevent mlookup from finding this span + sysFault(unsafe.Pointer(s.base()), size) + } else { + mheap_.freeSpan(s) + } + c.local_nlargefree++ + c.local_largefree += size + return true + } + + // Add a large span directly onto the full+swept list. + mheap_.central[spc].mcentral.fullSwept(sweepgen).push(s) + } + return false +} + +// Sweep frees or collects finalizers for blocks not marked in the mark phase. +// It clears the mark bits in preparation for the next GC round. +// Returns true if the span was returned to heap. +// If preserve=true, don't return it to heap nor relink in mcentral lists; +// caller takes care of it. +// +// For !go115NewMCentralImpl. +func (s *mspan) oldSweep(preserve bool) bool { // It's critical that we enter this function with preemption disabled, // GC must not start while we are in the middle of this function. _g_ := getg() @@ -228,7 +600,7 @@ func (s *mspan) sweep(preserve bool) bool { size := s.elemsize res := false - c := _g_.m.mcache + c := _g_.m.p.ptr().mcache freeToHeap := false // The allocBits indicate which unmarked objects don't need to be @@ -247,6 +619,7 @@ func (s *mspan) sweep(preserve bool) bool { // 2. A tiny object can have several finalizers setup for different offsets. // If such object is not marked, we need to queue all finalizers at once. // Both 1 and 2 are possible at the same time. + hadSpecials := s.specials != nil specialp := &s.specials special := *specialp for special != nil { @@ -291,6 +664,9 @@ func (s *mspan) sweep(preserve bool) bool { special = *specialp } } + if go115NewMarkrootSpans && hadSpecials && s.specials == nil { + spanHasNoSpecials(s) + } if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled { // Find all newly freed objects. This doesn't have to @@ -402,6 +778,57 @@ func (s *mspan) sweep(preserve bool) bool { return res } +// reportZombies reports any marked but free objects in s and throws. +// +// This generally means one of the following: +// +// 1. User code converted a pointer to a uintptr and then back +// unsafely, and a GC ran while the uintptr was the only reference to +// an object. +// +// 2. User code (or a compiler bug) constructed a bad pointer that +// points to a free slot, often a past-the-end pointer. +// +// 3. The GC two cycles ago missed a pointer and freed a live object, +// but it was still live in the last cycle, so this GC cycle found a +// pointer to that object and marked it. +func (s *mspan) reportZombies() { + printlock() + print("runtime: marked free object in span ", s, ", elemsize=", s.elemsize, " freeindex=", s.freeindex, " (bad use of unsafe.Pointer? try -d=checkptr)\n") + mbits := s.markBitsForBase() + abits := s.allocBitsForIndex(0) + for i := uintptr(0); i < s.nelems; i++ { + addr := s.base() + i*s.elemsize + print(hex(addr)) + alloc := i < s.freeindex || abits.isMarked() + if alloc { + print(" alloc") + } else { + print(" free ") + } + if mbits.isMarked() { + print(" marked ") + } else { + print(" unmarked") + } + zombie := mbits.isMarked() && !alloc + if zombie { + print(" zombie") + } + print("\n") + if zombie { + length := s.elemsize + if length > 1024 { + length = 1024 + } + hexdumpWords(addr, addr+length, nil) + } + mbits.advance() + abits.advance() + } + throw("found pointer to free object") +} + // deductSweepCredit deducts sweep credit for allocating a span of // size spanBytes. This must be performed *before* the span is // allocated to ensure the system has enough credit. If necessary, it |