From 77bed1748432cd332d132ab64e40fe6ad743d996 Mon Sep 17 00:00:00 2001
From: Kostya Kortchinsky
Date: Mon, 20 May 2019 14:40:04 +0000
Subject: [scudo][standalone] Introduce the Primary(s) and LocalCache

Summary:
This CL introduces the 32- and 64-bit primary allocators, and the associated
local cache. While the general idea is mostly similar to what exists in
sanitizer_common, it departs from the original code somewhat significantly:
- the 64-bit primary no longer uses a free array at the end of a region, but
  uses batches of free blocks in region 0, allowing for a convergence with
  the 32-bit primary behavior;
- as a result, there is only one (templated) local cache type for both
  primary allocators, and memory reclaiming can be implemented similarly for
  the 32-bit and 64-bit platforms;
- 64-bit primary regions are handled a bit differently: we do not reserve
  4TB of memory that we split, but reserve `NumClasses * 2^RegionSizeLog`,
  each region being offset by a random number of pages from its computed
  base. A side effect of this is that the 64-bit primary works on 32-bit
  platforms (I don't think we want to encourage it, but it's an interesting
  side effect);

Reviewers: vitalybuka, eugenis, morehouse, hctim

Reviewed By: morehouse

Subscribers: srhines, mgorny, delcypher, jfb, #sanitizers, llvm-commits

Tags: #llvm, #sanitizers

Differential Revision: https://reviews.llvm.org/D61745

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@361159 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/scudo/standalone/primary32.h | 388 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 388 insertions(+)
 create mode 100644 lib/scudo/standalone/primary32.h

diff --git a/lib/scudo/standalone/primary32.h b/lib/scudo/standalone/primary32.h
new file mode 100644
index 000000000..066e63749
--- /dev/null
+++ b/lib/scudo/standalone/primary32.h
@@ -0,0 +1,388 @@
+//===-- primary32.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_PRIMARY32_H_
+#define SCUDO_PRIMARY32_H_
+
+#include "bytemap.h"
+#include "common.h"
+#include "list.h"
+#include "local_cache.h"
+#include "release.h"
+#include "report.h"
+#include "stats.h"
+#include "string_utils.h"
+
+namespace scudo {
+
+// SizeClassAllocator32 is an allocator for 32- or 64-bit address spaces.
+//
+// It maps Regions of 2^RegionSizeLog bytes aligned on a 2^RegionSizeLog byte
+// boundary, and keeps a bytemap of the mappable address space to track the
+// size class each Region is associated with.
+//
+// Mapped regions are split into equally sized Blocks according to the size
+// class they belong to, and the associated pointers are shuffled to prevent
+// any predictable address pattern (the predictability increases with the
+// block size).
+//
+// Regions for size class 0 are special and used to hold TransferBatches,
+// which allow transferring arrays of pointers from the global size class
+// freelist to the thread-specific freelist for said class, and back.
+//
+// Memory used by this allocator is never unmapped but can be partially
+// reclaimed if the platform allows for it.
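+//
+// As an illustration only (RegionSizeLog == 20 is an arbitrary example, not
+// a value mandated by this file): Regions would then be 1MB each, a 64-byte
+// size class would yield 16384 Blocks per Region, and an instantiation could
+// look like the following, assuming DefaultSizeClassMap is the map provided
+// by size_class_map.h:
+//   typedef SizeClassAllocator32<DefaultSizeClassMap, 20U> Primary;
+//   typedef Primary::CacheT Cache;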
+
+template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator32 {
+public:
+  typedef SizeClassMapT SizeClassMap;
+  // Regions should be large enough to hold the largest Block.
+  COMPILER_CHECK((1UL << RegionSizeLog) >= SizeClassMap::MaxSize);
+  typedef SizeClassAllocator32<SizeClassMapT, RegionSizeLog> ThisT;
+  typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
+  typedef typename CacheT::TransferBatch TransferBatch;
+
+  static uptr getSizeByClassId(uptr ClassId) {
+    return (ClassId == SizeClassMap::BatchClassId)
+               ? sizeof(TransferBatch)
+               : SizeClassMap::getSizeByClassId(ClassId);
+  }
+
+  static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }
+
+  void initLinkerInitialized(s32 ReleaseToOsInterval) {
+    if (SCUDO_FUCHSIA)
+      reportError("SizeClassAllocator32 is not supported on Fuchsia");
+
+    PossibleRegions.initLinkerInitialized();
+    MinRegionIndex = NumRegions; // MaxRegionIndex is already initialized to 0.
+
+    u32 Seed;
+    if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
+      Seed = static_cast<u32>(
+          getMonotonicTime() ^
+          (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
+    const uptr PageSize = getPageSizeCached();
+    for (uptr I = 0; I < NumClasses; I++) {
+      SizeClassInfo *Sci = getSizeClassInfo(I);
+      Sci->RandState = getRandomU32(&Seed);
+      // See comment in the 64-bit primary about releasing smaller size
+      // classes.
+      Sci->CanRelease = (ReleaseToOsInterval > 0) &&
+                        (I != SizeClassMap::BatchClassId) &&
+                        (getSizeByClassId(I) >= (PageSize / 32));
+    }
+    ReleaseToOsIntervalMs = ReleaseToOsInterval;
+  }
+  void init(s32 ReleaseToOsInterval) {
+    memset(this, 0, sizeof(*this));
+    initLinkerInitialized(ReleaseToOsInterval);
+  }
+
+  TransferBatch *popBatch(CacheT *C, uptr ClassId) {
+    DCHECK_LT(ClassId, NumClasses);
+    SizeClassInfo *Sci = getSizeClassInfo(ClassId);
+    BlockingMutexLock L(&Sci->Mutex);
+    TransferBatch *B = Sci->FreeList.front();
+    if (B)
+      Sci->FreeList.pop_front();
+    else {
+      B = populateFreeList(C, ClassId, Sci);
+      if (UNLIKELY(!B))
+        return nullptr;
+    }
+    DCHECK_GT(B->getCount(), 0);
+    Sci->Stats.PoppedBlocks += B->getCount();
+    return B;
+  }
+
+  void pushBatch(uptr ClassId, TransferBatch *B) {
+    DCHECK_LT(ClassId, NumClasses);
+    DCHECK_GT(B->getCount(), 0);
+    SizeClassInfo *Sci = getSizeClassInfo(ClassId);
+    BlockingMutexLock L(&Sci->Mutex);
+    Sci->FreeList.push_front(B);
+    Sci->Stats.PushedBlocks += B->getCount();
+    if (Sci->CanRelease)
+      releaseToOSMaybe(Sci, ClassId);
+  }
+
+  void disable() {
+    for (uptr I = 0; I < NumClasses; I++)
+      getSizeClassInfo(I)->Mutex.lock();
+  }
+
+  void enable() {
+    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--)
+      getSizeClassInfo(I)->Mutex.unlock();
+  }
+
+  template <typename F> void iterateOverBlocks(F Callback) {
+    for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++)
+      if (PossibleRegions[I]) {
+        const uptr BlockSize = getSizeByClassId(PossibleRegions[I]);
+        const uptr From = I * RegionSize;
+        const uptr To = From + (RegionSize / BlockSize) * BlockSize;
+        for (uptr Block = From; Block < To; Block += BlockSize)
+          Callback(Block);
+      }
+  }
+
+  void printStats() {
+    // TODO(kostyak): get the RSS per region.
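+    // PoppedBlocks & PushedBlocks are cumulative counters: their difference
+    // is the number of blocks currently handed out by the primary (either in
+    // use by the application or sitting in a thread-local cache).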
+    uptr TotalMapped = 0;
+    uptr PoppedBlocks = 0;
+    uptr PushedBlocks = 0;
+    for (uptr I = 0; I < NumClasses; I++) {
+      SizeClassInfo *Sci = getSizeClassInfo(I);
+      TotalMapped += Sci->AllocatedUser;
+      PoppedBlocks += Sci->Stats.PoppedBlocks;
+      PushedBlocks += Sci->Stats.PushedBlocks;
+    }
+    Printf("Stats: SizeClassAllocator32: %zuM mapped in %zu allocations; "
+           "remains %zu\n",
+           TotalMapped >> 20, PoppedBlocks, PoppedBlocks - PushedBlocks);
+    for (uptr I = 0; I < NumClasses; I++)
+      printStats(I, 0);
+  }
+
+  void releaseToOS() {
+    for (uptr I = 1; I < NumClasses; I++) {
+      SizeClassInfo *Sci = getSizeClassInfo(I);
+      BlockingMutexLock L(&Sci->Mutex);
+      releaseToOSMaybe(Sci, I, /*Force=*/true);
+    }
+  }
+
+private:
+  static const uptr NumClasses = SizeClassMap::NumClasses;
+  static const uptr RegionSize = 1UL << RegionSizeLog;
+  static const uptr NumRegions = SCUDO_MMAP_RANGE_SIZE >> RegionSizeLog;
+#if SCUDO_WORDSIZE == 32U
+  typedef FlatByteMap<NumRegions> ByteMap;
+#else
+  typedef TwoLevelByteMap<(NumRegions >> 12), 1UL << 12> ByteMap;
+#endif
+
+  struct SizeClassStats {
+    uptr PoppedBlocks;
+    uptr PushedBlocks;
+  };
+
+  struct ReleaseToOsInfo {
+    uptr PushedBlocksAtLastRelease;
+    uptr RangesReleased;
+    uptr LastReleasedBytes;
+    u64 LastReleaseAtNs;
+  };
+
+  struct ALIGNED(SCUDO_CACHE_LINE_SIZE) SizeClassInfo {
+    BlockingMutex Mutex;
+    IntrusiveList<TransferBatch> FreeList;
+    SizeClassStats Stats;
+    bool CanRelease;
+    u32 RandState;
+    uptr AllocatedUser;
+    ReleaseToOsInfo ReleaseInfo;
+  };
+  COMPILER_CHECK(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0);
+
+  uptr computeRegionId(uptr Mem) {
+    const uptr Id = Mem >> RegionSizeLog;
+    CHECK_LT(Id, NumRegions);
+    return Id;
+  }
+
+  uptr allocateRegionSlow() {
+    uptr MapSize = 2 * RegionSize;
+    const uptr MapBase = reinterpret_cast<uptr>(
+        map(nullptr, MapSize, "scudo:primary", MAP_ALLOWNOMEM));
+    if (UNLIKELY(!MapBase))
+      return 0;
+    const uptr MapEnd = MapBase + MapSize;
+    uptr Region = MapBase;
+    if (isAligned(Region, RegionSize)) {
+      SpinMutexLock L(&RegionsStashMutex);
+      if (NumberOfStashedRegions < MaxStashedRegions)
+        RegionsStash[NumberOfStashedRegions++] = MapBase + RegionSize;
+      else
+        MapSize = RegionSize;
+    } else {
+      Region = roundUpTo(MapBase, RegionSize);
+      unmap(reinterpret_cast<void *>(MapBase), Region - MapBase);
+      MapSize = RegionSize;
+    }
+    const uptr End = Region + MapSize;
+    if (End != MapEnd)
+      unmap(reinterpret_cast<void *>(End), MapEnd - End);
+    return Region;
+  }
+
+  uptr allocateRegion(uptr ClassId) {
+    DCHECK_LT(ClassId, NumClasses);
+    uptr Region = 0;
+    {
+      SpinMutexLock L(&RegionsStashMutex);
+      if (NumberOfStashedRegions > 0)
+        Region = RegionsStash[--NumberOfStashedRegions];
+    }
+    if (!Region)
+      Region = allocateRegionSlow();
+    if (LIKELY(Region)) {
+      if (ClassId) {
+        const uptr RegionIndex = computeRegionId(Region);
+        if (RegionIndex < MinRegionIndex)
+          MinRegionIndex = RegionIndex;
+        if (RegionIndex > MaxRegionIndex)
+          MaxRegionIndex = RegionIndex;
+        PossibleRegions.set(RegionIndex, static_cast<u8>(ClassId));
+      }
+    }
+    return Region;
+  }
+
+  SizeClassInfo *getSizeClassInfo(uptr ClassId) {
+    DCHECK_LT(ClassId, NumClasses);
+    return &SizeClassInfoArray[ClassId];
+  }
+
+  bool populateBatches(CacheT *C, SizeClassInfo *Sci, uptr ClassId,
+                       TransferBatch **CurrentBatch, u32 MaxCount,
+                       void **PointersArray, u32 Count) {
+    if (ClassId != SizeClassMap::BatchClassId)
+      shuffle(PointersArray, Count, &Sci->RandState);
+    TransferBatch *B = *CurrentBatch;
+    for (uptr I = 0; I < Count; I++) {
+      if (B && B->getCount() == MaxCount) {
+        Sci->FreeList.push_back(B);
+        B = nullptr;
+      }
+      if (!B) {
+        B = C->createBatch(ClassId, PointersArray[I]);
+        if (UNLIKELY(!B))
+          return false;
+        B->clear();
+      }
+      B->add(PointersArray[I]);
+    }
+    *CurrentBatch = B;
+    return true;
+  }
+
+  NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
+                                           SizeClassInfo *Sci) {
+    const uptr Region = allocateRegion(ClassId);
+    if (UNLIKELY(!Region))
+      return nullptr;
+    C->getStats().add(StatMapped, RegionSize);
+    const uptr Size = getSizeByClassId(ClassId);
+    const u32 MaxCount = TransferBatch::MaxCached(Size);
+    DCHECK_GT(MaxCount, 0);
+    const uptr NumberOfBlocks = RegionSize / Size;
+    DCHECK_GT(NumberOfBlocks, 0);
+    TransferBatch *B = nullptr;
+    constexpr uptr ShuffleArraySize = 48;
+    void *ShuffleArray[ShuffleArraySize];
+    u32 Count = 0;
+    const uptr AllocatedUser = NumberOfBlocks * Size;
+    for (uptr I = Region; I < Region + AllocatedUser; I += Size) {
+      ShuffleArray[Count++] = reinterpret_cast<void *>(I);
+      if (Count == ShuffleArraySize) {
+        if (UNLIKELY(!populateBatches(C, Sci, ClassId, &B, MaxCount,
+                                      ShuffleArray, Count)))
+          return nullptr;
+        Count = 0;
+      }
+    }
+    if (Count) {
+      if (UNLIKELY(!populateBatches(C, Sci, ClassId, &B, MaxCount,
+                                    ShuffleArray, Count)))
+        return nullptr;
+    }
+    DCHECK(B);
+    DCHECK_GT(B->getCount(), 0);
+    Sci->AllocatedUser += AllocatedUser;
+    if (Sci->CanRelease)
+      Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
+    return B;
+  }
+
+  void printStats(uptr ClassId, uptr Rss) {
+    SizeClassInfo *Sci = getSizeClassInfo(ClassId);
+    if (Sci->AllocatedUser == 0)
+      return;
+    const uptr InUse = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks;
+    const uptr AvailableChunks = Sci->AllocatedUser / getSizeByClassId(ClassId);
+    Printf("  %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu inuse: "
+           "%6zu avail: %6zu rss: %6zuK\n",
+           ClassId, getSizeByClassId(ClassId), Sci->AllocatedUser >> 10,
+           Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks, InUse,
+           AvailableChunks, Rss >> 10);
+  }
+
+  NOINLINE void releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId,
+                                 bool Force = false) {
+    const uptr BlockSize = getSizeByClassId(ClassId);
+    const uptr PageSize = getPageSizeCached();
+
+    CHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks);
+    const uptr N = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks;
+    if (N * BlockSize < PageSize)
+      return; // No chance to release anything.
+    if ((Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) *
+            BlockSize <
+        PageSize) {
+      return; // Nothing new to release.
+    }
+
+    if (!Force) {
+      const s32 IntervalMs = ReleaseToOsIntervalMs;
+      if (IntervalMs < 0)
+        return;
+      if (Sci->ReleaseInfo.LastReleaseAtNs + IntervalMs * 1000000ULL >
+          getMonotonicTime()) {
+        return; // Memory was returned recently.
+      }
+    }
+
+    // TODO(kostyak): currently not ideal as we loop over all regions and
+    // iterate multiple times over the same freelist if a ClassId spans
+    // multiple regions. But it will have to do for now.
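+    // releaseFreeMemoryToOS() walks the class freelist and hands the page
+    // ranges of Region I that are entirely covered by free blocks to the
+    // Recorder, which tallies the ranges and bytes actually released.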
+    for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) {
+      if (PossibleRegions[I] == ClassId) {
+        ReleaseRecorder Recorder(I * RegionSize);
+        releaseFreeMemoryToOS(&Sci->FreeList, I * RegionSize,
+                              RegionSize / PageSize, BlockSize, &Recorder);
+        if (Recorder.getReleasedRangesCount() > 0) {
+          Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
+          Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
+          Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+        }
+      }
+    }
+    Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
+  }
+
+  SizeClassInfo SizeClassInfoArray[NumClasses];
+
+  ByteMap PossibleRegions;
+  // Keep track of the lowest & highest regions allocated to avoid looping
+  // through the whole NumRegions.
+  uptr MinRegionIndex;
+  uptr MaxRegionIndex;
+  s32 ReleaseToOsIntervalMs;
+  // Unless several threads request regions simultaneously from different
+  // size classes, the stash rarely contains more than 1 entry.
+  static constexpr uptr MaxStashedRegions = 4;
+  StaticSpinMutex RegionsStashMutex;
+  uptr NumberOfStashedRegions;
+  uptr RegionsStash[MaxStashedRegions];
+};
+
+} // namespace scudo
+
+#endif // SCUDO_PRIMARY32_H_
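A minimal usage sketch of the new primary (illustrative only: it assumes the
scudo standalone headers are on the include path, that DefaultSizeClassMap
from size_class_map.h is the map in use, and that a region size log of 20 is
acceptable; the cache initialization mirrors what a combined allocator would
be expected to do and may differ in detail):

  #include "primary32.h"
  #include "size_class_map.h"
  #include "stats.h"

  // 1MB regions; any log satisfying the COMPILER_CHECK against
  // SizeClassMap::MaxSize would do.
  typedef scudo::SizeClassAllocator32<scudo::DefaultSizeClassMap, 20U> Primary;

  void example() {
    static Primary Allocator;
    Allocator.init(/*ReleaseToOsInterval=*/1000);
    static scudo::GlobalStats Stats;
    Stats.init();
    Primary::CacheT Cache;
    Cache.init(&Stats, &Allocator);
    // Map a request size to a class, then allocate & deallocate through the
    // local cache, which pops/pushes TransferBatches from/to the primary.
    const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(128);
    void *P = Cache.allocate(ClassId);
    Cache.deallocate(ClassId, P);
    Cache.destroy(&Stats);
    Allocator.releaseToOS(); // Force reclaiming of free pages.
  }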