diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2011-05-19 21:37:12 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2011-05-19 21:37:12 +0000 |
commit | 8c7d2289d24f7a49f1f6f60d4a6eaee06fa04c60 (patch) | |
tree | 204112b581d072707fe939f77a9b93a520e1028b /src | |
parent | 1d30e525ae6ac38ae381bb3118f7f47998af2942 (diff) | |
download | gperftools-8c7d2289d24f7a49f1f6f60d4a6eaee06fa04c60.tar.gz |
* Fix typos in comment in profiler.h (nrhodes)
* #include fixes (jyrki)
* Add missing stddef.h for ptrdiff_t (mec)
* Add M{,un}mapReplacement hooks into MallocHook (ribrdb)
* Force big alloc in frag test (ruemmler)
* PERF: Increase the size class cache to 64K entries (ruemmler)
* PERF: Increase the transfer cache by 16x (ruemmler)
* Use windows intrinsic to get the tsc (csilvers)
* Rename atomicops-internals-x86-msvc.h->windows.h (csilvers)
* Remove flaky DEATH test in malloc_hook_test (ppluzhnikov)
* Expose internal ReadStackTraces()/etc (lantran)
* Refactored system allocator logic (gangren)
* Include-what-you-use: cleanup tcmalloc #includes (csilvers)
* Don't set kAddressBits to 48 on 32-bit systems (csilvers)
* Add declaration for __rdtsc() for windows (koda)
* Don't revert to system alloc for expected errors (gangren)
* Add TCMALLOC_SMALL_BUT_SLOW support (ruemmler)
* Clarify that tcmalloc stats are MiB (robinson)
* Avoid setting cpuinfo_cycles_per_second to 0 (koda)
* Fix frag_unittest memory calculations (ruemmler)
* Remove support for non-tcmalloc debugallocation (blount)
* Add malloc_hook_test (llib)
* Change the objcopy -W test to be cross-friendly (mcgrathr)
* Export __tcmalloc in addition to _tcmalloc, for x86_64 (csilvers)
git-svn-id: http://gperftools.googlecode.com/svn/trunk@109 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src')
52 files changed, 1148 insertions, 587 deletions
diff --git a/src/base/atomicops-internals-x86-msvc.h b/src/base/atomicops-internals-windows.h index d50894c..58782a1 100644 --- a/src/base/atomicops-internals-x86-msvc.h +++ b/src/base/atomicops-internals-windows.h @@ -31,12 +31,12 @@ * Author: Sanjay Ghemawat */ -// Implementation of atomic operations for x86. This file should not -// be included directly. Clients should instead include -// "base/atomicops.h". +// Implementation of atomic operations using Windows API +// functions. This file should not be included directly. Clients +// should instead include "base/atomicops.h". -#ifndef BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ -#define BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ +#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ +#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ #include <stdio.h> #include <stdlib.h> @@ -257,8 +257,8 @@ inline Atomic64 Release_Load(volatile const Atomic64* ptr) { // 64-bit low-level operations on 32-bit platform -// TBD(vchen): The GNU assembly below must be converted to MSVC inline -// assembly. +// TODO(vchen): The GNU assembly below must be converted to MSVC inline +// assembly. Then the file should be renamed to ...-x86-mscv.h, probably. 
inline void NotImplementedFatalError(const char *function_name) { fprintf(stderr, "64-bit %s() not implemented on this platform\n", @@ -411,4 +411,4 @@ inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, } // namespace base::subtle } // namespace base -#endif // BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ +#endif // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ diff --git a/src/base/atomicops.h b/src/base/atomicops.h index f683766..f36df5f 100644 --- a/src/base/atomicops.h +++ b/src/base/atomicops.h @@ -95,11 +95,9 @@ #include "base/atomicops-internals-arm-v6plus.h" #elif defined(ARMV3) #include "base/atomicops-internals-arm-generic.h" -#elif defined(_MSC_VER) && defined(_M_IX86) -#include "base/atomicops-internals-x86-msvc.h" -#elif defined(__MINGW32__) && defined(__i386__) -#include "base/atomicops-internals-x86-msvc.h" -#elif defined(__GNUC__) && (defined(__i386) || defined(ARCH_K8)) +#elif defined(_WIN32) +#include "base/atomicops-internals-x86-windows.h" +#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__)) #include "base/atomicops-internals-x86.h" #elif defined(__linux__) && defined(__PPC__) #include "base/atomicops-internals-linuxppc.h" diff --git a/src/base/cycleclock.h b/src/base/cycleclock.h index a5ce138..11f9c9b 100644 --- a/src/base/cycleclock.h +++ b/src/base/cycleclock.h @@ -50,6 +50,15 @@ #if defined(__MACH__) && defined(__APPLE__) # include <mach/mach_time.h> #endif +// For MSVC, we want the __rdtsc intrinsic, declared in <intrin.h>. +// Unfortunately, in some environments, <windows.h> and <intrin.h> have +// conflicting declarations of some other intrinsics, breaking compilation. +// Therefore, we simply declare __rdtsc ourselves. See also +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +#if defined(_MSC_VER) +extern "C" uint64 __rdtsc(); +#pragma intrinsic(__rdtsc) +#endif #include <sys/time.h> // NOTE: only i386 and x86_64 have been well tested. 
@@ -97,8 +106,8 @@ struct CycleClock { int64 itc; asm("mov %0 = ar.itc" : "=r" (itc)); return itc; -#elif defined(_MSC_VER) && defined(_M_IX86) - _asm rdtsc +#elif defined(_MSC_VER) + return __rdtsc(); #elif defined(ARMV3) #if defined(ARMV6) // V6 is the earliest arch that has a standard cyclecount uint32 pmccntr; diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc index 3e97ac9..5396743 100644 --- a/src/base/sysinfo.cc +++ b/src/base/sysinfo.cc @@ -204,7 +204,7 @@ bool GetUniquePathFromEnv(const char* env_name, char* path) { static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous static int cpuinfo_num_cpus = 1; // Conservative guess -static void SleepForMilliseconds(int milliseconds) { +void SleepForMilliseconds(int milliseconds) { #ifdef PLATFORM_WINDOWS _sleep(milliseconds); // Windows's _sleep takes milliseconds argument #else @@ -318,6 +318,7 @@ static void InitializeSystemInfo() { } double bogo_clock = 1.0; + bool saw_bogo = false; int num_cpus = 0; line[0] = line[1] = '\0'; int chars_read = 0; @@ -341,19 +342,23 @@ static void InitializeSystemInfo() { if (newline != NULL) *newline = '\0'; + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept postive values. Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. 
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { const char* freqstr = strchr(line, ':'); if (freqstr) { cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0') + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) saw_mhz = true; } } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) { const char* freqstr = strchr(line, ':'); - if (freqstr) + if (freqstr) { bogo_clock = strtod(freqstr+1, &err) * 1000000.0; - if (freqstr == NULL || freqstr[1] == '\0' || *err != '\0') - bogo_clock = 1.0; + if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) + saw_bogo = true; + } } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) { num_cpus++; // count up every time we see an "processor :" entry } @@ -361,9 +366,14 @@ static void InitializeSystemInfo() { close(fd); if (!saw_mhz) { - // If we didn't find anything better, we'll use bogomips, but - // we're not happy about it. - cpuinfo_cycles_per_second = bogo_clock; + if (saw_bogo) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } else { + // If we don't even have bogomips, we'll use the slow estimation. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } } if (cpuinfo_cycles_per_second == 0.0) { cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h index 8bae5e3..7935855 100644 --- a/src/base/sysinfo.h +++ b/src/base/sysinfo.h @@ -71,6 +71,8 @@ extern bool GetUniquePathFromEnv(const char* env_name, char* path); extern int NumCPUs(); +void SleepForMilliseconds(int milliseconds); + // processor cycles per second of each processor. Thread-safe. 
extern double CyclesPerSecond(void); diff --git a/src/base/vdso_support.h b/src/base/vdso_support.h index 108a1fe..131646a 100644 --- a/src/base/vdso_support.h +++ b/src/base/vdso_support.h @@ -30,6 +30,7 @@ #ifdef HAVE_FEATURES_H #include <features.h> // for __GLIBC__ #endif +#include "base/basictypes.h" // Maybe one day we can rewrite this file not to require the elf // symbol extensions in glibc, but for right now we need them. @@ -39,7 +40,6 @@ #include <stdlib.h> // for NULL #include <link.h> // for ElfW -#include "base/basictypes.h" namespace base { diff --git a/src/central_freelist.cc b/src/central_freelist.cc index 7fe8c4f..6b3be06 100644 --- a/src/central_freelist.cc +++ b/src/central_freelist.cc @@ -33,8 +33,10 @@ #include "config.h" #include "central_freelist.h" -#include "linked_list.h" -#include "static_vars.h" +#include "internal_logging.h" // for ASSERT, MESSAGE +#include "linked_list.h" // for SLL_Next, SLL_Push, etc +#include "page_heap.h" // for PageHeap +#include "static_vars.h" // for Static namespace tcmalloc { @@ -44,7 +46,12 @@ void CentralFreeList::Init(size_t cl) { tcmalloc::DLL_Init(&nonempty_); counter_ = 0; - cache_size_ = 1; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Disable the transfer cache for the small footprint case. + cache_size_ = 0; +#else + cache_size_ = 16; +#endif used_slots_ = 0; ASSERT(cache_size_ <= kNumTransferEntries); } @@ -142,8 +149,14 @@ bool CentralFreeList::MakeCacheSpace() { if (EvictRandomSizeClass(size_class_, false) || EvictRandomSizeClass(size_class_, true)) { // Succeeded in evicting, we're going to make our cache larger. - cache_size_++; - return true; + // However, we may have dropped and re-acquired the lock in + // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the + // cache_size may have changed. Therefore, check and verify that it is + // still OK to increase the cache_size. 
+ if (cache_size_ < kNumTransferEntries) { + cache_size_++; + return true; + } } return false; } @@ -323,20 +336,4 @@ int CentralFreeList::tc_length() { return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_); } -size_t CentralFreeList::OverheadBytes() { - SpinLockHolder h(&lock_); - size_t overhead = 0; - for (const Span* s = empty_.next; s != &empty_; s = s->next) { - ASSERT(size_class_ == s->sizeclass); - ASSERT(size_class_ != 0); - overhead += (s->length * kPageSize) % size_class_; - } - for (const Span* s = nonempty_.next; s != &nonempty_; s = s->next) { - ASSERT(size_class_ == s->sizeclass); - ASSERT(size_class_ != 0); - overhead += (s->length * kPageSize) % size_class_; - } - return overhead; -} - } // namespace tcmalloc diff --git a/src/central_freelist.h b/src/central_freelist.h index a520f68..69a09af 100644 --- a/src/central_freelist.h +++ b/src/central_freelist.h @@ -34,8 +34,10 @@ #define TCMALLOC_CENTRAL_FREELIST_H_ #include "config.h" -#include "base/thread_annotations.h" +#include <stddef.h> // for size_t +#include <stdint.h> // for int32_t #include "base/spinlock.h" +#include "base/thread_annotations.h" #include "common.h" #include "span.h" @@ -64,12 +66,6 @@ class CentralFreeList { // Returns the number of free objects in the transfer cache. int tc_length(); - // Returns the memory overhead (internal fragmentation) attributable - // to the freelist. This is memory lost when the size of elements - // in a freelist doesn't exactly divide the page-size (a 8192-byte - // page full of 5-byte objects would have 2 bytes memory overhead). - size_t OverheadBytes(); - private: // TransferCache is used to cache transfers of // sizemap.num_objects_to_move(size_class) back and forth between @@ -84,7 +80,12 @@ class CentralFreeList { // number of TCEntries across size classes is fixed. Currently each size // class is initially given one TCEntry which also means that the maximum any // one class can have is kNumClasses. 
+#ifdef TCMALLOC_SMALL_BUT_SLOW + // For the small memory model, the transfer cache is not used. + static const int kNumTransferEntries = 0; +#else static const int kNumTransferEntries = kNumClasses; +#endif // REQUIRES: lock_ is held // Remove object from cache and return. diff --git a/src/common.cc b/src/common.cc index 4b84f18..8221b08 100644 --- a/src/common.cc +++ b/src/common.cc @@ -31,9 +31,8 @@ // Author: Sanjay Ghemawat <opensource@google.com> #include "config.h" -#include "system-alloc.h" -#include "config.h" #include "common.h" +#include "system-alloc.h" namespace tcmalloc { diff --git a/src/common.h b/src/common.h index 53050ca..e960c57 100644 --- a/src/common.h +++ b/src/common.h @@ -36,13 +36,11 @@ #define TCMALLOC_COMMON_H_ #include "config.h" -#include <stddef.h> +#include <stddef.h> // for size_t #ifdef HAVE_STDINT_H -#include <stdint.h> +#include <stdint.h> // for uintptr_t, uint64_t #endif -#include <stdarg.h> -#include "base/commandlineflags.h" -#include "internal_logging.h" +#include "internal_logging.h" // for ASSERT, etc // Type that can hold a page number typedef uintptr_t PageID; @@ -81,7 +79,13 @@ static const size_t kLargeSizeClass = 0; static const size_t kMaxPages = 1 << (20 - kPageShift); // Default bound on the total amount of thread caches. +#ifdef TCMALLOC_SMALL_BUT_SLOW +// Make the overall thread cache no bigger than that of a single thread +// for the small memory footprint case. 
+static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; +#else static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; +#endif // Lower bound on the per-thread cache sizes static const size_t kMinThreadCacheSize = kMaxSize * 2; @@ -104,13 +108,13 @@ static const int kMaxDynamicFreeListLength = 8192; static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; -#ifdef __x86_64__ +#if defined __x86_64__ // All current and planned x86_64 processors only look at the lower 48 bits // in virtual to physical address translation. The top 16 are thus unused. // TODO(rus): Under what operating systems can we increase it safely to 17? // This lets us use smaller page maps. On first allocation, a 36-bit page map // uses only 96 KB instead of the 4.5 MB used by a 52-bit page map. -static const int kAddressBits = 48; +static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); #else static const int kAddressBits = 8 * sizeof(void*); #endif diff --git a/src/config.h.in b/src/config.h.in index 3b8d6f3..9112951 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -122,6 +122,9 @@ /* Define to 1 if the system has the type `struct mallinfo'. */ #undef HAVE_STRUCT_MALLINFO +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#undef HAVE_SYS_CDEFS_H + /* Define to 1 if you have the <sys/malloc.h> header file. 
*/ #undef HAVE_SYS_MALLOC_H diff --git a/src/debugallocation.cc b/src/debugallocation.cc index d38d1e0..9de927a 100644 --- a/src/debugallocation.cc +++ b/src/debugallocation.cc @@ -62,22 +62,19 @@ #include <errno.h> #include <string.h> +#include <google/malloc_extension.h> +#include <google/malloc_hook.h> +#include <google/stacktrace.h> #include "base/commandlineflags.h" #include "base/googleinit.h" #include "base/logging.h" -#include "google/malloc_extension.h" -#include "google/malloc_hook.h" -#include "google/stacktrace.h" +#include "base/spinlock.h" #include "addressmap-inl.h" #include "malloc_hook-inl.h" #include "symbolize.h" -#ifdef TCMALLOC_FOR_DEBUGALLOCATION +#define TCMALLOC_USING_DEBUGALLOCATION #include "tcmalloc.cc" -#else -#include "base/spinlock.h" -// Else we already have a SpinLock defined in tcmalloc/internal_spinlock.h -#endif // __THROW is defined in glibc systems. It means, counter-intuitively, // "This function will never throw an exception." It's an optional @@ -134,11 +131,6 @@ DEFINE_bool(symbolize_stacktrace, static void TracePrintf(int fd, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3))); -// -// Define the malloc/free/mallopt/mallinfo implementations -// we will be working on top of: -#ifdef TCMALLOC_FOR_DEBUGALLOCATION - // The do_* functions are defined in tcmalloc/tcmalloc.cc, // which is included before this file // when TCMALLOC_FOR_DEBUGALLOCATION is defined @@ -150,39 +142,6 @@ static void TracePrintf(int fd, const char *fmt, ...) #define BASE_MALLINFO do_mallinfo #define BASE_MALLOC_SIZE(ptr) GetSizeWithCallback(ptr, &InvalidGetAllocatedSize) -#else - -// GNU has some weird "weak aliasing" thing that permits us to define our -// own malloc(), free(), and realloc() which can use the normal versions of -// of themselves by calling __libc_malloc(), __libc_free(), and -// __libc_realloc(). 
-// -extern "C" { - extern void* __libc_malloc(size_t size); - extern void __libc_free(void* ptr); - extern void* __libc_realloc(void* ptr, size_t size); - extern void* __libc_calloc(size_t nmemb, size_t size); - extern int __libc_mallopt(int cmd, int value); -#ifdef HAVE_STRUCT_MALLINFO - extern struct mallinfo __libc_mallinfo(void); -#endif - static void noop_malloc_stats(void) {} -} - -// We are working on top of standard libc's malloc library -#define BASE_MALLOC_NEW __libc_malloc -#define BASE_MALLOC __libc_malloc -#define BASE_FREE __libc_free -#define BASE_MALLOC_STATS noop_malloc_stats -#define BASE_MALLOPT __libc_mallopt -#ifdef HAVE_STRUCT_MALLINFO -#define BASE_MALLINFO __libc_mallinfo -#endif -// This is malloc_size() on OS X, malloc_usable_size() on libc, -// _msize() on windows. Rather than trying to pick, we just bail. -#define BASE_MALLOC_SIZE(ptr) 0 // TODO(csilvers): do better -#endif - // ========================================================================= // class MallocBlock; @@ -998,18 +957,10 @@ static inline void DebugDeallocate(void* ptr, int type) { // The following functions may be called via MallocExtension::instance() // for memory verification and statistics. 
-#ifdef TCMALLOC_FOR_DEBUGALLOCATION -// Inherit from tcmalloc's version -typedef TCMallocImplementation ParentImplementation; -#else -// Inherit from default version -typedef MallocExtension ParentImplementation; -#endif - -class DebugMallocImplementation : public ParentImplementation { +class DebugMallocImplementation : public TCMallocImplementation { public: virtual bool GetNumericProperty(const char* name, size_t* value) { - bool result = ParentImplementation::GetNumericProperty(name, value); + bool result = TCMallocImplementation::GetNumericProperty(name, value); if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) { // Subtract bytes kept in the free queue size_t qsize = MallocBlock::FreeQueueSize(); @@ -1057,7 +1008,7 @@ class DebugMallocImplementation : public ParentImplementation { virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) { static const char* kDebugFreeQueue = "debug.free_queue"; - ParentImplementation::GetFreeListSizes(v); + TCMallocImplementation::GetFreeListSizes(v); MallocExtension::FreeListInfo i; i.type = kDebugFreeQueue; diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h index a2e956e..0e15c04 100644 --- a/src/google/malloc_extension.h +++ b/src/google/malloc_extension.h @@ -70,6 +70,22 @@ namespace base { struct MallocRange; } +// Interface to a pluggable system allocator. +class SysAllocator { + public: + SysAllocator() { + } + virtual ~SysAllocator(); + + // Allocates "size"-byte of memory from system aligned with "alignment". + // Returns NULL if failed. Otherwise, the returned pointer p up to and + // including (p + actual_size -1) have been allocated. + virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0; + + // Notification that command-line flags have been initialized. + virtual void FlagsInitialized() = 0; +}; + // The default implementations of the following routines do nothing. 
// All implementations should be thread-safe; the current one // (TCMallocImplementation) is. @@ -102,7 +118,9 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Outputs to "writer" a sample of live objects and the stack traces // that allocated these objects. The format of the returned output // is equivalent to the output of the heap profiler and can - // therefore be passed to "pprof". + // therefore be passed to "pprof". This function is equivalent to + // ReadStackTraces. The main difference is that this function returns + // serialized data appropriately formatted for use by the pprof tool. // NOTE: by default, tcmalloc does not do any heap sampling, and this // function will always return an empty sample. To get useful // data from GetHeapSample, you must also set the environment @@ -112,7 +130,10 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Outputs to "writer" the stack traces that caused growth in the // address space size. The format of the returned output is // equivalent to the output of the heap profiler and can therefore - // be passed to "pprof". (This does not depend on, or require, + // be passed to "pprof". This function is equivalent to + // ReadHeapGrowthStackTraces. The main difference is that this function + // returns serialized data appropriately formatted for use by the + // pprof tool. (This does not depend on, or require, // TCMALLOC_SAMPLE_PARAMETER.) virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer); @@ -201,6 +222,27 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Most malloc implementations ignore this routine. virtual void MarkThreadBusy(); + // Gets the system allocator used by the malloc extension instance. Returns + // NULL for malloc implementations that do not support pluggable system + // allocators. + virtual SysAllocator* GetSystemAllocator(); + + // Sets the system allocator to the specified. 
+ // + // Users could register their own system allocators for malloc implementation + // that supports pluggable system allocators, such as TCMalloc, by doing: + // alloc = new MyOwnSysAllocator(); + // MallocExtension::instance()->SetSystemAllocator(alloc); + // It's up to users whether to fall back (recommended) to the default + // system allocator (use GetSystemAllocator() above) or not. The caller is + // responsible to any necessary locking. + // See tcmalloc/system-alloc.h for the interface and + // tcmalloc/memfs_malloc.cc for the examples. + // + // It's a no-op for malloc implementations that do not support pluggable + // system allocators. + virtual void SetSystemAllocator(SysAllocator *a); + // Try to release num_bytes of free memory back to the operating // system for reuse. Use this extension with caution -- to get this // memory back may require faulting pages back in by the OS, and @@ -289,7 +331,6 @@ class PERFTOOLS_DLL_DECL MallocExtension { // "tcmalloc.thread" - tcmalloc's per-thread caches. Never unmapped. virtual void GetFreeListSizes(std::vector<FreeListInfo>* v); - protected: // Get a list of stack traces of sampled allocation points. Returns // a pointer to a "new[]-ed" result array, and stores the sample // period in "sample_period". diff --git a/src/google/malloc_hook.h b/src/google/malloc_hook.h index 16d9075..3e2ef45 100644 --- a/src/google/malloc_hook.h +++ b/src/google/malloc_hook.h @@ -62,10 +62,6 @@ // // NOTE FOR C USERS: If you want to use malloc_hook functionality from // a C program, #include malloc_hook_c.h instead of this file. -// -// TODO(csilvers): support a non-inlined function called -// Assert*HookIs()? This is the context in which I normally see -// Get*Hook() called in non-tcmalloc code. #ifndef _MALLOC_HOOK_H_ #define _MALLOC_HOOK_H_ @@ -132,6 +128,32 @@ class PERFTOOLS_DLL_DECL MallocHook { int fd, off_t offset); + // The MmapReplacement is invoked after the PreMmapHook but before + // the call is actually made. 
The MmapReplacement should return true + // if it handled the call, or false if it is still necessary to + // call mmap/mmap64. + // This should be used only by experts, and users must be be + // extremely careful to avoid recursive calls to mmap. The replacement + // should be async signal safe. + // Only one MmapReplacement is supported. After setting an MmapReplacement + // you must call RemoveMmapReplacement before calling SetMmapReplacement + // again. + typedef MallocHook_MmapReplacement MmapReplacement; + inline static bool SetMmapReplacement(MmapReplacement hook) { + return MallocHook_SetMmapReplacement(hook); + } + inline static bool RemoveMmapReplacement(MmapReplacement hook) { + return MallocHook_RemoveMmapReplacement(hook); + } + inline static bool InvokeMmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); + + // The MmapHook is invoked whenever a region of memory is mapped. // It may be passed MAP_FAILED if the mmap failed. typedef MallocHook_MmapHook MmapHook; @@ -149,6 +171,26 @@ class PERFTOOLS_DLL_DECL MallocHook { int fd, off_t offset); + // The MunmapReplacement is invoked with munmap arguments just before + // the call is actually made. The MunmapReplacement should return true + // if it handled the call, or false if it is still necessary to + // call munmap. + // This should be used only by experts. The replacement should be + // async signal safe. + // Only one MunmapReplacement is supported. After setting an + // MunmapReplacement you must call RemoveMunmapReplacement before + // calling SetMunmapReplacement again. 
+ typedef MallocHook_MunmapReplacement MunmapReplacement; + inline static bool SetMunmapReplacement(MunmapReplacement hook) { + return MallocHook_SetMunmapReplacement(hook); + } + inline static bool RemoveMunmapReplacement(MunmapReplacement hook) { + return MallocHook_RemoveMunmapReplacement(hook); + } + inline static bool InvokeMunmapReplacement(const void* p, + size_t size, + int* result); + // The MunmapHook is invoked whenever a region of memory is unmapped. typedef MallocHook_MunmapHook MunmapHook; inline static bool AddMunmapHook(MunmapHook hook) { @@ -214,6 +256,8 @@ class PERFTOOLS_DLL_DECL MallocHook { // Unhooked versions of mmap() and munmap(). These should be used // only by experts, since they bypass heapchecking, etc. + // Note: These do not run hooks, but they still use the MmapReplacement + // and MunmapReplacement. static void* UnhookedMMap(void *start, size_t length, int prot, int flags, int fd, off_t offset); static int UnhookedMUnmap(void *start, size_t length); @@ -277,7 +321,17 @@ class PERFTOOLS_DLL_DECL MallocHook { int flags, int fd, off_t offset); + static bool InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); static void InvokeMunmapHookSlow(const void* p, size_t size); + static bool InvokeMunmapReplacementSlow(const void* p, + size_t size, + int* result); static void InvokeMremapHookSlow(const void* result, const void* old_addr, size_t old_size, diff --git a/src/google/malloc_hook_c.h b/src/google/malloc_hook_c.h index 420cd33..56337e1 100644 --- a/src/google/malloc_hook_c.h +++ b/src/google/malloc_hook_c.h @@ -102,12 +102,28 @@ int MallocHook_AddMmapHook(MallocHook_MmapHook hook); PERFTOOLS_DLL_DECL int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook); +typedef int (*MallocHook_MmapReplacement)(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); +int 
MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook); +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook); + typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size); PERFTOOLS_DLL_DECL int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook); PERFTOOLS_DLL_DECL int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook); +typedef int (*MallocHook_MunmapReplacement)(const void* ptr, + size_t size, + int* result); +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook); +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook); + typedef void (*MallocHook_MremapHook)(const void* result, const void* old_addr, size_t old_size, diff --git a/src/google/profiler.h b/src/google/profiler.h index a6883f4..7971e04 100644 --- a/src/google/profiler.h +++ b/src/google/profiler.h @@ -157,7 +157,7 @@ struct ProfilerState { int enabled; /* Is profiling currently enabled? */ time_t start_time; /* If enabled, when was profiling started? */ char profile_name[1024]; /* Name of profile file being written, or '\0' */ - int samples_gathered; /* Number of samples gatheered to far (or 0) */ + int samples_gathered; /* Number of samples gathered so far (or 0) */ }; PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state); diff --git a/src/google/tcmalloc.h.in b/src/google/tcmalloc.h.in index d42f4e6..c887559 100644 --- a/src/google/tcmalloc.h.in +++ b/src/google/tcmalloc.h.in @@ -35,6 +35,11 @@ #ifndef TCMALLOC_TCMALLOC_H_ #define TCMALLOC_TCMALLOC_H_ +#include <stddef.h> // for size_t +#ifdef HAVE_SYS_CDEFS_H +#include <sys/cdefs.h> // where glibc defines __THROW +#endif + // __THROW is defined in glibc systems. It means, counter-intuitively, // "This function will never throw an exception." It's an optional // optimization tool, but we may need to use it to match glibc prototypes. 
@@ -60,7 +65,9 @@ #endif #ifdef __cplusplus -#include <new> // for std::nothrow_t +namespace std { +struct nothrow_t; +} extern "C" { #endif diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc index fc4f154..5e30c22 100644 --- a/src/heap-profiler.cc +++ b/src/heap-profiler.cc @@ -33,7 +33,7 @@ // TODO: Log large allocations #include <config.h> - +#include <stddef.h> #include <stdio.h> #include <stdlib.h> #ifdef HAVE_UNISTD_H diff --git a/src/internal_logging.cc b/src/internal_logging.cc index 4e46ba8..4c90190 100644 --- a/src/internal_logging.cc +++ b/src/internal_logging.cc @@ -31,15 +31,18 @@ // Sanjay Ghemawat <opensource@google.com> #include <config.h> -#include <stdio.h> -#include <stdarg.h> +#include "internal_logging.h" +#include <stdarg.h> // for va_end, va_start +#include <stdio.h> // for vsnprintf, va_list, etc +#include <stdlib.h> // for abort +#include <string.h> // for strlen, memcpy #ifdef HAVE_UNISTD_H #include <unistd.h> // for write() #endif -#include <string.h> + #include <google/malloc_extension.h> -#include "internal_logging.h" #include "base/logging.h" // for perftools_vsnprintf +#include "base/spinlock.h" // for SpinLockHolder, SpinLock static const int kLogBufSize = 800; diff --git a/src/internal_logging.h b/src/internal_logging.h index 0cb9ba2..ce4a516 100644 --- a/src/internal_logging.h +++ b/src/internal_logging.h @@ -36,10 +36,7 @@ #define TCMALLOC_INTERNAL_LOGGING_H_ #include <config.h> -#include <stdlib.h> // for abort() -#ifdef HAVE_UNISTD_H -#include <unistd.h> // for write() -#endif +#include <stddef.h> // for size_t //------------------------------------------------------------------- // Utility routines diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index 1272068..e9a0da7 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -32,7 +32,6 @@ #include <config.h> #include <assert.h> -#include <stdio.h> #include <string.h> #include <stdio.h> #if defined HAVE_STDINT_H @@ -102,6 +101,9 @@ void 
MallocExtension::Initialize() { #endif /* __GLIBC__ */ } +// SysAllocator implementation +SysAllocator::~SysAllocator() {} + // Default implementation -- does nothing MallocExtension::~MallocExtension() { } bool MallocExtension::VerifyAllMemory() { return true; } @@ -146,6 +148,14 @@ void MallocExtension::MarkThreadBusy() { // Default implementation does nothing } +SysAllocator* MallocExtension::GetSystemAllocator() { + return NULL; +} + +void MallocExtension::SetSystemAllocator(SysAllocator *a) { + // Default implementation does nothing +} + void MallocExtension::ReleaseToSystem(size_t num_bytes) { // Default implementation does nothing } diff --git a/src/malloc_hook-inl.h b/src/malloc_hook-inl.h index 027d6e2..b24b1c6 100644 --- a/src/malloc_hook-inl.h +++ b/src/malloc_hook-inl.h @@ -142,7 +142,9 @@ extern HookList<MallocHook::NewHook> new_hooks_; extern HookList<MallocHook::DeleteHook> delete_hooks_; extern HookList<MallocHook::PreMmapHook> premmap_hooks_; extern HookList<MallocHook::MmapHook> mmap_hooks_; +extern HookList<MallocHook::MmapReplacement> mmap_replacement_; extern HookList<MallocHook::MunmapHook> munmap_hooks_; +extern HookList<MallocHook::MunmapReplacement> munmap_replacement_; extern HookList<MallocHook::MremapHook> mremap_hooks_; extern HookList<MallocHook::PreSbrkHook> presbrk_hooks_; extern HookList<MallocHook::SbrkHook> sbrk_hooks_; @@ -225,6 +227,22 @@ inline void MallocHook::InvokeMmapHook(const void* result, // End DEPRECATED code. 
} +inline bool MallocHook::InvokeMmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + if (!base::internal::mmap_replacement_.empty()) { + return InvokeMmapReplacementSlow(start, size, + protection, flags, + fd, offset, + result); + } + return false; +} + // The following method is DEPRECATED inline MallocHook::MunmapHook MallocHook::GetMunmapHook() { return base::internal::munmap_hook_.Get(); @@ -240,6 +258,14 @@ inline void MallocHook::InvokeMunmapHook(const void* p, size_t size) { // End DEPRECATED code. } +inline bool MallocHook::InvokeMunmapReplacement( + const void* p, size_t size, int* result) { + if (!base::internal::munmap_replacement_.empty()) { + return InvokeMunmapReplacementSlow(p, size, result); + } + return false; +} + // The following method is DEPRECATED inline MallocHook::MremapHook MallocHook::GetMremapHook() { return base::internal::mremap_hook_.Get(); diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc index b185905..5a61362 100644 --- a/src/malloc_hook.cc +++ b/src/malloc_hook.cc @@ -41,6 +41,10 @@ # undef mremap #endif +#include <stddef.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif #include <algorithm> #include "base/basictypes.h" #include "base/logging.h" @@ -293,6 +297,10 @@ HookList<MallocHook::PreSbrkHook> presbrk_hooks_ = INIT_HOOK_LIST_WITH_VALUE(InitialPreSbrkHook); HookList<MallocHook::SbrkHook> sbrk_hooks_ = INIT_HOOK_LIST; +// These lists contain either 0 or 1 hooks. 
+HookList<MallocHook::MmapReplacement> mmap_replacement_ = { 0 }; +HookList<MallocHook::MunmapReplacement> munmap_replacement_ = { 0 }; + #undef INIT_HOOK_LIST_WITH_VALUE #undef INIT_HOOK_LIST @@ -314,7 +322,9 @@ using base::internal::new_hooks_; using base::internal::delete_hooks_; using base::internal::premmap_hooks_; using base::internal::mmap_hooks_; +using base::internal::mmap_replacement_; using base::internal::munmap_hooks_; +using base::internal::munmap_replacement_; using base::internal::mremap_hooks_; using base::internal::presbrk_hooks_; using base::internal::sbrk_hooks_; @@ -358,6 +368,21 @@ int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook) { } extern "C" +int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "SetMmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. + RAW_CHECK(mmap_replacement_.empty(), "Only one MMapReplacement is allowed."); + return mmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "RemoveMmapReplacement(%p)", hook); + return mmap_replacement_.Remove(hook); +} + +extern "C" int MallocHook_AddMmapHook(MallocHook_MmapHook hook) { RAW_VLOG(10, "AddMmapHook(%p)", hook); return mmap_hooks_.Add(hook); @@ -382,6 +407,22 @@ int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook) { } extern "C" +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "SetMunmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. 
+ RAW_CHECK(munmap_replacement_.empty(), + "Only one MunmapReplacement is allowed."); + return munmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "RemoveMunmapReplacement(%p)", hook); + return munmap_replacement_.Remove(hook); +} + +extern "C" int MallocHook_AddMremapHook(MallocHook_MremapHook hook) { RAW_VLOG(10, "AddMremapHook(%p)", hook); return mremap_hooks_.Add(hook); @@ -479,6 +520,15 @@ MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) { } \ } while (0) +// There should only be one replacement. Return the result of the first +// one, or false if there is none. +#define INVOKE_REPLACEMENT(HookType, hook_list, args) do { \ + HookType hooks[kHookListMaxValues]; \ + int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ + return (num_hooks > 0 && (*hooks[0])args); \ + } while (0) + + void MallocHook::InvokeNewHookSlow(const void* p, size_t s) { INVOKE_HOOKS(NewHook, new_hooks_, (p, s)); } @@ -508,10 +558,27 @@ void MallocHook::InvokeMmapHookSlow(const void* result, fd, offset)); } +bool MallocHook::InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + INVOKE_REPLACEMENT(MmapReplacement, mmap_replacement_, + (start, size, protection, flags, fd, offset, result)); +} + void MallocHook::InvokeMunmapHookSlow(const void* p, size_t s) { INVOKE_HOOKS(MunmapHook, munmap_hooks_, (p, s)); } +bool MallocHook::InvokeMunmapReplacementSlow(const void* p, + size_t s, + int* result) { + INVOKE_REPLACEMENT(MunmapReplacement, munmap_replacement_, (p, s, result)); +} + void MallocHook::InvokeMremapHookSlow(const void* result, const void* old_addr, size_t old_size, @@ -739,7 +806,11 @@ extern "C" { extern "C" void* mmap64(void *start, size_t length, int prot, int flags, int fd, __off64_t offset) __THROW { MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); - void 
*result = do_mmap64(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); return result; } @@ -749,8 +820,12 @@ extern "C" void* mmap64(void *start, size_t length, int prot, int flags, extern "C" void* mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) __THROW { MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); - void *result = do_mmap64(start, length, prot, flags, fd, - static_cast<size_t>(offset)); // avoid sign extension + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, + static_cast<size_t>(offset)); // avoid sign extension + } MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); return result; } @@ -759,7 +834,11 @@ extern "C" void* mmap(void *start, size_t length, int prot, int flags, extern "C" int munmap(void* start, size_t length) __THROW { MallocHook::InvokeMunmapHook(start, length); - return syscall(SYS_munmap, start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = syscall(SYS_munmap, start, length); + } + return result; } extern "C" void* mremap(void* old_addr, size_t old_size, size_t new_size, @@ -786,11 +865,20 @@ extern "C" void* sbrk(ptrdiff_t increment) __THROW { /*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { - return do_mmap64(start, length, prot, flags, fd, offset); + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } + return result; } /*static*/int MallocHook::UnhookedMUnmap(void 
*start, size_t length) { - return sys_munmap(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = sys_munmap(start, length); + } + return result; } #else // defined(__linux) && @@ -798,11 +886,20 @@ extern "C" void* sbrk(ptrdiff_t increment) __THROW { /*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { - return mmap(start, length, prot, flags, fd, offset); + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = mmap(start, length, prot, flags, fd, offset); + } + return result; } /*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { - return munmap(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = munmap(start, length); + } + return result; } #endif // defined(__linux) && diff --git a/src/memfs_malloc.cc b/src/memfs_malloc.cc index 210a4ee..3fb55a4 100644 --- a/src/memfs_malloc.cc +++ b/src/memfs_malloc.cc @@ -38,16 +38,23 @@ #ifdef __linux #include <config.h> -#include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <inttypes.h> -#include <sys/mman.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/vfs.h> // for statfs +#include <errno.h> // for errno, EINVAL +#include <inttypes.h> // for PRId64 +#include <limits.h> // for PATH_MAX +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for int64_t, uintptr_t +#endif +#include <stdio.h> // for snprintf +#include <stdlib.h> // for mkstemp +#include <string.h> // for strerror +#include <sys/mman.h> // for mmap, MAP_FAILED, etc +#include <sys/statfs.h> // for fstatfs, statfs +#include <unistd.h> // for ftruncate, off_t, unlink +#include <new> // for operator new #include <string> +#include <google/malloc_extension.h> #include "base/basictypes.h" #include "base/googleinit.h" #include "base/sysinfo.h" 
@@ -78,51 +85,68 @@ DEFINE_bool(memfs_malloc_map_private, // Hugetlbfs based allocator for tcmalloc class HugetlbSysAllocator: public SysAllocator { public: - HugetlbSysAllocator(int fd, int page_size) - : big_page_size_(page_size), - hugetlb_fd_(fd), - hugetlb_base_(0) { + explicit HugetlbSysAllocator(SysAllocator* fallback) + : failed_(true), // Unusable until FlagsInitialized() is called + big_page_size_(0), + hugetlb_fd_(-1), + hugetlb_base_(0), + fallback_(fallback) { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized(); + bool failed_; // Whether failed to allocate memory. private: + void* AllocInternal(size_t size, size_t *actual_size, size_t alignment); + int64 big_page_size_; - int hugetlb_fd_; // file descriptor for hugetlb + int hugetlb_fd_; // file descriptor for hugetlb off_t hugetlb_base_; -}; -void HugetlbSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("HugetlbSysAllocator: failed_=%d allocated=%"PRId64"\n", - failed_, static_cast<int64_t>(hugetlb_base_)); -} + SysAllocator* fallback_; // Default system allocator to fall back to. +}; +static char hugetlb_space[sizeof(HugetlbSysAllocator)]; // No locking needed here since we assume that tcmalloc calls // us with an internal lock held (see tcmalloc/system-alloc.cc). void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { - - // don't go any further if we haven't opened the backing file - if (hugetlb_fd_ == -1) { - return NULL; + if (failed_) { + return fallback_->Alloc(size, actual_size, alignment); } // We don't respond to allocation requests smaller than big_page_size_ unless - // the caller is willing to take more than they asked for. + // the caller is ok to take more than they asked for. Used by MetaDataAlloc. 
if (actual_size == NULL && size < big_page_size_) { - return NULL; + return fallback_->Alloc(size, actual_size, alignment); } // Enforce huge page alignment. Be careful to deal with overflow. - if (alignment < big_page_size_) alignment = big_page_size_; - size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + size_t new_alignment = alignment; + if (new_alignment < big_page_size_) new_alignment = big_page_size_; + size_t aligned_size = ((size + new_alignment - 1) / + new_alignment) * new_alignment; if (aligned_size < size) { - return NULL; + return fallback_->Alloc(size, actual_size, alignment); + } + + void* result = AllocInternal(aligned_size, actual_size, new_alignment); + if (result != NULL) { + return result; + } + TCMalloc_MESSAGE(__FILE__, __LINE__, + "HugetlbSysAllocator: failed_=%d allocated=%"PRId64"\n", + failed_, static_cast<int64_t>(hugetlb_base_)); + if (FLAGS_memfs_malloc_abort_on_fail) { + CRASH("memfs_malloc_abort_on_fail is set\n"); } - size = aligned_size; + return fallback_->Alloc(size, actual_size, alignment); +} +void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size, + size_t alignment) { // Ask for extra memory if alignment > pagesize size_t extra = 0; if (alignment > big_page_size_) { @@ -142,9 +166,6 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, " too large while %"PRId64" bytes remain\n", size, static_cast<int64_t>(limit - hugetlb_base_)); } - if (FLAGS_memfs_malloc_abort_on_fail) { - CRASH("memfs_malloc_abort_on_fail is set\n"); - } return NULL; } @@ -155,9 +176,6 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, TCMalloc_MESSAGE(__FILE__, __LINE__, "ftruncate failed: %s\n", strerror(errno)); failed_ = true; - if (FLAGS_memfs_malloc_abort_on_fail) { - CRASH("memfs_malloc_abort_on_fail is set\n"); - } return NULL; } @@ -174,9 +192,6 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, TCMalloc_MESSAGE(__FILE__, __LINE__, "mmap of size 
%"PRIuS" failed: %s\n", size + extra, strerror(errno)); failed_ = true; - if (FLAGS_memfs_malloc_abort_on_fail) { - CRASH("memfs_malloc_abort_on_fail is set\n"); - } } return NULL; } @@ -197,7 +212,7 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, return reinterpret_cast<void*>(ptr); } -static void InitSystemAllocator() { +void HugetlbSysAllocator::FlagsInitialized() { if (FLAGS_memfs_malloc_path.length()) { char path[PATH_MAX]; int rc = snprintf(path, sizeof(path), "%s.XXXXXX", @@ -228,12 +243,18 @@ static void InitSystemAllocator() { } int64 page_size = sfs.f_bsize; - SysAllocator *alloc = new HugetlbSysAllocator(hugetlb_fd, page_size); - // Register ourselves with tcmalloc - RegisterSystemAllocator(alloc, 0); + hugetlb_fd_ = hugetlb_fd; + big_page_size_ = page_size; + failed_ = false; } } +static void InitSystemAllocator() { + SysAllocator *alloc = MallocExtension::instance()->GetSystemAllocator(); + HugetlbSysAllocator *hugetlb = new (hugetlb_space) HugetlbSysAllocator(alloc); + MallocExtension::instance()->SetSystemAllocator(hugetlb); +} + REGISTER_MODULE_INITIALIZER(memfs_malloc, { InitSystemAllocator(); }); #endif /* ifdef __linux */ diff --git a/src/memory_region_map.cc b/src/memory_region_map.cc index 17599af..3f17ac7 100644 --- a/src/memory_region_map.cc +++ b/src/memory_region_map.cc @@ -111,6 +111,7 @@ #ifdef HAVE_PTHREAD #include <pthread.h> // for pthread_t, pthread_self() #endif +#include <stddef.h> #include <algorithm> #include <set> diff --git a/src/memory_region_map.h b/src/memory_region_map.h index bc2862e..739514c 100644 --- a/src/memory_region_map.h +++ b/src/memory_region_map.h @@ -39,6 +39,7 @@ #ifdef HAVE_PTHREAD #include <pthread.h> #endif +#include <stddef.h> #include <set> #include "base/stl_allocator.h" #include "base/spinlock.h" diff --git a/src/packed-cache-inl.h b/src/packed-cache-inl.h index 9d2cfe3..77f42b6 100644 --- a/src/packed-cache-inl.h +++ b/src/packed-cache-inl.h @@ -112,10 +112,11 @@ #define 
TCMALLOC_PACKED_CACHE_INL_H_ #include "config.h" +#include <stddef.h> // for size_t #ifdef HAVE_STDINT_H -#include <stdint.h> +#include <stdint.h> // for uintptr_t #endif -#include "base/basictypes.h" // for COMPILE_ASSERT +#include "base/basictypes.h" #include "internal_logging.h" // A safe way of doing "(1 << n) - 1" -- without worrying about overflow @@ -134,7 +135,12 @@ class PackedCache { public: typedef uintptr_t K; typedef size_t V; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Decrease the size map cache if running in the small memory mode. static const int kHashbits = 12; +#else + static const int kHashbits = 16; +#endif static const int kValuebits = 7; static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T); diff --git a/src/page_heap.cc b/src/page_heap.cc index 2e02444..248e462 100644 --- a/src/page_heap.cc +++ b/src/page_heap.cc @@ -31,10 +31,16 @@ // Author: Sanjay Ghemawat <opensource@google.com> #include <config.h> -#include "page_heap.h" - -#include "static_vars.h" -#include "system-alloc.h" +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // for PRIuPTR +#endif +#include <google/malloc_extension.h> // for MallocRange, etc +#include "base/basictypes.h" +#include "base/commandlineflags.h" +#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static +#include "system-alloc.h" // for TCMalloc_SystemAlloc, etc DEFINE_double(tcmalloc_release_rate, EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0), @@ -336,11 +342,11 @@ void PageHeap::RegisterSizeClass(Span* span, size_t sc) { } } -static double MB(uint64_t bytes) { +static double MiB(uint64_t bytes) { return bytes / 1048576.0; } -static double PagesToMB(uint64_t pages) { +static double PagesToMiB(uint64_t pages) { return (pages << kPageShift) / 1048576.0; } @@ -381,8 +387,9 @@ void PageHeap::Dump(TCMalloc_Printer* out) { } } out->printf("------------------------------------------------\n"); - 
out->printf("PageHeap: %d sizes; %6.1f MB free; %6.1f MB unmapped\n", - nonempty_sizes, MB(stats_.free_bytes), MB(stats_.unmapped_bytes)); + out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n", + nonempty_sizes, MiB(stats_.free_bytes), + MiB(stats_.unmapped_bytes)); out->printf("------------------------------------------------\n"); uint64_t total_normal = 0; uint64_t total_returned = 0; @@ -394,14 +401,14 @@ void PageHeap::Dump(TCMalloc_Printer* out) { uint64_t r_pages = s * r_length; total_normal += n_pages; total_returned += r_pages; - out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum" - "; unmapped: %6.1f MB; %6.1f MB cum\n", + out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", s, (n_length + r_length), - PagesToMB(n_pages + r_pages), - PagesToMB(total_normal + total_returned), - PagesToMB(r_pages), - PagesToMB(total_returned)); + PagesToMiB(n_pages + r_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(r_pages), + PagesToMiB(total_returned)); } } @@ -411,27 +418,27 @@ void PageHeap::Dump(TCMalloc_Printer* out) { int r_spans = 0; out->printf("Normal large spans:\n"); for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { - out->printf(" [ %6" PRIuPTR " pages ] %6.1f MB\n", - s->length, PagesToMB(s->length)); + out->printf(" [ %6" PRIuPTR " pages ] %6.1f MiB\n", + s->length, PagesToMiB(s->length)); n_pages += s->length; n_spans++; } out->printf("Unmapped large spans:\n"); for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { - out->printf(" [ %6" PRIuPTR " pages ] %6.1f MB\n", - s->length, PagesToMB(s->length)); + out->printf(" [ %6" PRIuPTR " pages ] %6.1f MiB\n", + s->length, PagesToMiB(s->length)); r_pages += s->length; r_spans++; } total_normal += n_pages; total_returned += r_pages; - out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum" - "; unmapped: %6.1f MB; %6.1f MB cum\n", + out->printf(">255 large * %6u spans 
~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", (n_spans + r_spans), - PagesToMB(n_pages + r_pages), - PagesToMB(total_normal + total_returned), - PagesToMB(r_pages), - PagesToMB(total_returned)); + PagesToMiB(n_pages + r_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(r_pages), + PagesToMiB(total_returned)); } bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) { diff --git a/src/page_heap.h b/src/page_heap.h index 50ecb36..603e65a 100644 --- a/src/page_heap.h +++ b/src/page_heap.h @@ -34,7 +34,12 @@ #define TCMALLOC_PAGE_HEAP_H_ #include <config.h> +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint64_t, int64_t, uint16_t +#endif #include <google/malloc_extension.h> +#include "base/basictypes.h" #include "common.h" #include "packed-cache-inl.h" #include "pagemap.h" @@ -50,6 +55,8 @@ // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if // you're porting to a system where you really can't get a stacktrace. +// Because we control the definition of GetStackTrace, all clients of +// GetStackTrace should #include us rather than stacktrace.h. #ifdef NO_TCMALLOC_SAMPLES // We use #define so code compiles even if you #include stacktrace.h somehow. 
# define GetStackTrace(stack, depth, skip) (0) @@ -57,6 +64,11 @@ # include <google/stacktrace.h> #endif +class TCMalloc_Printer; +namespace base { +struct MallocRange; +} + namespace tcmalloc { // ------------------------------------------------------------------------- diff --git a/src/page_heap_allocator.h b/src/page_heap_allocator.h index 3f75939..bcff8b3 100644 --- a/src/page_heap_allocator.h +++ b/src/page_heap_allocator.h @@ -33,6 +33,11 @@ #ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ #define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ +#include <stddef.h> // for NULL, size_t + +#include "common.h" // for MetaDataAlloc +#include "internal_logging.h" // for ASSERT, CRASH + namespace tcmalloc { // Simple allocator for objects of a specified type. External locking diff --git a/src/pagemap.h b/src/pagemap.h index 1786e68..27cb3da 100644 --- a/src/pagemap.h +++ b/src/pagemap.h @@ -46,6 +46,9 @@ #define TCMALLOC_PAGEMAP_H_ #include "config.h" + +#include <stddef.h> // for NULL, size_t +#include <string.h> // for memset #if defined HAVE_STDINT_H #include <stdint.h> #elif defined HAVE_INTTYPES_H @@ -53,7 +56,7 @@ #else #include <sys/types.h> #endif -#include "internal_logging.h" +#include "internal_logging.h" // for ASSERT // Single-level array template <int BITS> diff --git a/src/sampler.cc b/src/sampler.cc index a13544a..0ea6df1 100755 --- a/src/sampler.cc +++ b/src/sampler.cc @@ -35,7 +35,8 @@ #include "sampler.h" #include <algorithm> // For min() -#include <cmath> +#include <math.h> +#include "base/commandlineflags.h" using std::min; diff --git a/src/sampler.h b/src/sampler.h index fa9e554..8e67fb0 100755 --- a/src/sampler.h +++ b/src/sampler.h @@ -36,8 +36,13 @@ #define TCMALLOC_SAMPLER_H_ #include "config.h" -#include "common.h" -#include "static_vars.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint64_t, uint32_t, int32_t +#endif +#include <string.h> // for memcpy +#include "base/basictypes.h" // for ASSERT +#include 
"internal_logging.h" // for ASSERT namespace tcmalloc { diff --git a/src/span.cc b/src/span.cc index ca0bab3..426a6bd 100644 --- a/src/span.cc +++ b/src/span.cc @@ -33,11 +33,11 @@ #include <config.h> #include "span.h" -#ifdef HAVE_INTTYPES_H -#include <inttypes.h> -#endif +#include <string.h> // for NULL, memset -#include "static_vars.h" +#include "internal_logging.h" // for ASSERT +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static namespace tcmalloc { diff --git a/src/stack_trace_table.cc b/src/stack_trace_table.cc index 6672af9..faeca6b 100644 --- a/src/stack_trace_table.cc +++ b/src/stack_trace_table.cc @@ -31,10 +31,13 @@ // Author: Andrew Fikes #include <config.h> -#include "base/spinlock.h" -#include "common.h" -#include "static_vars.h" #include "stack_trace_table.h" +#include <string.h> // for NULL, memset +#include "base/spinlock.h" // for SpinLockHolder +#include "common.h" // for StackTrace +#include "internal_logging.h" // for MESSAGE, ASSERT +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static namespace tcmalloc { diff --git a/src/stack_trace_table.h b/src/stack_trace_table.h index 799571a..e1d6a8a 100644 --- a/src/stack_trace_table.h +++ b/src/stack_trace_table.h @@ -36,6 +36,7 @@ #define TCMALLOC_STACK_TRACE_TABLE_H_ #include <config.h> +#include <stdint.h> // for uintptr_t #include "common.h" namespace tcmalloc { diff --git a/src/static_vars.cc b/src/static_vars.cc index 18d5146..2ca132e 100644 --- a/src/static_vars.cc +++ b/src/static_vars.cc @@ -31,7 +31,10 @@ // Author: Ken Ashcraft <opensource@google.com> #include "static_vars.h" -#include "sampler.h" // for the init function +#include <stddef.h> // for NULL +#include <new> // for operator new +#include "internal_logging.h" // for CHECK_CONDITION +#include "sampler.h" // for Sampler namespace tcmalloc { diff --git a/src/system-alloc.cc b/src/system-alloc.cc index f28fa22..690953d 100644 --- 
a/src/system-alloc.cc +++ b/src/system-alloc.cc @@ -31,28 +31,29 @@ // Author: Sanjay Ghemawat #include <config.h> -#include <stddef.h> // for NULL +#include <errno.h> // for EAGAIN, errno +#include <fcntl.h> // for open, O_RDWR +#include <stddef.h> // for size_t, NULL, ptrdiff_t #if defined HAVE_STDINT_H -#include <stdint.h> +#include <stdint.h> // for uintptr_t, intptr_t #elif defined HAVE_INTTYPES_H #include <inttypes.h> #else #include <sys/types.h> #endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#include <fcntl.h> // for open() #ifdef HAVE_MMAP -#include <sys/mman.h> +#include <sys/mman.h> // for munmap, mmap, MADV_DONTNEED, etc #endif -#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for sbrk, getpagesize, off_t +#endif +#include <new> // for operator new +#include <google/malloc_extension.h> +#include "base/basictypes.h" +#include "base/commandlineflags.h" +#include "base/spinlock.h" // for SpinLockHolder, SpinLock, etc #include "common.h" -#include "system-alloc.h" #include "internal_logging.h" -#include "base/logging.h" -#include "base/commandlineflags.h" -#include "base/spinlock.h" // On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old // form of the name instead. @@ -107,8 +108,10 @@ static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); static size_t pagesize = 0; #endif -// Configuration parameters. +// The current system allocator +SysAllocator* sys_alloc = NULL; +// Configuration parameters. DEFINE_int32(malloc_devmem_start, EnvToInt("TCMALLOC_DEVMEM_START", 0), "Physical memory starting location in MB for /dev/mem allocation." 
@@ -130,7 +133,7 @@ public: SbrkSysAllocator() : SysAllocator() { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized() {} }; static char sbrk_space[sizeof(SbrkSysAllocator)]; @@ -139,7 +142,7 @@ public: MmapSysAllocator() : SysAllocator() { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized() {} }; static char mmap_space[sizeof(MmapSysAllocator)]; @@ -148,24 +151,36 @@ public: DevMemSysAllocator() : SysAllocator() { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized() {} }; -static char devmem_space[sizeof(DevMemSysAllocator)]; - -static const int kStaticAllocators = 3; -// kMaxDynamicAllocators + kStaticAllocators; -static const int kMaxAllocators = 5; -static SysAllocator *allocators[kMaxAllocators]; -bool RegisterSystemAllocator(SysAllocator *a, int priority) { - SpinLockHolder lock_holder(&spinlock); +class DefaultSysAllocator : public SysAllocator { + public: + DefaultSysAllocator() : SysAllocator() { + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = true; + allocs_[i] = NULL; + } + } + void SetChildAllocator(SysAllocator* alloc, unsigned int index, + const char* name) { + if (index < kMaxAllocators && alloc != NULL) { + allocs_[index] = alloc; + failed_[index] = false; + } + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + void FlagsInitialized() {} - // No two allocators should have a priority conflict, since the order - // is determined at compile time. 
- CHECK_CONDITION(allocators[priority] == NULL); - allocators[priority] = a; - return true; -} + private: + static const int kMaxAllocators = 2; + bool failed_[kMaxAllocators]; + SysAllocator* allocs_[kMaxAllocators]; + const char* names_[kMaxAllocators]; +}; +static char default_space[sizeof(DefaultSysAllocator)]; +static const char sbrk_name[] = "SbrkSysAllocator"; +static const char mmap_name[] = "MmapSysAllocator"; void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, @@ -206,13 +221,11 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc // Without this check, sbrk may succeed when it ought to fail.) if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) { - failed_ = true; return NULL; } void* result = sbrk(size); if (result == reinterpret_cast<void*>(-1)) { - failed_ = true; return NULL; } @@ -232,7 +245,6 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, // that we can find an aligned region within it. 
result = sbrk(size + alignment - 1); if (result == reinterpret_cast<void*>(-1)) { - failed_ = true; return NULL; } ptr = reinterpret_cast<uintptr_t>(result); @@ -243,10 +255,6 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, #endif // HAVE_SBRK } -void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("SbrkSysAllocator: failed_=%d\n", failed_); -} - void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { #ifndef HAVE_MMAP @@ -293,7 +301,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (result == reinterpret_cast<void*>(MAP_FAILED)) { - failed_ = true; return NULL; } @@ -317,10 +324,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, #endif // HAVE_MMAP } -void MmapSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("MmapSysAllocator: failed_=%d\n", failed_); -} - void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { #ifndef HAVE_MMAP @@ -345,7 +348,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, if (!initialized) { physmem_fd = open("/dev/mem", O_RDWR); if (physmem_fd < 0) { - failed_ = true; return NULL; } physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL; @@ -377,7 +379,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, // check to see if we have any memory left if (physmem_limit != 0 && ((size + extra) > (physmem_limit - physmem_base))) { - failed_ = true; return NULL; } @@ -388,7 +389,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, MAP_SHARED, physmem_fd, physmem_base); if (result == reinterpret_cast<void*>(MAP_FAILED)) { - failed_ = true; return NULL; } uintptr_t ptr = reinterpret_cast<uintptr_t>(result); @@ -414,15 +414,30 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, #endif // HAVE_MMAP } -void 
DevMemSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("DevMemSysAllocator: failed_=%d\n", failed_); +void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + for (int i = 0; i < kMaxAllocators; i++) { + if (!failed_[i] && allocs_[i] != NULL) { + void* result = allocs_[i]->Alloc(size, actual_size, alignment); + if (result != NULL) { + return result; + } + TCMalloc_MESSAGE(__FILE__, __LINE__, "%s failed.\n", names_[i]); + failed_[i] = true; + } + } + // After both failed, reset "failed_" to false so that a single failed + // allocation won't make the allocator never work again. + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = false; + } + return NULL; } static bool system_alloc_inited = false; void InitSystemAllocators(void) { - // This determines the order in which system allocators are called - int i = kMaxDynamicAllocators; - allocators[i++] = new (devmem_space) DevMemSysAllocator(); + MmapSysAllocator *mmap = new (mmap_space) MmapSysAllocator(); + SbrkSysAllocator *sbrk = new (sbrk_space) SbrkSysAllocator(); // In 64-bit debug mode, place the mmap allocator first since it // allocates pointers that do not fit in 32 bits and therefore gives @@ -431,13 +446,15 @@ void InitSystemAllocators(void) { // likely to look like pointers and therefore the conservative gc in // the heap-checker is less likely to misinterpret a number as a // pointer). 
+ DefaultSysAllocator *sdef = new (default_space) DefaultSysAllocator(); if (kDebugMode && sizeof(void*) > 4) { - allocators[i++] = new (mmap_space) MmapSysAllocator(); - allocators[i++] = new (sbrk_space) SbrkSysAllocator(); + sdef->SetChildAllocator(mmap, 0, mmap_name); + sdef->SetChildAllocator(sbrk, 1, sbrk_name); } else { - allocators[i++] = new (sbrk_space) SbrkSysAllocator(); - allocators[i++] = new (mmap_space) MmapSysAllocator(); + sdef->SetChildAllocator(sbrk, 0, sbrk_name); + sdef->SetChildAllocator(mmap, 1, mmap_name); } + sys_alloc = sdef; } void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, @@ -455,35 +472,17 @@ void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, // Enforce minimum alignment if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); - // Try twice, once avoiding allocators that failed before, and once - // more trying all allocators even if they failed before. - for (int i = 0; i < 2; i++) { - for (int j = 0; j < kMaxAllocators; j++) { - SysAllocator *a = allocators[j]; - if (a == NULL) continue; - if (a->usable_ && !a->failed_) { - void* result = a->Alloc(size, actual_size, alignment); - if (result != NULL) { - if (actual_size) { - CheckAddressBits<kAddressBits>( - reinterpret_cast<uintptr_t>(result) + *actual_size - 1); - } else { - CheckAddressBits<kAddressBits>( - reinterpret_cast<uintptr_t>(result) + size - 1); - } - return result; - } - } - } - - // nothing worked - reset failed_ flags and try again - for (int j = 0; j < kMaxAllocators; j++) { - SysAllocator *a = allocators[j]; - if (a == NULL) continue; - a->failed_ = false; + void* result = sys_alloc->Alloc(size, actual_size, alignment); + if (result != NULL) { + if (actual_size) { + CheckAddressBits<kAddressBits>( + reinterpret_cast<uintptr_t>(result) + *actual_size - 1); + } else { + CheckAddressBits<kAddressBits>( + reinterpret_cast<uintptr_t>(result) + size - 1); } } - return NULL; + return result; } void TCMalloc_SystemRelease(void* 
start, size_t length) { @@ -521,13 +520,3 @@ void TCMalloc_SystemRelease(void* start, size_t length) { } #endif } - -void DumpSystemAllocatorStats(TCMalloc_Printer* printer) { - for (int j = 0; j < kMaxAllocators; j++) { - SysAllocator *a = allocators[j]; - if (a == NULL) continue; - if (a->usable_) { - a->DumpStats(printer); - } - } -} diff --git a/src/system-alloc.h b/src/system-alloc.h index 832cbd0..487a36b 100644 --- a/src/system-alloc.h +++ b/src/system-alloc.h @@ -37,7 +37,9 @@ #define TCMALLOC_SYSTEM_ALLOC_H_ #include <config.h> -#include "internal_logging.h" +#include <stddef.h> // for size_t + +class SysAllocator; // REQUIRES: "alignment" is a power of two or "0" to indicate default alignment // @@ -69,49 +71,7 @@ extern void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes, // be released, partial pages will not.) extern void TCMalloc_SystemRelease(void* start, size_t length); -// Interface to a pluggable system allocator. -class SysAllocator { - public: - SysAllocator() - : usable_(true), - failed_(false) { - }; - virtual ~SysAllocator() {}; - - virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0; - - // Populate the map with whatever properties the specified allocator finds - // useful for debugging (such as number of bytes allocated and whether the - // allocator has failed). The callee is responsible for any necessary - // locking (and avoiding deadlock). - virtual void DumpStats(TCMalloc_Printer* printer) = 0; - - // So the allocator can be turned off at compile time - bool usable_; - - // Did this allocator fail? If so, we don't need to retry more than twice. - bool failed_; -}; - -// Register a new system allocator. The priority determines the order in -// which the allocators will be invoked. Allocators with numerically lower -// priority are tried first. To keep things simple, the priority of various -// allocators is known at compile time. 
-// -// Valid range of priorities: [0, kMaxDynamicAllocators) -// -// Please note that we can't use complex data structures and cause -// recursive calls to malloc within this function. So all data structures -// are statically allocated. -// -// Returns true on success. Does nothing on failure. -extern PERFTOOLS_DLL_DECL bool RegisterSystemAllocator(SysAllocator *allocator, - int priority); - -// Number of SysAllocators known to call RegisterSystemAllocator -static const int kMaxDynamicAllocators = 2; - -// Retrieve the current state of various system allocators. -extern PERFTOOLS_DLL_DECL void DumpSystemAllocatorStats(TCMalloc_Printer* printer); +// The current system allocator. +extern PERFTOOLS_DLL_DECL SysAllocator* sys_alloc; -#endif /* TCMALLOC_SYSTEM_ALLOC_H_ */ +#endif /* TCMALLOC_SYSTEM_ALLOC_H__ */ diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 61300e6..97c366c 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -86,10 +86,16 @@ // * allocation of a reasonably complicated struct // goes from about 1100 ns to about 300 ns. 
-#include <config.h> -#include <new> -#include <stdio.h> -#include <stddef.h> +#include "config.h" +#include <google/tcmalloc.h> + +#include <errno.h> // for ENOMEM, EINVAL, errno +#ifdef HAVE_SYS_CDEFS_H +#include <sys/cdefs.h> // for __THROW +#endif +#ifdef HAVE_FEATURES_H +#include <features.h> // for __GLIBC__ +#endif #if defined HAVE_STDINT_H #include <stdint.h> #elif defined HAVE_INTTYPES_H @@ -97,49 +103,48 @@ #else #include <sys/types.h> #endif -// We only need malloc.h for struct mallinfo and for apple builds -#if defined(HAVE_STRUCT_MALLINFO) || defined(__APPLE__) +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for getenv +#include <string.h> // for strcmp, memset, strlen, etc +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for getpagesize, write, etc +#endif +#include <algorithm> // for max, min +#include <limits> // for numeric_limits +#include <new> // for nothrow_t (ptr only), etc +#include <vector> // for vector + +#include <google/malloc_extension.h> +#include <google/malloc_hook.h> // for MallocHook +#include "base/basictypes.h" // for int64 +#include "base/commandlineflags.h" // for RegisterFlagValidator, etc +#include "base/dynamic_annotations.h" // for RunningOnValgrind +#include "base/spinlock.h" // for SpinLockHolder +#include "central_freelist.h" // for CentralFreeListPadded +#include "common.h" // for StackTrace, kPageShift, etc +#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc +#include "linked_list.h" // for SLL_SetNext +#include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc +#include "page_heap.h" // for PageHeap, PageHeap::Stats +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "span.h" // for Span, DLL_Prepend, etc +#include "stack_trace_table.h" // for StackTraceTable +#include "static_vars.h" // for Static +#include "system-alloc.h" // for DumpSystemAllocatorStats, etc +#include "tcmalloc_guard.h" // for TCMallocGuard +#include "thread_cache.h" // for ThreadCache + +// We 
only need malloc.h for struct mallinfo. +#ifdef HAVE_STRUCT_MALLINFO // Malloc can be in several places on older versions of OS X. # if defined(HAVE_MALLOC_H) # include <malloc.h> -# elif defined(HAVE_MALLOC_MALLOC_H) -# include <malloc/malloc.h> # elif defined(HAVE_SYS_MALLOC_H) # include <sys/malloc.h> +# elif defined(HAVE_MALLOC_MALLOC_H) +# include <malloc/malloc.h> # endif #endif -#include <string.h> -#ifdef HAVE_PTHREAD -#include <pthread.h> -#endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#include <errno.h> -#include <stdarg.h> -#include <algorithm> -#include <limits> -#include <google/tcmalloc.h> -#include "base/commandlineflags.h" -#include "base/basictypes.h" // gets us PRIu64 -#include "base/sysinfo.h" -#include "base/spinlock.h" -#include "common.h" -#include "malloc_hook-inl.h" -#include <google/malloc_hook.h> -#include <google/malloc_extension.h> -#include "central_freelist.h" -#include "internal_logging.h" -#include "linked_list.h" -#include "maybe_threads.h" -#include "page_heap.h" -#include "page_heap_allocator.h" -#include "pagemap.h" -#include "span.h" -#include "static_vars.h" -#include "system-alloc.h" -#include "tcmalloc_guard.h" -#include "thread_cache.h" #if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS) # define WIN32_DO_PATCHING 1 @@ -262,130 +267,14 @@ extern "C" { } // extern "C" #endif // #ifndef _WIN32 -// We define this here because on some architectures it's needed soon. 
-namespace { -inline size_t GetSizeWithCallback(void* ptr, - size_t (*invalid_getsize_fn)(void*)) { - if (ptr == NULL) - return (*invalid_getsize_fn)(ptr); - const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; - size_t cl = Static::pageheap()->GetSizeClassIfCached(p); - if (cl != 0) { - return Static::sizemap()->ByteSizeForClass(cl); - } else { - Span *span = Static::pageheap()->GetDescriptor(p); - if (span == NULL) { // means we do not own this memory - return (*invalid_getsize_fn)(ptr); - } else if (span->sizeclass != 0) { - Static::pageheap()->CacheSizeClass(p, span->sizeclass); - return Static::sizemap()->ByteSizeForClass(span->sizeclass); - } else { - return span->length << kPageShift; - } - } -} -} - // Override the libc functions to prefer our own instead. This comes -// first so code in tcmalloc.cc can use the overridden versions. -#if defined(WIN32_DO_PATCHING) - -// One exception: in windows, by default, we patch our code into these +// first so code in tcmalloc.cc can use the overridden versions. One +// exception: in windows, by default, we patch our code into these // functions (via src/windows/patch_function.cc) rather than override // them. In that case, we don't want to do this overriding here. +#if !defined(WIN32_DO_PATCHING) -#elif defined(__APPLE__) - -#include <AvailabilityMacros.h> - -// Mach's two-level naming scheme makes aliasing difficult, but we can -// use apple's malloc_default_zone() to replace the system alloc. -// http://www.opensource.apple.com/source/Libc/Libc-583/include/malloc/malloc.h -// http://www.opensource.apple.com/source/Libc/Libc-583/gen/malloc.c -// We need wrappers for all the routines, sadly. :-( - -namespace { -// malloc_zone semantics are we return 0 if we don't own the memory. -size_t mz_invalid_getsize(void*) { - return 0; -} -size_t mz_size(malloc_zone_t* zone, const void* ptr) { - // TODO(csilvers): change this method to take a const void*, one day. 
- // TODO(csilvers): this is totally wrong with debugallocation. - return GetSizeWithCallback(const_cast<void*>(ptr), mz_invalid_getsize); -} -void* mz_malloc(malloc_zone_t* zone, size_t size) { - return tc_malloc(size); -} -void* mz_calloc(malloc_zone_t* zone, size_t num_items, size_t size) { - return tc_calloc(num_items, size); -} -void* mz_valloc(malloc_zone_t* zone, size_t size) { - return tc_valloc(size); -} -void mz_free(malloc_zone_t* zone, void* ptr) { - return tc_free(ptr); -} -void* mz_realloc(malloc_zone_t* zone, void* ptr, size_t size) { - return tc_realloc(ptr, size); -} -void* mz_memalign(malloc_zone_t* zone, size_t align, size_t size) { - return tc_memalign(align, size); -} -void mz_destroy(malloc_zone_t* zone) { - // A no-op -- we will not be destroyed! -} -} // unnamed namespace - -static void ReplaceSystemAlloc() { - static malloc_zone_t system_zone_copy; - malloc_zone_t* system_zone = malloc_default_zone(); - memcpy(&system_zone_copy, system_zone, sizeof(system_zone_copy)); - - system_zone->zone_name = "tcmalloc"; - system_zone->size = &mz_size; - system_zone->malloc = &mz_malloc; - system_zone->calloc = &mz_calloc; - system_zone->valloc = &mz_valloc; - system_zone->free = &mz_free; - system_zone->realloc = &mz_realloc; - system_zone->destroy = &mz_destroy; - system_zone->batch_malloc = NULL; - system_zone->batch_free = NULL; -#ifdef MAC_OS_X_VERSION_10_6 // from AvailabilityMacros.h - system_zone->memalign = &mz_memalign; - system_zone->free_definite_size = NULL; -#endif - - // TODO(csilvers): figure out if this version of malloc.h supports - // batch_malloc, batch_free, memalign, and free_definite_size, and - // set those to NULL if so. - - // Now register the old system zone, so allocations that happened - // before we ran this command can still be executed. - malloc_zone_register(&system_zone_copy); -} -#define HAVE_REPLACE_SYSTEM_ALLOC 1 - -// OS X doesn't have memalign, posix_memalign, pvalloc, or cfree, so -// we can just define our own. 
:-) -extern "C" { - void cfree(void* p) __THROW { tc_cfree(p); } - void* memalign(size_t a, size_t s) __THROW { return tc_memalign(a, s); } - void* pvalloc(size_t s) __THROW { return tc_pvalloc(s); } - int posix_memalign(void** r, size_t a, size_t s) __THROW { - return tc_posix_memalign(r, a, s); - } - void malloc_stats(void) __THROW { tc_malloc_stats(); } - int mallopt(int cmd, int v) __THROW { return tc_mallopt(cmd, v); } -#ifdef HAVE_STRUCT_MALLINFO - struct mallinfo mallinfo(void) __THROW { return tc_mallinfo(); } -#endif - // An alias for malloc_size(), which os x defines. - size_t malloc_usable_size(void* p) __THROW { return tc_malloc_size(p); } -} // extern "C" - -#elif defined(__GNUC__) && !defined(__MACH__) +#if defined(__GNUC__) && !defined(__MACH__) // Potentially faster variants that use the gcc alias extension. // FreeBSD does support aliases, but apparently not correctly. :-( // NOTE: we make many of these symbols weak, but do so in the makefile @@ -460,8 +349,7 @@ extern "C" { size_t malloc_size(void* p) __THROW { return tc_malloc_size(p); } size_t malloc_usable_size(void* p) __THROW { return tc_malloc_size(p); } } // extern "C" - -#endif // #if defined(WIN32_DO_PATCHING) ... +#endif // #if defined(__GNUC__) // Some library routines on RedHat 9 allocate memory using malloc() // and free it using __libc_free() (or vice-versa). 
Since we provide @@ -498,6 +386,8 @@ extern "C" { #undef ALIAS +#endif // #ifndef(WIN32_DO_PATCHING) + // ----------------------- IMPLEMENTATION ------------------------------- @@ -540,10 +430,9 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { for (int cl = 0; cl < kNumClasses; ++cl) { const int length = Static::central_cache()[cl].length(); const int tc_length = Static::central_cache()[cl].tc_length(); - const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes(); const size_t size = static_cast<uint64_t>( Static::sizemap()->ByteSizeForClass(cl)); - r->central_bytes += (size * length) + cache_overhead; + r->central_bytes += (size * length); r->transfer_bytes += (size * tc_length); if (class_count) class_count[cl] = length + tc_length; } @@ -564,7 +453,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) { uint64_t class_count[kNumClasses]; ExtractStats(&stats, (level >= 2 ? class_count : NULL)); - static const double MB = 1048576.0; + static const double MiB = 1048576.0; const uint64_t virtual_memory_used = (stats.pageheap.system_bytes + stats.metadata_bytes); @@ -577,19 +466,23 @@ static void DumpStats(TCMalloc_Printer* out, int level) { - stats.transfer_bytes - stats.thread_bytes); +#ifdef TCMALLOC_SMALL_BUT_SLOW + out->printf( + "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); +#endif out->printf( "------------------------------------------------\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n" - "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in page heap freelist\n" - "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in central cache freelist\n" - "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in transfer cache freelist\n" - "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in thread cache freelists\n" - "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes in malloc metadata\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" + 
"MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" "MALLOC: ------------\n" - "MALLOC: = %12" PRIu64 " (%7.1f MB) Actual memory used (physical + swap)\n" - "MALLOC: + %12" PRIu64 " (%7.1f MB) Bytes released to OS (aka unmapped)\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" "MALLOC: ------------\n" - "MALLOC: = %12" PRIu64 " (%7.1f MB) Virtual address space used\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" "MALLOC:\n" "MALLOC: %12" PRIu64 " Spans in use\n" "MALLOC: %12" PRIu64 " Thread heaps in use\n" @@ -599,15 +492,15 @@ static void DumpStats(TCMalloc_Printer* out, int level) { " (via madvise()).\n" "Bytes released to the OS take up virtual address space" " but no physical memory.\n", - bytes_in_use_by_app, bytes_in_use_by_app / MB, - stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB, - stats.central_bytes, stats.central_bytes / MB, - stats.transfer_bytes, stats.transfer_bytes / MB, - stats.thread_bytes, stats.thread_bytes / MB, - stats.metadata_bytes, stats.metadata_bytes / MB, - physical_memory_used, physical_memory_used / MB, - stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB, - virtual_memory_used, virtual_memory_used / MB, + bytes_in_use_by_app, bytes_in_use_by_app / MiB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, + stats.central_bytes, stats.central_bytes / MiB, + stats.transfer_bytes, stats.transfer_bytes / MiB, + stats.thread_bytes, stats.thread_bytes / MiB, + stats.metadata_bytes, stats.metadata_bytes / MiB, + physical_memory_used, physical_memory_used / MiB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, + 
virtual_memory_used, virtual_memory_used / MiB, uint64_t(Static::span_allocator()->inuse()), uint64_t(ThreadCache::HeapsInUse()), uint64_t(kPageSize)); @@ -623,19 +516,16 @@ static void DumpStats(TCMalloc_Printer* out, int level) { class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); cumulative += class_bytes; out->printf("class %3d [ %8" PRIuS " bytes ] : " - "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n", + "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n", cl, Static::sizemap()->ByteSizeForClass(cl), class_count[cl], - class_bytes / MB, - cumulative / MB); + class_bytes / MiB, + cumulative / MiB); } } SpinLockHolder h(Static::pageheap_lock()); Static::pageheap()->Dump(out); - - out->printf("------------------------------------------------\n"); - DumpSystemAllocatorStats(out); } } @@ -864,6 +754,16 @@ class TCMallocImplementation : public MallocExtension { virtual void MarkThreadBusy(); // Implemented below + virtual SysAllocator* GetSystemAllocator() { + SpinLockHolder h(Static::pageheap_lock()); + return sys_alloc; + } + + virtual void SetSystemAllocator(SysAllocator* alloc) { + SpinLockHolder h(Static::pageheap_lock()); + sys_alloc = alloc; + } + virtual void ReleaseToSystem(size_t num_bytes) { SpinLockHolder h(Static::pageheap_lock()); if (num_bytes <= extra_bytes_released_) { @@ -1028,15 +928,12 @@ TCMallocGuard::TCMallocGuard() { // patch the windows VirtualAlloc, etc. PatchWindowsFunctions(); // defined in windows/patch_functions.cc #endif -#ifdef HAVE_REPLACE_SYSTEM_ALLOC - ReplaceSystemAlloc(); // for OS X -#endif tc_free(tc_malloc(1)); ThreadCache::InitTSD(); tc_free(tc_malloc(1)); // Either we, or debugallocation.cc, or valgrind will control memory // management. We register our extension if we're the winner. -#ifdef TCMALLOC_FOR_DEBUGALLOCATION +#ifdef TCMALLOC_USING_DEBUGALLOCATION // Let debugallocation register its extension. 
#else if (RunningOnValgrind()) { @@ -1103,7 +1000,6 @@ static void* DoSampledAllocation(size_t size) { // Sampling failed because of lack of memory return span; } - *stack = tmp; span->sample = 1; span->objects = stack; @@ -1160,7 +1056,7 @@ inline bool should_report_large(Length num_pages) { const int64 threshold = large_alloc_threshold; if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { // Increase the threshold by 1/8 every time we generate a report. - // We cap the threshold at 8GB to avoid overflow problems. + // We cap the threshold at 8GiB to avoid overflow problems. large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 ? threshold + threshold/8 : 8ll<<30); return true; @@ -1274,9 +1170,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) { ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); ASSERT(span != NULL && span->start == p); if (span->sample) { + StackTrace* st = reinterpret_cast<StackTrace*>(span->objects); tcmalloc::DLL_Remove(span); - Static::stacktrace_allocator()->Delete( - reinterpret_cast<StackTrace*>(span->objects)); + Static::stacktrace_allocator()->Delete(st); span->objects = NULL; } Static::pageheap()->Delete(span); @@ -1288,6 +1184,27 @@ inline void do_free(void* ptr) { return do_free_with_callback(ptr, &InvalidFree); } +inline size_t GetSizeWithCallback(void* ptr, + size_t (*invalid_getsize_fn)(void*)) { + if (ptr == NULL) + return 0; + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + size_t cl = Static::pageheap()->GetSizeClassIfCached(p); + if (cl != 0) { + return Static::sizemap()->ByteSizeForClass(cl); + } else { + Span *span = Static::pageheap()->GetDescriptor(p); + if (span == NULL) { // means we do not own this memory + return (*invalid_getsize_fn)(ptr); + } else if (span->sizeclass != 0) { + Static::pageheap()->CacheSizeClass(p, span->sizeclass); + return Static::sizemap()->ByteSizeForClass(span->sizeclass); + } else { + return span->length << kPageShift; + } 
+ } +} + // This lets you call back to a given function pointer if ptr is invalid. // It is used primarily by windows code which wants a specialized callback. inline void* do_realloc_with_callback( @@ -1591,7 +1508,7 @@ extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW { return old_mode; } -#ifndef TCMALLOC_FOR_DEBUGALLOCATION // debugallocation.cc defines its own +#ifndef TCMALLOC_USING_DEBUGALLOCATION // debugallocation.cc defines its own // CAVEAT: The code structure below ensures that MallocHook methods are always // called from the stack frame of the invoked allocation function. @@ -1778,4 +1695,4 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) } void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; -#endif // #ifndef TCMALLOC_FOR_DEBUGALLOCATION +#endif // TCMALLOC_USING_DEBUGALLOCATION diff --git a/src/tests/frag_unittest.cc b/src/tests/frag_unittest.cc index 160c41c..5ba02bd 100644 --- a/src/tests/frag_unittest.cc +++ b/src/tests/frag_unittest.cc @@ -50,8 +50,8 @@ using std::vector; int main(int argc, char** argv) { - // Make kAllocSize larger than tcmalloc page size. - static const int kAllocSize = 9 << kPageShift; + // Make kAllocSize one page larger than the maximum small object size. + static const int kAllocSize = kMaxSize + kPageSize; // Allocate 400MB in total. static const int kTotalAlloc = 400 << 20; static const int kAllocIterations = kTotalAlloc / kAllocSize; @@ -62,6 +62,11 @@ int main(int argc, char** argv) { saved[i] = new char[kAllocSize]; } + // Check the current "slack". 
+ size_t slack_before; + MallocExtension::instance()->GetNumericProperty("tcmalloc.slack_bytes", + &slack_before); + // Free alternating ones to fragment heap size_t free_bytes = 0; for (int i = 0; i < saved.size(); i += 2) { @@ -69,10 +74,13 @@ int main(int argc, char** argv) { free_bytes += kAllocSize; } - // Check that slack is within 10% of expected - size_t slack; + // Check that slack delta is within 10% of expected. + size_t slack_after; MallocExtension::instance()->GetNumericProperty("tcmalloc.slack_bytes", - &slack); + &slack_after); + CHECK_GE(slack_after, slack_before); + size_t slack = slack_after - slack_before; + CHECK_GT(double(slack), 0.9*free_bytes); CHECK_LT(double(slack), 1.1*free_bytes); diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc index e33e068..404c9f1 100644 --- a/src/tests/heap-checker_unittest.cc +++ b/src/tests/heap-checker_unittest.cc @@ -86,15 +86,17 @@ #include <pwd.h> #endif +#include <algorithm> #include <iostream> // for cout #include <iomanip> // for hex -#include <set> -#include <map> #include <list> +#include <map> #include <memory> -#include <vector> +#include <set> #include <string> +#include <vector> +#include "base/commandlineflags.h" #include "base/googleinit.h" #include "base/logging.h" #include "base/commandlineflags.h" diff --git a/src/tests/malloc_hook_test.cc b/src/tests/malloc_hook_test.cc new file mode 100644 index 0000000..dc65b68 --- /dev/null +++ b/src/tests/malloc_hook_test.cc @@ -0,0 +1,345 @@ +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// ---- +// Author: llib@google.com (Bill Clarke) + +#include "config_for_unittests.h" +#include <assert.h> +#include <stdio.h> +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <algorithm> +#include <string> +#include <vector> +#include <google/malloc_hook.h> +#include "malloc_hook-inl.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "base/sysinfo.h" +#include "tests/testutil.h" + +namespace { + +using std::string; +using std::vector; + +vector<void (*)()> g_testlist; // the tests to run + +#define TEST(a, b) \ + struct Test_##a##_##b { \ + Test_##a##_##b() { g_testlist.push_back(&Run); } \ + static void Run(); \ + }; \ + static Test_##a##_##b g_test_##a##_##b; \ + void Test_##a##_##b::Run() + + +static int RUN_ALL_TESTS() { + vector<void (*)()>::const_iterator it; + for (it = g_testlist.begin(); it != g_testlist.end(); ++it) { + (*it)(); // The test will error-exit if there's a problem. + } + fprintf(stderr, "\nPassed %d tests\n\nPASS\n", + static_cast<int>(g_testlist.size())); + return 0; +} + +using base::internal::kHookListMaxValues; + +// Since HookList is a template and is defined in malloc_hook.cc, we can only +// use an instantiation of it from malloc_hook.cc. We then reinterpret those +// values as integers for testing. 
+typedef base::internal::HookList<MallocHook::NewHook> TestHookList; + +int TestHookList_Traverse(const TestHookList& list, int* output_array, int n) { + MallocHook::NewHook values_as_hooks[kHookListMaxValues]; + int result = list.Traverse(values_as_hooks, std::min(n, kHookListMaxValues)); + for (int i = 0; i < result; ++i) { + output_array[i] = reinterpret_cast<const int&>(values_as_hooks[i]); + } + return result; +} + +bool TestHookList_Add(TestHookList* list, int val) { + return list->Add(reinterpret_cast<MallocHook::NewHook>(val)); +} + +bool TestHookList_Remove(TestHookList* list, int val) { + return list->Remove(reinterpret_cast<MallocHook::NewHook>(val)); +} + +// Note that this is almost the same as INIT_HOOK_LIST in malloc_hook.cc without +// the cast. +#define INIT_HOOK_LIST(initial_value) { 1, { initial_value } } + +TEST(HookListTest, InitialValueExists) { + TestHookList list = INIT_HOOK_LIST(69); + int values[2] = { 0, 0 }; + EXPECT_EQ(1, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(69, values[0]); + EXPECT_EQ(1, list.priv_end); +} + +TEST(HookListTest, CanRemoveInitialValue) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + EXPECT_EQ(0, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(0, TestHookList_Traverse(list, values, 2)); +} + +TEST(HookListTest, AddAppends) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Add(&list, 42)); + EXPECT_EQ(2, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(2, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(69, values[0]); + EXPECT_EQ(42, values[1]); +} + +TEST(HookListTest, RemoveWorksAndWillClearSize) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Add(&list, 42)); + + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + EXPECT_EQ(2, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(1, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(42, values[0]); + + ASSERT_TRUE(TestHookList_Remove(&list, 
42)); + EXPECT_EQ(0, list.priv_end); + EXPECT_EQ(0, TestHookList_Traverse(list, values, 2)); +} + +TEST(HookListTest, AddPrependsAfterRemove) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Add(&list, 42)); + + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + EXPECT_EQ(2, list.priv_end); + + ASSERT_TRUE(TestHookList_Add(&list, 7)); + EXPECT_EQ(2, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(2, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(7, values[0]); + EXPECT_EQ(42, values[1]); +} + +TEST(HookListTest, InvalidAddRejected) { + TestHookList list = INIT_HOOK_LIST(69); + EXPECT_FALSE(TestHookList_Add(&list, 0)); + + int values[2] = { 0, 0 }; + EXPECT_EQ(1, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(69, values[0]); + EXPECT_EQ(1, list.priv_end); +} + +TEST(HookListTest, FillUpTheList) { + TestHookList list = INIT_HOOK_LIST(69); + int num_inserts = 0; + while (TestHookList_Add(&list, ++num_inserts)) + ; + EXPECT_EQ(kHookListMaxValues, num_inserts); + EXPECT_EQ(kHookListMaxValues, list.priv_end); + + int values[kHookListMaxValues + 1]; + EXPECT_EQ(kHookListMaxValues, TestHookList_Traverse(list, values, + kHookListMaxValues)); + EXPECT_EQ(69, values[0]); + for (int i = 1; i < kHookListMaxValues; ++i) { + EXPECT_EQ(i, values[i]); + } +} + +void MultithreadedTestThread(TestHookList* list, int shift, + int thread_num) { + string message; + char buf[64]; + for (int i = 1; i < 1000; ++i) { + // In each loop, we insert a unique value, check it exists, remove it, and + // check it doesn't exist. We also record some stats to log at the end of + // each thread. Each insertion location and the length of the list is + // non-deterministic (except for the very first one, over all threads, and + // after the very last one the list should be empty). + int value = (i << shift) + thread_num; + EXPECT_TRUE(TestHookList_Add(list, value)); + sched_yield(); // Ensure some more interleaving. 
+ int values[kHookListMaxValues + 1]; + int num_values = TestHookList_Traverse(*list, values, kHookListMaxValues); + EXPECT_LT(0, num_values); + int value_index; + for (value_index = 0; + value_index < num_values && values[value_index] != value; + ++value_index) + ; + EXPECT_LT(value_index, num_values); // Should have found value. + snprintf(buf, sizeof(buf), "[%d/%d; ", value_index, num_values); + message += buf; + sched_yield(); + EXPECT_TRUE(TestHookList_Remove(list, value)); + sched_yield(); + num_values = TestHookList_Traverse(*list, values, kHookListMaxValues); + for (value_index = 0; + value_index < num_values && values[value_index] != value; + ++value_index) + ; + EXPECT_EQ(value_index, num_values); // Should not have found value. + snprintf(buf, sizeof(buf), "%d]", num_values); + message += buf; + sched_yield(); + } + fprintf(stderr, "thread %d: %s\n", thread_num, message.c_str()); +} + +static volatile int num_threads_remaining; +static TestHookList list = INIT_HOOK_LIST(69); +static SpinLock threadcount_lock; + +void MultithreadedTestThreadRunner(int thread_num) { + // Wait for all threads to start running. + { + SpinLockHolder h(&threadcount_lock); + assert(num_threads_remaining > 0); + --num_threads_remaining; + + // We should use condvars and the like, but for this test, we'll + // go simple and busy-wait. + while (num_threads_remaining > 0) { + threadcount_lock.Unlock(); + SleepForMilliseconds(100); + threadcount_lock.Lock(); + } + } + + // shift is the smallest number such that (1<<shift) > kHookListMaxValues + int shift = 0; + for (int i = kHookListMaxValues; i > 0; i >>= 1) + shift += 1; + + MultithreadedTestThread(&list, shift, thread_num); +} + + +TEST(HookListTest, MultithreadedTest) { + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + ASSERT_EQ(0, list.priv_end); + + // Run kHookListMaxValues thread, each running MultithreadedTestThread. + // First, we need to set up the rest of the globals. 
+ num_threads_remaining = kHookListMaxValues; // a global var + RunManyThreadsWithId(&MultithreadedTestThreadRunner, num_threads_remaining, + 1 << 15); + + int values[kHookListMaxValues + 1]; + EXPECT_EQ(0, TestHookList_Traverse(list, values, kHookListMaxValues)); + EXPECT_EQ(0, list.priv_end); +} + +#ifdef HAVE_MMAP +int mmap_calls = 0; +int mmap_matching_calls = 0; +int munmap_calls = 0; +int munmap_matching_calls = 0; +const int kMmapMagicFd = 1; +void* const kMmapMagicPointer = reinterpret_cast<void*>(1); + +int MmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + ++mmap_calls; + if (fd == kMmapMagicFd) { + ++mmap_matching_calls; + *result = kMmapMagicPointer; + return true; + } + return false; +} + +int MunmapReplacement(const void* ptr, size_t size, int* result) { + ++munmap_calls; + if (ptr == kMmapMagicPointer) { + ++munmap_matching_calls; + *result = 0; + return true; + } + return false; +} + +TEST(MallocMookTest, MmapReplacements) { + mmap_calls = mmap_matching_calls = munmap_calls = munmap_matching_calls = 0; + MallocHook::SetMmapReplacement(&MmapReplacement); + MallocHook::SetMunmapReplacement(&MunmapReplacement); + EXPECT_EQ(kMmapMagicPointer, mmap(NULL, 1, PROT_READ, MAP_PRIVATE, + kMmapMagicFd, 0)); + EXPECT_EQ(1, mmap_matching_calls); + + char* ptr = reinterpret_cast<char*>( + mmap(NULL, 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + EXPECT_EQ(2, mmap_calls); + EXPECT_EQ(1, mmap_matching_calls); + ASSERT_NE(MAP_FAILED, ptr); + *ptr = 'a'; + + EXPECT_EQ(0, munmap(kMmapMagicPointer, 1)); + EXPECT_EQ(1, munmap_calls); + EXPECT_EQ(1, munmap_matching_calls); + + EXPECT_EQ(0, munmap(ptr, 1)); + EXPECT_EQ(2, munmap_calls); + EXPECT_EQ(1, munmap_matching_calls); + + // The DEATH test below is flaky, because we've just munmapped the memory, + // making it available for mmap()ing again. 
There is no guarantee that it + // will stay unmapped, and in fact it gets reused ~10% of the time. + // It the area is reused, then not only we don't die, but we also corrupt + // whoever owns that memory now. + // EXPECT_DEATH(*ptr = 'a', "SIGSEGV"); +} +#endif // #ifdef HAVE_MMAN + +} // namespace + +int main(int argc, char** argv) { + return RUN_ALL_TESTS(); +} diff --git a/src/tests/memalign_unittest.cc b/src/tests/memalign_unittest.cc index 1b707dd..b354bb4 100644 --- a/src/tests/memalign_unittest.cc +++ b/src/tests/memalign_unittest.cc @@ -49,6 +49,7 @@ #include <unistd.h> // for getpagesize() #endif #include "tcmalloc.h" // must come early, to pick up posix_memalign +#include <assert.h> #include <stdlib.h> // defines posix_memalign #include <stdio.h> // for the printf at the end #ifdef HAVE_STDINT_H diff --git a/src/tests/page_heap_test.cc b/src/tests/page_heap_test.cc index 9120b78..9f5f3c8 100644 --- a/src/tests/page_heap_test.cc +++ b/src/tests/page_heap_test.cc @@ -1,11 +1,11 @@ // Copyright 2009 Google Inc. All Rights Reserved. 
// Author: fikes@google.com (Andrew Fikes) -#include <stdio.h> #include "config_for_unittests.h" +#include "page_heap.h" +#include <stdio.h> #include "base/logging.h" #include "common.h" -#include "page_heap.h" namespace { diff --git a/src/tests/realloc_unittest.cc b/src/tests/realloc_unittest.cc index 20edb50..4267421 100644 --- a/src/tests/realloc_unittest.cc +++ b/src/tests/realloc_unittest.cc @@ -33,13 +33,16 @@ // Test realloc() functionality #include "config_for_unittests.h" +#include <assert.h> // for assert #include <stdio.h> -#include <stdlib.h> -#include <algorithm> // for min() +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for free, malloc, realloc +#include <algorithm> // for min #include "base/logging.h" using std::min; + // Fill a buffer of the specified size with a predetermined pattern static void Fill(unsigned char* buffer, int n) { for (int i = 0; i < n; i++) { diff --git a/src/tests/system-alloc_unittest.cc b/src/tests/system-alloc_unittest.cc index da76285..c006425 100644 --- a/src/tests/system-alloc_unittest.cc +++ b/src/tests/system-alloc_unittest.cc @@ -31,6 +31,7 @@ // Author: Arun Sharma #include "config_for_unittests.h" +#include "system-alloc.h" #include <stdio.h> #if defined HAVE_STDINT_H #include <stdint.h> // to get uintptr_t @@ -39,9 +40,9 @@ #endif #include <sys/types.h> #include <algorithm> -#include "base/logging.h" -#include "common.h" -#include "system-alloc.h" +#include "base/logging.h" // for Check_GEImpl, Check_LTImpl, etc +#include <google/malloc_extension.h> // for MallocExtension::instance +#include "common.h" // for kAddressBits class ArraySysAllocator : public SysAllocator { public: @@ -55,6 +56,11 @@ public: void* Alloc(size_t size, size_t *actual_size, size_t alignment) { invoked_ = true; + + if (size > kArraySize) { + return NULL; + } + void *result = &array_[ptr_]; uintptr_t ptr = reinterpret_cast<uintptr_t>(result); @@ -75,8 +81,9 @@ public: return reinterpret_cast<void *>(ptr); } - void 
DumpStats(TCMalloc_Printer* printer) { + void DumpStats() { } + void FlagsInitialized() {} private: static const int kArraySize = 8 * 1024 * 1024; @@ -89,7 +96,7 @@ const int ArraySysAllocator::kArraySize; ArraySysAllocator a; static void TestBasicInvoked() { - RegisterSystemAllocator(&a, 0); + MallocExtension::instance()->SetSystemAllocator(&a); // An allocation size that is likely to trigger the system allocator. // XXX: this is implementation specific. @@ -112,8 +119,19 @@ TEST(AddressBits, CpuVirtualBits) { } #endif +static void TestBasicRetryFailTest() { + // Check with the allocator still works after a failed allocation. + void* p = malloc(1ULL << 50); // Asking for 1P ram + CHECK(p == NULL); + + char* q = new char[1024]; + CHECK(q != NULL); + delete [] q; +} + int main(int argc, char** argv) { TestBasicInvoked(); + TestBasicRetryFailTest(); printf("PASS\n"); return 0; diff --git a/src/tests/tcmalloc_large_unittest.cc b/src/tests/tcmalloc_large_unittest.cc index 260ac29..ad3482e 100644 --- a/src/tests/tcmalloc_large_unittest.cc +++ b/src/tests/tcmalloc_large_unittest.cc @@ -35,12 +35,12 @@ // For 32 bits, this means allocations near 2^32 bytes and 2^31 bytes. // For 64 bits, this means allocations near 2^64 bytes and 2^63 bytes. 
-#include <stddef.h> -#include <stdlib.h> +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for malloc, free, realloc #include <stdio.h> -#include <set> +#include <set> // for set, etc -#include "base/logging.h" +#include "base/logging.h" // for operator<<, CHECK, etc using std::set; diff --git a/src/thread_cache.cc b/src/thread_cache.cc index 64e3b07..b00e3b4 100644 --- a/src/thread_cache.cc +++ b/src/thread_cache.cc @@ -31,11 +31,12 @@ // Author: Ken Ashcraft <opensource@google.com> #include <config.h> -#ifdef HAVE_INTTYPES_H -#include <inttypes.h> -#endif -#include <algorithm> // for min and max #include "thread_cache.h" +#include <string.h> // for memcpy +#include <algorithm> // for max, min +#include "base/commandlineflags.h" // for SpinLockHolder +#include "base/spinlock.h" // for SpinLockHolder +#include "central_freelist.h" // for CentralFreeListPadded #include "maybe_threads.h" using std::min; diff --git a/src/thread_cache.h b/src/thread_cache.h index 352c683..1742d5b 100644 --- a/src/thread_cache.h +++ b/src/thread_cache.h @@ -35,8 +35,13 @@ #include <config.h> #ifdef HAVE_PTHREAD -#include <pthread.h> +#include <pthread.h> // for pthread_t, pthread_key_t #endif +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint32_t, uint64_t +#endif +#include <sys/types.h> // for ssize_t #include "common.h" #include "linked_list.h" #include "maybe_threads.h" @@ -44,6 +49,13 @@ #include "sampler.h" #include "static_vars.h" +#include "common.h" // for SizeMap, kMaxSize, etc +#include "internal_logging.h" // for ASSERT, etc +#include "linked_list.h" // for SLL_Pop, SLL_PopRange, etc +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "sampler.h" // for Sampler +#include "static_vars.h" // for Static + namespace tcmalloc { // Even if we have support for thread-local storage in the compiler diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc index fc57c82..f837e7a 100644 --- 
a/src/windows/patch_functions.cc +++ b/src/windows/patch_functions.cc @@ -122,6 +122,11 @@ const char kMangledDeleteArrayNothrow[] = "??_V@YAXPAXABUnothrow_t@std@@@Z"; extern "C" PERFTOOLS_DLL_DECL void _tcmalloc(); void _tcmalloc() { } +// This is the version needed for windows x64, which has a different +// decoration scheme which doesn't auto-add a leading underscore. +extern "C" PERFTOOLS_DLL_DECL void __tcmalloc(); +void __tcmalloc() { } + namespace { // most everything here is in an unnamed namespace typedef void (*GenericFnPtr)(); diff --git a/src/windows/port.cc b/src/windows/port.cc index aa972d0..1ecdace 100644 --- a/src/windows/port.cc +++ b/src/windows/port.cc @@ -101,9 +101,15 @@ bool CheckIfKernelSupportsTLS() { // Force a reference to p_thread_callback_tcmalloc and p_process_term_tcmalloc // to prevent whole program optimization from discarding the variables. #ifdef _MSC_VER +#if defined(_M_IX86) #pragma comment(linker, "/INCLUDE:__tls_used") #pragma comment(linker, "/INCLUDE:_p_thread_callback_tcmalloc") #pragma comment(linker, "/INCLUDE:_p_process_term_tcmalloc") +#elif defined(_M_X64) +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:p_thread_callback_tcmalloc") +#pragma comment(linker, "/INCLUDE:p_process_term_tcmalloc") +#endif #endif // When destr_fn eventually runs, it's supposed to take as its |