SERVER-21464: Return all thread-local memory to tcmalloc central cache when a thread goes idle

author: Martin Bligh <mbligh@mongodb.com> 2015-11-15 16:25:11 -0500
committer: Martin Bligh <mbligh@mongodb.com> 2015-11-15 16:25:11 -0500
commit: 7790034b4b1cba68d44a8df920edd64cc05ccdd9 (patch)
tree: 107fc9d031f3dc6a800b28beaa7edafe005c24cb
parent: 4c6d1b8b81df9d224ed2a7a3744d3ad2db32fa3d (diff)
download: mongo-7790034b4b1cba68d44a8df920edd64cc05ccdd9.tar.gz
7 files changed, 41 insertions, 35 deletions
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript
index c6804f15802..a742cf6ddb3 100644
--- a/src/mongo/util/SConscript
+++ b/src/mongo/util/SConscript
@@ -248,7 +248,7 @@ if get_option('allocator') == 'tcmalloc':
         # level configure check, though its effects should still be scoped just to these files.
         tcmspEnv.Append(
             CPPDEFINES=[
-                'MONGO_HAVE_GPERFTOOLS_SHRINK_CACHE_SIZE'
+                'MONGO_HAVE_GPERFTOOLS_GET_THREAD_CACHE_SIZE'
             ]
         )
 
diff --git a/src/mongo/util/tcmalloc_server_status_section.cpp b/src/mongo/util/tcmalloc_server_status_section.cpp
index 2a696d72882..fe4522345d9 100644
--- a/src/mongo/util/tcmalloc_server_status_section.cpp
+++ b/src/mongo/util/tcmalloc_server_status_section.cpp
@@ -50,30 +50,43 @@ namespace {
 // it is better to release memory when it is likely the thread will be blocked for
 // a long time.
 const int kManyClients = 40;
-size_t tcmallocPoolSize = 0;
 
-// Callback to allow TCMalloc to release freed memory to the central list at favorable times.
+stdx::mutex tcmallocCleanupLock;
+
+/**
+ *  Callback to allow TCMalloc to release freed memory to the central list at
+ *  favorable times. Ideally would do some milder cleanup or scavenge...
+ */
 void threadStateChange() {
-    int thread_count = Listener::globalTicketHolder.used();
-    if (thread_count <= kManyClients)
+    if (Listener::globalTicketHolder.used() <= kManyClients)
+        return;
+
+#if MONGO_HAVE_GPERFTOOLS_GET_THREAD_CACHE_SIZE
+    size_t threadCacheSizeBytes = MallocExtension::instance()->GetThreadCacheSize();
+
+    static const size_t kMaxThreadCacheSizeBytes = 0x10000;
+    if (threadCacheSizeBytes < kMaxThreadCacheSizeBytes) {
+        // This number was chosen a bit magically.
+        // At 1000 threads and the current (64mb) thread local cache size, we're "full".
+        // So we may want this number to scale with the number of current clients.
         return;
+    }
+
+    LOG(1) << "thread over memory limit, cleaning up, current: " << (threadCacheSizeBytes / 1024)
+           << "k";
 
-#if MONGO_HAVE_GPERFTOOLS_SHRINK_CACHE_SIZE
-    MallocExtension::instance()->ShrinkCacheIfAboveSize(tcmallocPoolSize / thread_count);
-#else
+    // We synchronize as the tcmalloc central list uses a spinlock, and we can cause a really
+    // terrible runaway if we're not careful.
+    stdx::lock_guard<stdx::mutex> lk(tcmallocCleanupLock);
+#endif
     MallocExtension::instance()->MarkThreadIdle();
     MallocExtension::instance()->MarkThreadBusy();
-#endif
 }
 
 // Register threadStateChange callback
 MONGO_INITIALIZER(TCMallocThreadIdleListener)(InitializerContext*) {
-    if (!RUNNING_ON_VALGRIND) {
+    if (!RUNNING_ON_VALGRIND)
         registerThreadIdleCallback(&threadStateChange);
-        invariant(MallocExtension::instance()->GetNumericProperty(
-            "tcmalloc.max_total_thread_cache_bytes", &tcmallocPoolSize));
-    }
-    LOG(1) << "tcmallocPoolSize: " << tcmallocPoolSize << "\n";
     return Status::OK();
 }
 
diff --git a/src/third_party/gperftools-2.2/src/gperftools/malloc_extension.h b/src/third_party/gperftools-2.2/src/gperftools/malloc_extension.h
index 48ed5ad6893..f331f5f2e49 100644
--- a/src/third_party/gperftools-2.2/src/gperftools/malloc_extension.h
+++ b/src/third_party/gperftools-2.2/src/gperftools/malloc_extension.h
@@ -240,6 +240,10 @@ class PERFTOOLS_DLL_DECL MallocExtension {
   // Most malloc implementations ignore this routine.
   virtual void MarkThreadBusy();
 
+  // Gets the size of this thread's cache in bytes.
+  // MONGODB ADDITION
+  virtual size_t GetThreadCacheSize();
+
   // Gets the system allocator used by the malloc extension instance. Returns
   // NULL for malloc implementations that do not support pluggable system
   // allocators.
@@ -391,9 +395,6 @@ class PERFTOOLS_DLL_DECL MallocExtension {
   // Like ReadStackTraces(), but returns stack traces that caused growth
   // in the address space size.
   virtual void** ReadHeapGrowthStackTraces();
-
-  // Shrink per-thread cache size if above a min_size in bytes.
-  virtual void ShrinkCacheIfAboveSize(size_t min_size);
 };
 
 namespace base {
diff --git a/src/third_party/gperftools-2.2/src/malloc_extension.cc b/src/third_party/gperftools-2.2/src/malloc_extension.cc
index b9b481a5314..9126efbeaa7 100644
--- a/src/third_party/gperftools-2.2/src/malloc_extension.cc
+++ b/src/third_party/gperftools-2.2/src/malloc_extension.cc
@@ -147,12 +147,12 @@ void MallocExtension::MarkThreadIdle() {
   // Default implementation does nothing
 }
 
-void MallocExtension::ShrinkCacheIfAboveSize(size_t min_size) {
+void MallocExtension::MarkThreadBusy() {
   // Default implementation does nothing
 }
 
-void MallocExtension::MarkThreadBusy() {
-  // Default implementation does nothing
+size_t MallocExtension::GetThreadCacheSize() {
+  return 0;
 }
 
 SysAllocator* MallocExtension::GetSystemAllocator() {
diff --git a/src/third_party/gperftools-2.2/src/tcmalloc.cc b/src/third_party/gperftools-2.2/src/tcmalloc.cc
index 5604233c755..82024b336c1 100644
--- a/src/third_party/gperftools-2.2/src/tcmalloc.cc
+++ b/src/third_party/gperftools-2.2/src/tcmalloc.cc
@@ -729,12 +729,15 @@ class TCMallocImplementation : public MallocExtension {
     ThreadCache::BecomeIdle();
   }
 
-  virtual void ShrinkCacheIfAboveSize(size_t min_size) {
-    ThreadCache::ShrinkCacheIfAboveSize(min_size);
-  }
-
   virtual void MarkThreadBusy();  // Implemented below
 
+  virtual size_t GetThreadCacheSize() {
+    ThreadCache* tc = ThreadCache::GetCacheIfPresent();
+    if (!tc)
+      return 0;
+    return tc->Size();
+  }
+
   virtual SysAllocator* GetSystemAllocator() {
     SpinLockHolder h(Static::pageheap_lock());
     return sys_alloc;
diff --git a/src/third_party/gperftools-2.2/src/thread_cache.cc b/src/third_party/gperftools-2.2/src/thread_cache.cc
index 34cc855ddfc..b98fbee043f 100644
--- a/src/third_party/gperftools-2.2/src/thread_cache.cc
+++ b/src/third_party/gperftools-2.2/src/thread_cache.cc
@@ -437,16 +437,6 @@ void ThreadCache::BecomeIdle() {
   DeleteCache(heap);
 }
 
-void ThreadCache::ShrinkCacheIfAboveSize(size_t min_size) {
-  if (!tsd_inited_) return;              // No caches yet
-  ThreadCache* heap = GetThreadHeap();
-  if (heap == NULL) return;             // No thread cache to remove
-  if (heap->in_setspecific_) return;    // Do not disturb the active caller
-
-  if (heap->Size() > min_size)
-    heap->Cleanup();
-}
-
 void ThreadCache::DestroyThreadCache(void* ptr) {
   // Note that "ptr" cannot be NULL since pthread promises not
   // to invoke the destructor on NULL values, but for safety,
diff --git a/src/third_party/gperftools-2.2/src/thread_cache.h b/src/third_party/gperftools-2.2/src/thread_cache.h
index e31c2a42b06..5f0dc127bcc 100644
--- a/src/third_party/gperftools-2.2/src/thread_cache.h
+++ b/src/third_party/gperftools-2.2/src/thread_cache.h
@@ -116,7 +116,6 @@ class ThreadCache {
   static ThreadCache* GetCacheWhichMustBePresent();
   static ThreadCache* CreateCacheIfNecessary();
   static void         BecomeIdle();
-  static void         ShrinkCacheIfAboveSize(size_t min_size);
   static size_t       MinSizeForSlowPath();
   static void         SetMinSizeForSlowPath(size_t size);
author	Martin Bligh <mbligh@mongodb.com>	2015-11-15 16:25:11 -0500
committer	Martin Bligh <mbligh@mongodb.com>	2015-11-15 16:25:11 -0500
commit	7790034b4b1cba68d44a8df920edd64cc05ccdd9 (patch)
tree	107fc9d031f3dc6a800b28beaa7edafe005c24cb
parent	4c6d1b8b81df9d224ed2a7a3744d3ad2db32fa3d (diff)
download	mongo-7790034b4b1cba68d44a8df920edd64cc05ccdd9.tar.gz