summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRaphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com>2014-12-23 10:29:49 -0200
committerAliaksey Kandratsenka <alk@tut.by>2014-12-23 10:51:51 -0800
commit3f55d874be8812aef9f0f567048188584962b4c1 (patch)
tree70b3412fe3c0980e18487bb68363eae704d637cb
parent1035d5c18f64d114ac790b92a96f3b3a1a301eb9 (diff)
downloadgperftools-3f55d874be8812aef9f0f567048188584962b4c1.tar.gz
New configure flags to set the alignment and page size of tcmalloc
Added two new configure flags, --with-tcmalloc-pagesize and --with-tcmalloc-alignment, in order to set the tcmalloc internal page size and tcmalloc allocation alignment without the need of a compiler directive and to make the choice of the page size independent of the allocation alignment.
-rw-r--r--INSTALL20
-rw-r--r--configure.ac37
-rw-r--r--src/common.h36
3 files changed, 65 insertions, 28 deletions
diff --git a/INSTALL b/INSTALL
index f448076..b6bc08e 100644
--- a/INSTALL
+++ b/INSTALL
@@ -102,19 +102,19 @@ cost of using more space (due to internal fragmentation).
Internally, tcmalloc divides its memory into "pages." The default
page size is chosen to minimize memory use by reducing fragmentation.
The cost is that keeping track of these pages can cost tcmalloc time.
-We've added a new, experimental flag to tcmalloc that enables a larger
-page size. In general, this will increase the memory needs of
-applications using tcmalloc. However, in many cases it will speed up
-the applications as well, particularly if they allocate and free a lot
-of memory. We've seen average speedups of 3-5% on Google
-applications.
+We've added a new flag to tcmalloc that enables a larger page size.
+In general, this will increase the memory needs of applications using
+tcmalloc. However, in many cases it will speed up the applications
+as well, particularly if they allocate and free a lot of memory. We've
+seen average speedups of 3-5% on Google applications.
-This feature is still very experimental; it's not even a configure
-flag yet. To build libtcmalloc with large pages, run
+To build libtcmalloc with large pages you need to use the
+--with-tcmalloc-pagesize=ARG configure flag, e.g.:
- ./configure <normal flags> CXXFLAGS=-DTCMALLOC_LARGE_PAGES
+ ./configure <other flags> --with-tcmalloc-pagesize=32
-(or add -DTCMALLOC_LARGE_PAGES to your existing CXXFLAGS argument).
+The ARG argument can be 8, 32 or 64 which sets the internal page size to
+8K, 32K and 64K respectively. The default is 8K.
*** SMALL TCMALLOC CACHES: TRADING SPACE FOR TIME
diff --git a/configure.ac b/configure.ac
index 0061be4..a54ffee 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,7 +22,6 @@ AM_INIT_AUTOMAKE([dist-zip])
AC_CONFIG_HEADERS([src/config.h])
AM_MAINTAINER_MODE()
-
# Export the version information (for tc_version and friends)
TC_VERSION_MAJOR=`expr "$PACKAGE_VERSION" : '\([[0-9]]*\)'`
TC_VERSION_MINOR=`expr "$PACKAGE_VERSION" : '[[0-9]]*\.\([[0-9]]*\)'`
@@ -42,6 +41,8 @@ default_enable_heap_profiler=yes
default_enable_heap_checker=yes
default_enable_debugalloc=yes
default_enable_minimal=no
+default_tcmalloc_pagesize=8
+default_tcmalloc_alignment=16
need_nanosleep=yes # Used later, to decide if to run ACX_NANOSLEEP
case "$host" in
*-mingw*) default_enable_minimal=yes; default_enable_debugalloc=no;
@@ -95,6 +96,40 @@ AC_ARG_ENABLE([libunwind],
[enable libunwind linking])],
[],
[enable_libunwind="$default_enable_libunwind"])
+AC_ARG_WITH([tcmalloc-pagesize],
+ [AS_HELP_STRING([--with-tcmalloc-pagesize],
+ [Set the tcmalloc internal page size to 8K, 32K or 64K])],
+ [],
+ [with_tcmalloc_pagesize=$default_tcmalloc_pagesize])
+AC_ARG_WITH([tcmalloc-alignment],
+ [AS_HELP_STRING([--with-tcmalloc-alignment],
+ [Set the tcmalloc allocation alignment to 8 or 16 bytes])],
+ [],
+ [with_tcmalloc_alignment=$default_tcmalloc_alignment])
+
+case "$with_tcmalloc_pagesize" in
+ 8)
+ #Default tcmalloc page size.
+ ;;
+ 32)
+ AC_DEFINE(TCMALLOC_32K_PAGES, 1,
+ [Define 32K of internal pages size for tcmalloc]);;
+ 64)
+ AC_DEFINE(TCMALLOC_64K_PAGES, 1,
+ [Define 64K of internal pages size for tcmalloc]);;
+ *)
+ AC_MSG_WARN([${with_tcmalloc_pagesize}K size not supported, using default tcmalloc page size.])
+esac
+case "$with_tcmalloc_alignment" in
+ 8)
+ AC_DEFINE(TCMALLOC_ALIGN_8BYTES, 1,
+ [Define 8 bytes of allocation alignment for tcmalloc]);;
+ 16)
+ #Default tcmalloc allocation alignment.
+ ;;
+ *)
+ AC_MSG_WARN([${with_tcmalloc_alignment} bytes not supported, using default tcmalloc allocation alignment.])
+esac
# Checks for programs.
AC_PROG_CXX
diff --git a/src/common.h b/src/common.h
index 18bcad8..c3484d3 100644
--- a/src/common.h
+++ b/src/common.h
@@ -62,6 +62,19 @@ typedef uintptr_t Length;
// Configuration
//-------------------------------------------------------------------
+#if defined(TCMALLOC_ALIGN_8BYTES)
+// Unless we force the use of 8-byte alignment, we use an alignment of
+// at least 16 bytes to satisfy requirements for some SSE types.
+// Keep in mind that when using 16-byte alignment you can waste up to
+// 25% of space due to alignment. (e.g. malloc of 24 bytes will get 32 bytes)
+static const size_t kMinAlign = 8;
+// Number of classes created until reaching page size 128.
+static const size_t kBaseClasses = 16;
+#else
+static const size_t kMinAlign = 16;
+static const size_t kBaseClasses = 9;
+#endif
+
// Using large pages speeds up the execution at a cost of larger memory use.
// Deallocation may speed up by a factor as the page map gets 8x smaller, so
// lookups in the page map result in fewer L2 cache misses, which translates to
@@ -70,28 +83,17 @@ typedef uintptr_t Length;
// the thread cache allowance to avoid passing more free ranges to and from
// central lists. Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
-
-#if defined(TCMALLOC_LARGE_PAGES)
+#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift = 15;
-static const size_t kNumClasses = 78;
-static const size_t kMinAlign = 16;
-#elif defined(TCMALLOC_LARGE_PAGES64K)
+static const size_t kNumClasses = kBaseClasses + 69;
+#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift = 16;
-static const size_t kNumClasses = 82;
-static const size_t kMinAlign = 16;
-#elif defined(TCMALLOC_ALIGN_8BYTES)
-static const size_t kPageShift = 13;
-static const size_t kNumClasses = 95;
-// Unless we force to use 8 bytes alignment we use an alignment of
-// at least 16 bytes to statisfy requirements for some SSE types.
-// Keep in mind when using the 16 bytes alignment you can have a space
-// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
-static const size_t kMinAlign = 8;
+static const size_t kNumClasses = kBaseClasses + 73;
#else
static const size_t kPageShift = 13;
-static const size_t kNumClasses = 88;
-static const size_t kMinAlign = 16;
+static const size_t kNumClasses = kBaseClasses + 79;
#endif
+
static const size_t kMaxThreadCacheSize = 4 << 20;
static const size_t kPageSize = 1 << kPageShift;