summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog  20
-rw-r--r--  Makefile.am  24
-rw-r--r--  aclocal.m4  38
-rwxr-xr-x  configure  37
-rw-r--r--  configure.ac  2
-rw-r--r--  doc/cpu_profiler.html  18
-rw-r--r--  src/base/elfcore.c  1046
-rw-r--r--  src/base/elfcore.h  104
-rw-r--r--  src/base/linux_syscall_support.h  381
-rw-r--r--  src/base/linuxthreads.c  118
-rw-r--r--  src/base/linuxthreads.h  3
-rw-r--r--  src/base/thread_lister.c  12
-rw-r--r--  src/base/thread_lister.h  14
-rw-r--r--  src/google/heap-checker.h  10
-rw-r--r--  src/google/malloc_extension.h  13
-rw-r--r--  src/google/profiler.h  77
-rw-r--r--  src/heap-checker.cc  78
-rw-r--r--  src/heap-profiler.cc  1
-rw-r--r--  src/internal_logging.cc  2
-rw-r--r--  src/internal_spinlock.h  2
-rw-r--r--  src/malloc_extension.cc  84
-rw-r--r--  src/malloc_hook.cc  40
-rw-r--r--  src/pagemap.h  10
-rwxr-xr-x  src/pprof  665
-rw-r--r--  src/profiler.cc  171
-rw-r--r--  src/stacktrace.cc  42
-rw-r--r--  src/system-alloc.cc  20
-rw-r--r--  src/tcmalloc.cc  159
-rw-r--r--  src/tests/tcmalloc_large_unittest.cc  137
-rw-r--r--  src/tests/tcmalloc_unittest.cc  3
30 files changed, 1750 insertions, 1581 deletions
diff --git a/ChangeLog b/ChangeLog
index b8391f8..ea3e0a0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -32,3 +32,23 @@ Fri Jun 24 18:02:26 2005 Google Inc. <opensource@google.com>
* Add support for mallopt() and mallinfo (sanjay)
* Improve stacktrace's performance on some 64-bit systems (etune)
* Improve the stacktrace unittest (etune)
+
+Wed Oct 26 15:19:16 2005 Google Inc. <opensource@google.com>
+
+ * Decrease fragmentation in tcmalloc (lefevere)
+ * Support for ARM in some of the thread-specific code (markus)
+ * Turn off heap-checker for statically-linked binaries, which
+ cause error leak reports now (etune)
+ * Many pprof improvements, including a command-line interface (jeff)
+ * CPU profiling now automatically affects all threads in linux 2.6.
+ (Kernel bugs break CPU profiling and threads in linux 2.4 a bit.)
+ ProfilerEnable() and ProfilerDisable() are deprecated. (sanjay)
+ * tcmalloc now correctly intercepts memalign (m3b, maxim)
+ * Syntax fix: added missing va_end()s. Helps non-gcc compiling (etune)
+ * Fixed a few coredumper bugs: race condition after PTRACE_DETACH,
+ ignore non-aligned stackframe pointers (markus, menage)
+ * 64-bit cleanup, especially for spinlock code (etune) and mmap (sanjay)
+ * Better support for finding threads in linux (markus)
+ * tcmalloc now tracks those stack traces that allocate memory (sanjay)
+ * Work around a weird setspecific problem (sanjay)
+ * Fix tcmalloc overflow problems when an alloc is close to 2G/4G (sanjay)
diff --git a/Makefile.am b/Makefile.am
index 21ab0af..cab257c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-## Process this file with automake to produce Makefile.in
+## Process this file with automake to produce Makefile.in
# Note: for every library we create, we're explicit about what symbols
# we export. In order to avoid complications with C++ mangling, we always
@@ -26,7 +26,7 @@ docdir = $(prefix)/doc/$(PACKAGE)-$(VERSION)
# Add your documentation files (in doc/) in addition to these
# top-level boilerplate files. Also add a TODO file if you have one.
# We'll add to this later, on a library-by-library basis
-dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README TODO
+dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README TODO
# The libraries (.so's) you want to install
# We'll add to this later, on a library-by-library basis
@@ -75,7 +75,7 @@ stacktrace_unittest_SOURCES = src/tests/stacktrace_unittest.cc \
stacktrace_unittest_LDADD = libstacktrace.la
### Documentation
-dist_doc_DATA +=
+dist_doc_DATA +=
### ------- tcmalloc_minimal (thread-caching malloc)
@@ -90,7 +90,7 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/config.h \
src/maybe_threads.h
SG_TCMALLOC_MINIMAL_INCLUDES = src/google/malloc_hook.h \
src/google/malloc_extension.h \
- src/google/stacktrace.h
+ src/google/stacktrace.h
SGP_TCMALLOC_MINIMAL_INCLUDES = src/google/perftools/hash_set.h
TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) $(SGP_TCMALLOC_MINIMAL_INCLUDES)
googleinclude_HEADERS += $(SG_TCMALLOC_MINIMAL_INCLUDES)
@@ -135,6 +135,12 @@ tcmalloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
tcmalloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
tcmalloc_unittest_LDADD = libtcmalloc.la $(PTHREAD_LIBS)
+TESTS += tcmalloc_large_unittest
+tcmalloc_large_unittest_SOURCES = src/tests/tcmalloc_large_unittest.cc
+tcmalloc_large_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
+tcmalloc_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
+tcmalloc_large_unittest_LDADD = libtcmalloc.la $(PTHREAD_LIBS)
+
# performance/unittests originally from ptmalloc2
TESTS += ptmalloc_unittest1 ptmalloc_unittest2
PTMALLOC_UNITTEST_INCLUDES = src/tests/ptmalloc/t-test.h \
@@ -144,7 +150,7 @@ PTMALLOC_UNITTEST_INCLUDES = src/tests/ptmalloc/t-test.h \
src/tests/ptmalloc/malloc-machine.h
ptmalloc_unittest1_SOURCES = src/tests/ptmalloc/t-test1.c \
$(PTMALLOC_UNITTEST_INCLUDES)
-ptmalloc_unittest1_CFLAGS = $(PTHREAD_CFLAGS) -DUSE_PTHREADS
+ptmalloc_unittest1_CFLAGS = $(PTHREAD_CFLAGS) -DUSE_PTHREADS
ptmalloc_unittest1_LDFLAGS = $(PTHREAD_CFLAGS)
ptmalloc_unittest1_LDADD = $(PTHREAD_LIBS)
ptmalloc_unittest2_SOURCES = src/tests/ptmalloc/t-test2.c \
@@ -180,7 +186,7 @@ dist_doc_DATA += doc/tcmalloc.html \
doc/tcmalloc-opspersec.vs.size.3.threads.png \
doc/tcmalloc-opspersec.vs.size.4.threads.png \
doc/tcmalloc-opspersec.vs.size.5.threads.png \
- doc/tcmalloc-opspersec.vs.size.8.threads.png
+ doc/tcmalloc-opspersec.vs.size.8.threads.png
# I don't know how to say "distribute the .dot files but don't install them";
# noinst doesn't seem to work with data. I separate them out anyway, in case
@@ -206,6 +212,7 @@ S_TCMALLOC_INCLUDES = src/config.h \
src/base/logging.h \
src/base/googleinit.h \
src/base/elfcore.h \
+ src/base/linux_syscall_support.h \
src/base/linuxthreads.h \
src/base/thread_lister.h \
src/maybe_threads.h
@@ -213,7 +220,7 @@ SG_TCMALLOC_INCLUDES = src/google/malloc_hook.h \
src/google/malloc_extension.h \
src/google/heap-profiler.h \
src/google/heap-checker.h \
- src/google/stacktrace.h
+ src/google/stacktrace.h
SGP_TCMALLOC_INCLUDES = src/google/perftools/hash_set.h
TCMALLOC_INCLUDES = $(S_TCMALLOC_INCLUDES) $(SG_TCMALLOC_INCLUDES) $(SGP_TCMALLOC_INCLUDES)
googleinclude_HEADERS += $(SG_TCMALLOC_INCLUDES)
@@ -230,7 +237,6 @@ libtcmalloc_la_SOURCES = src/internal_logging.cc \
src/heap-profiler.cc \
src/heap-checker.cc \
src/heap-checker-bcad.cc \
- src/base/elfcore.c \
src/base/linuxthreads.c \
src/base/thread_lister.c \
$(TCMALLOC_INCLUDES)
@@ -304,7 +310,7 @@ S_CPU_PROFILER_INCLUDES = src/config.h \
src/base/logging.h
SG_CPU_PROFILER_INCLUDES = src/google/profiler.h \
src/google/stacktrace.h
-SGP_CPU_PROFILER_INCLUDES =
+SGP_CPU_PROFILER_INCLUDES =
CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES) $(SGP_CPU_PROFILER_INCLUDES)
googleinclude_HEADERS += $(SG_CPU_PROFILER_INCLUDES)
perftoolsinclude_HEADERS += $(SGP_CPU_PROFILER_INCLUDES)
diff --git a/aclocal.m4 b/aclocal.m4
index ae8a9c0..0b68740 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -6969,21 +6969,31 @@ AC_DEFUN([AC_CXX_STL_NAMESPACE],
fi
])
-# Checks whether the compiler implements namespaces
+dnl @synopsis AC_CXX_NAMESPACES
+dnl
+dnl If the compiler can prevent names clashes using namespaces, define
+dnl HAVE_NAMESPACES.
+dnl
+dnl @category Cxx
+dnl @author Todd Veldhuizen
+dnl @author Luc Maisonobe <luc@spaceroots.org>
+dnl @version 2004-02-04
+dnl @license AllPermissive
+
AC_DEFUN([AC_CXX_NAMESPACES],
- [AC_CACHE_CHECK(whether the compiler implements namespaces,
- ac_cv_cxx_namespaces,
- [AC_LANG_SAVE
- AC_LANG_CPLUSPLUS
- AC_TRY_COMPILE([namespace Outer {
- namespace Inner { int i = 0; }}],
- [using namespace Outer::Inner; return i;],
- ac_cv_cxx_namespaces=yes,
- ac_cv_cxx_namespaces=no)
- AC_LANG_RESTORE])
- if test "$ac_cv_cxx_namespaces" = yes; then
- AC_DEFINE(HAVE_NAMESPACES, 1, [define if the compiler implements namespaces])
- fi])
+[AC_CACHE_CHECK(whether the compiler implements namespaces,
+ac_cv_cxx_namespaces,
+[AC_LANG_SAVE
+ AC_LANG_CPLUSPLUS
+ AC_TRY_COMPILE([namespace Outer { namespace Inner { int i = 0; }}],
+ [using namespace Outer::Inner; return i;],
+ ac_cv_cxx_namespaces=yes, ac_cv_cxx_namespaces=no)
+ AC_LANG_RESTORE
+])
+if test "$ac_cv_cxx_namespaces" = yes; then
+ AC_DEFINE(HAVE_NAMESPACES,,[define if the compiler implements namespaces])
+fi
+])
# Figures out where hash_set is defined, and then writes out the
# location to the file specified in $1. The output file also
diff --git a/configure b/configure
index 421ada1..339ad8f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.57 for google-perftools 0.3.
+# Generated by GNU Autoconf 2.57 for google-perftools 0.4.
#
# Report bugs to <opensource@google.com>.
#
@@ -422,8 +422,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='google-perftools'
PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='0.3'
-PACKAGE_STRING='google-perftools 0.3'
+PACKAGE_VERSION='0.4'
+PACKAGE_STRING='google-perftools 0.4'
PACKAGE_BUGREPORT='opensource@google.com'
ac_unique_file="README"
@@ -953,7 +953,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures google-perftools 0.3 to adapt to many kinds of systems.
+\`configure' configures google-perftools 0.4 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1019,7 +1019,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of google-perftools 0.3:";;
+ short | recursive ) echo "Configuration of google-perftools 0.4:";;
esac
cat <<\_ACEOF
@@ -1125,7 +1125,7 @@ fi
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-google-perftools configure 0.3
+google-perftools configure 0.4
generated by GNU Autoconf 2.57
Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
@@ -1140,7 +1140,7 @@ cat >&5 <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by google-perftools $as_me 0.3, which was
+It was created by google-perftools $as_me 0.4, which was
generated by GNU Autoconf 2.57. Invocation command line was
$ $0 $@
@@ -1733,7 +1733,7 @@ fi
# Define the identity of the package.
PACKAGE=google-perftools
- VERSION=0.3
+ VERSION=0.4
cat >>confdefs.h <<_ACEOF
@@ -21090,21 +21090,20 @@ if test "${ac_cv_cxx_namespaces+set}" = set; then
else
- ac_ext=cc
+ ac_ext=cc
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
#line $LINENO "configure"
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
-namespace Outer {
- namespace Inner { int i = 0; }}
+namespace Outer { namespace Inner { int i = 0; }}
int
main ()
{
@@ -21133,22 +21132,24 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_cxx_namespaces=no
fi
rm -f conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
echo "$as_me:$LINENO: result: $ac_cv_cxx_namespaces" >&5
echo "${ECHO_T}$ac_cv_cxx_namespaces" >&6
- if test "$ac_cv_cxx_namespaces" = yes; then
+if test "$ac_cv_cxx_namespaces" = yes; then
cat >>confdefs.h <<\_ACEOF
-#define HAVE_NAMESPACES 1
+#define HAVE_NAMESPACES
_ACEOF
- fi
+fi
+
echo "$as_me:$LINENO: checking what namespace STL code is in" >&5
echo $ECHO_N "checking what namespace STL code is in... $ECHO_C" >&6
if test "${ac_cv_cxx_stl_namespace+set}" = set; then
@@ -22015,7 +22016,7 @@ _ASBOX
} >&5
cat >&5 <<_CSEOF
-This file was extended by google-perftools $as_me 0.3, which was
+This file was extended by google-perftools $as_me 0.4, which was
generated by GNU Autoconf 2.57. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22078,7 +22079,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-google-perftools config.status 0.3
+google-perftools config.status 0.4
configured by $0, generated by GNU Autoconf 2.57,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index 245d166..7da5e86 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,7 +5,7 @@
# make sure we're interpreted by some minimal autoconf
AC_PREREQ(2.57)
-AC_INIT(google-perftools, 0.3, opensource@google.com)
+AC_INIT(google-perftools, 0.4, opensource@google.com)
# The argument here is just something that should be in the current directory
# (for sanity checking)
AC_CONFIG_SRCDIR(README)
diff --git a/doc/cpu_profiler.html b/doc/cpu_profiler.html
index ad0e9fd..bc18940 100644
--- a/doc/cpu_profiler.html
+++ b/doc/cpu_profiler.html
@@ -45,17 +45,13 @@ given run of an executable:</p>
profile-filename as an argument.
</ol>
-<p>Profiling works correctly with threads. To use, just call
-ProfilerRegisterThread() at the beginning of the routine the thread
-runs. Profiling also works correctly with sub-processes: each child
+<p>In Linux 2.6 and above, profiling works correctly with threads,
+automatically profiling all threads. In Linux 2.4, profiling only
+profiles the main thread (due to a kernel bug involving itimers and
+threads). Profiling works correctly with sub-processes: each child
process gets its own profile with its own name (generated by combining
CPUPROFILE with the child's process id).</p>
-<p>You can also turn profiling on and off throughout the code, and do
-other tweaks. This functionality will not frequently be needed. See
-/usr/local/include/google/profiler.h (or src/google/profiler.h in this
-directory) for more details.</p>
-
<p>For security reasons, CPU profiling will not write to a file -- and
is thus not usable -- for setuid programs.</p>
@@ -68,12 +64,6 @@ profile.</p>
<table frame=box rules=sides cellpadding=5 width=100%>
<tr>
-<td><code>PROFILESELECTED=1</code></td>
- <td>If set, cpu-profiler will only profile regions of code
- surrounded with
- <code>ProfilerEnable()</code>/<code>ProfilerDisable()</code>.
- </td>
-</tr><tr>
<td><code>PROFILEFREQUENCY=<i>x</i></code></td>
<td>How many interrupts/second the cpu-profiler samples.
</td>
diff --git a/src/base/elfcore.c b/src/base/elfcore.c
deleted file mode 100644
index d7bce9a..0000000
--- a/src/base/elfcore.c
+++ /dev/null
@@ -1,1046 +0,0 @@
-/* Copyright (c) 2005, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Markus Gutschke
- */
-
-#include "base/elfcore.h"
-#if defined DUMPER
-
-#include <elf.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <linux/unistd.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/prctl.h>
-#include <sys/ptrace.h>
-#include <sys/resource.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/sysctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <sys/wait.h>
-#include <unistd.h>
-
-#include "base/thread_lister.h"
-
-/* Definitions missing from the standard header files */
-#ifndef NT_PRFPXREG
-#define NT_PRFPXREG 20
-#endif
-#ifndef PTRACE_GETFPXREGS
-#define PTRACE_GETFPXREGS ((enum __ptrace_request)18)
-#endif
-#ifndef PR_GET_DUMPABLE
-#define PR_GET_DUMPABLE 3
-#endif
-#ifndef PR_SET_DUMPABLE
-#define PR_SET_DUMPABLE 4
-#endif
-
-
-/* Data structures found in x86-32/64 core dumps on Linux; similar data
- * structures are defined in /usr/include/{linux,asm}/... but those
- * headers conflict with the rest of the libc headers. So we cannot
- * include them here.
- */
-
-typedef struct i386_fpxregs { /* SSE registers */
- uint16_t cwd;
- uint16_t swd;
- uint16_t twd;
- uint16_t fop;
- uint32_t fip;
- uint32_t fcs;
- uint32_t foo;
- uint32_t fos;
- uint32_t mxcsr;
- uint32_t mxcsr_mask;
- uint32_t st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
- uint32_t xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
- uint32_t padding[24];
-} i386_fpxregs;
-
-
-#ifdef __x86_64__
-/* Linux on x86-64 stores all FPU registers in the SSE structure */
-typedef i386_fpxregs i386_fpregs;
-#else
-typedef struct i386_fpregs { /* FPU registers */
- uint32_t cwd;
- uint32_t swd;
- uint32_t twd;
- uint32_t fip;
- uint32_t fcs;
- uint32_t foo;
- uint32_t fos;
- uint32_t st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
-} i386_fpregs;
-#endif
-
-
-typedef struct i386_timeval { /* Time value with microsecond resolution */
- long tv_sec; /* Seconds */
- long tv_usec; /* Microseconds */
-} i386_timeval;
-
-
-typedef struct i386_siginfo { /* Information about signal (unused) */
- int32_t si_signo; /* Signal number */
- int32_t si_code; /* Extra code */
- int32_t si_errno; /* Errno */
-} i386_siginfo;
-
-
-typedef struct i386_prstatus { /* Information about thread; includes CPU reg*/
- struct i386_siginfo pr_info; /* Info associated with signal */
- uint16_t pr_cursig; /* Current signal */
- unsigned long pr_sigpend; /* Set of pending signals */
- unsigned long pr_sighold; /* Set of held signals */
- pid_t pr_pid; /* Process ID */
- pid_t pr_ppid; /* Parent's process ID */
- pid_t pr_pgrp; /* Group ID */
- pid_t pr_sid; /* Session ID */
- i386_timeval pr_utime; /* User time */
- i386_timeval pr_stime; /* System time */
- i386_timeval pr_cutime; /* Cumulative user time */
- i386_timeval pr_cstime; /* Cumulative system time */
- i386_regs pr_reg; /* CPU registers */
- uint32_t pr_fpvalid; /* True if math co-processor being used */
-} i386_prstatus;
-
-
-typedef struct i386_prpsinfo { /* Information about process */
- unsigned char pr_state; /* Numeric process state */
- char pr_sname; /* Char for pr_state */
- unsigned char pr_zomb; /* Zombie */
- signed char pr_nice; /* Nice val */
- unsigned long pr_flag; /* Flags */
-#ifdef __x86_64__
- uint32_t pr_uid; /* User ID */
- uint32_t pr_gid; /* Group ID */
-#else
- uint16_t pr_uid; /* User ID */
- uint16_t pr_gid; /* Group ID */
-#endif
- pid_t pr_pid; /* Process ID */
- pid_t pr_ppid; /* Parent's process ID */
- pid_t pr_pgrp; /* Group ID */
- pid_t pr_sid; /* Session ID */
- char pr_fname[16]; /* Filename of executable */
- char pr_psargs[80]; /* Initial part of arg list */
-} i386_prpsinfo;
-
-
-typedef struct i386_user { /* Ptrace returns this data for thread state */
- i386_regs regs; /* CPU registers */
- unsigned long fpvalid; /* True if math co-processor being used */
- i386_fpregs fpregs; /* FPU registers */
- unsigned long tsize; /* Text segment size in pages */
- unsigned long dsize; /* Data segment size in pages */
- unsigned long ssize; /* Stack segment size in pages */
- unsigned long start_code; /* Starting virtual address of text */
- unsigned long start_stack; /* Starting virtual address of stack area */
- unsigned long signal; /* Signal that caused the core dump */
- unsigned long reserved; /* No longer used */
- i386_regs *regs_ptr; /* Used by gdb to help find the CPU registers*/
- i386_fpregs *fpregs_ptr; /* Pointer to FPU registers */
- unsigned long magic; /* Magic for old A.OUT core files */
- char comm[32]; /* User command that was responsible */
- unsigned long debugreg[8];
- unsigned long error_code; /* CPU error code or 0 */
- unsigned long fault_address; /* CR3 or 0 */
-} i386_user;
-
-
-#ifdef __x86_64__
- #define ELF_CLASS ELFCLASS64
- #define ELF_ARCH EM_X86_64
- #define Ehdr Elf64_Ehdr
- #define Phdr Elf64_Phdr
- #define Shdr Elf64_Shdr
- #define Nhdr Elf64_Nhdr
-#else
- #define ELF_CLASS ELFCLASS32
- #define ELF_ARCH EM_386
- #define Ehdr Elf32_Ehdr
- #define Phdr Elf32_Phdr
- #define Shdr Elf32_Shdr
- #define Nhdr Elf32_Nhdr
-#endif
-
-
-/* After forking, we must make sure to only call system calls. */
-#if __BOUNDED_POINTERS__
- #error "Need to port invocations of syscalls for bounded ptrs"
-#else
- /* The code in this file gets executed after threads have been suspended.
- * As a consequence, we cannot call any functions that acquire locks.
- * Unfortunately, libc wraps most system calls (e.g. in order to implement
- * pthread_atfork, and to make calls cancellable), which means we cannot
- * call these functions. Instead, we have to call syscall() directly.
- */
- #include <stdarg.h>
- #include <syscall.h>
- #ifdef __x86_64__
- #define sys_recvmsg(s,m,f) syscall(SYS_recvmsg, (s), (m), (f))
- #define sys_sendmsg(s,m,f) syscall(SYS_sendmsg, (s), (m), (f))
- #define sys_shutdown(s,h) syscall(SYS_shutdown, (s), (h))
- #define sys_sigaction(s,a,o) syscall(SYS_rt_sigaction, (s), (a),(o),\
- _NSIG/8)
- #define sys_sigprocmask(h,s,o) syscall(SYS_rt_sigprocmask, (h), (s),(o),\
- _NSIG/8)
- #define sys_socketpair(d,t,p,s) syscall(SYS_socketpair, (d), (t), (p),(s))
- #define sys_waitpid(p,s,o) syscall(SYS_wait4, (p), (s), (o),(void *)0)
- #else
- static int sys_socketcall(int op, ...) {
- int rc;
- va_list ap;
- va_start(ap, op);
- rc = syscall(SYS_socketcall, op, ap);
- va_end(ap);
- return rc;
- }
- #define sys_recvmsg(s,m,f) sys_socketcall(17, (s), (m), (f))
- #define sys_sendmsg(s,m,f) sys_socketcall(16, (s), (m), (f))
- #define sys_shutdown(s,h) sys_socketcall(13, (s), (h))
- #define sys_sigaction(s,a,o) syscall(SYS_sigaction, (s), (a), (o))
- #define sys_sigprocmask(h,s,o) syscall(SYS_sigprocmask,(h), (s), (o))
- #define sys_socketpair(d,t,p,s) sys_socketcall(8, (d), (t), (p),(s))
- #define sys_waitpid(p,s,o) syscall(SYS_waitpid, (p), (s), (o))
- #endif
- #define sys_close(f) syscall(SYS_close, (f))
- #define sys_exit(r) syscall(SYS_exit, (r))
- #define sys_fork() syscall(SYS_fork)
- #define sys_getegid() syscall(SYS_getegid)
- #define sys_geteuid() syscall(SYS_geteuid)
- #define sys_getpgrp() syscall(SYS_getpgrp)
- #define sys_getpid() syscall(SYS_getpid)
- #define sys_getppid() syscall(SYS_getppid)
- #define sys_getpriority(a,b) syscall(SYS_getpriority)
- #define sys_getrlimit(r,l) syscall(SYS_getrlimit, (r), (l))
- #define sys_getsid(p) syscall(SYS_getsid, (p))
- #define sys_open(f,p,m) syscall(SYS_open, (f), (p), (m))
- #define sys_pipe(f) syscall(SYS_pipe, (f))
- #define sys_prctl(o,a) syscall(SYS_prctl, (o), (a))
- #define sys_ptrace(r,p,a,d) syscall(SYS_ptrace, (r), (p), (a),(d))
- #define sys_read(f,b,c) syscall(SYS_read, (f), (b), (c))
- #define sys_readlink(p,b,s) syscall(SYS_readlink, (p), (b), (s))
- #define sys_write(f,b,c) syscall(SYS_write, (f), (b), (c))
-
- static int sys_sysconf(int name) {
- extern int __getpagesize(void);
- switch (name) {
- case _SC_OPEN_MAX: {
- struct rlimit ru;
- return sys_getrlimit(RLIMIT_NOFILE, &ru) < 0 ? 8192 : ru.rlim_cur;
- }
- case _SC_PAGESIZE:
- return __getpagesize();
- default:
- errno = ENOSYS;
- return -1;
- }
- }
-
- static pid_t sys_gettid() {
- #ifndef SYS_gettid
- #define SYS_gettid 224
- #endif
- pid_t tid = syscall(SYS_gettid);
- if (tid != -1) {
- return tid;
- }
- return sys_getpid();
- }
-#endif
-
-
-/* Re-runs fn until it doesn't cause EINTR
- */
-
-#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
-
-/* Wrapper for read() which is guaranteed to never return EINTR.
- */
-static ssize_t c_read(int f, const void *buf, size_t bytes) {
- if (bytes > 0) {
- ssize_t rc;
- NO_INTR(rc = sys_read(f, buf, bytes));
- return rc;
- }
- return 0;
-}
-
-/* Wrapper for write() which is guaranteed to never return EINTR nor
- * short writes.
- */
-static ssize_t c_write(int f, const void *void_buf, size_t bytes) {
- const unsigned char *buf = (const unsigned char*)void_buf;
- size_t len = bytes;
- while (len > 0) {
- ssize_t rc;
- NO_INTR(rc = sys_write(f, buf, len));
- if (rc < 0)
- return rc;
- else if (rc == 0)
- break;
- buf += rc;
- len -= rc;
- }
- return bytes;
-}
-
-
-struct io {
- int fd;
- unsigned char *data, *end;
- unsigned char buf[4096];
-};
-
-
-/* Reads one character from the "io" file. This function has the same
- * semantics as fgetc(), but we cannot call any library functions at this
- * time.
- */
-static int GetChar(struct io *io) {
- unsigned char *ptr = io->data;
- if (ptr == io->end) {
- /* Even though we are parsing one character at a time, read in larger
- * chunks.
- */
- ssize_t n = c_read(io->fd, io->buf, sizeof(io->buf));
- if (n <= 0) {
- if (n == 0)
- errno = 0;
- return -1;
- }
- ptr = &io->buf[0];
- io->end = &io->buf[n];
- }
- io->data = ptr+1;
- return *ptr;
-}
-
-
-/* Place the hex number read from "io" into "*hex". The first non-hex
- * character is returned (or -1 in the case of end-of-file).
- */
-static int GetHex(struct io *io, size_t *hex) {
- int ch;
- *hex = 0;
- while (((ch = GetChar(io)) >= '0' && ch <= '9') ||
- (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'))
- *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9);
- return ch;
-}
-
-
-/* Computes the amount of leading zeros in a memory region.
- */
-static size_t LeadingZeros(int *loopback, void *mem, size_t len,
- size_t pagesize) {
- char buf[pagesize];
- size_t count;
- char *ptr = 0;
- for (count = 0; count < len; ) {
- /* Read a page by going through the pipe. Assume that we can write at
- * least one page without blocking.
- *
- * "Normal" kernels do not require this hack. But some of the security
- * patches (e.g. grsec) can be configured to disallow read access of
- * executable pages. So, directly scanning the memory range would
- * result in a segmentation fault.
- *
- * If we cannot access a page, we assume that it was all zeros.
- */
- if ((count % pagesize) == 0) {
- if (c_write(loopback[1], (char *)mem + count, pagesize) < 0 ||
- c_read(loopback[0], buf, pagesize) < 0) {
- count += pagesize;
- continue;
- } else
- ptr = buf;
- }
- if (*ptr++)
- break;
- count++;
- }
- return count & ~(pagesize-1);
-}
-
-
-/* This function is invoked from a seperate process. It has access to a
- * copy-on-write copy of the parents address space, and all crucial
- * information about the parent has been computed by the caller.
- */
-static void CreateElfCore(int fd, i386_prpsinfo *prpsinfo, i386_user *user,
- i386_prstatus *prstatus, int num_threads,
- pid_t *pids, i386_regs *regs, i386_fpregs *fpregs,
- i386_fpxregs *fpxregs, size_t pagesize) {
- /* Count the number of mappings in "/proc/self/maps". We are guaranteed
- * that this number is not going to change while this function executes.
- */
- int num_mappings = 0;
- struct io io;
- int loopback[2] = { -1, -1 };
-
- if (sys_pipe(loopback) < 0)
- goto done;
-
- io.data = io.end = 0;
- NO_INTR(io.fd = sys_open("/proc/self/maps", O_RDONLY, 0));
- if (io.fd >= 0) {
- int i, ch;
- while ((ch = GetChar(&io)) >= 0) {
- num_mappings += (ch == '\n');
- }
- if (errno != 0) {
- read_error:
- NO_INTR(sys_close(io.fd));
- goto done;
- }
- NO_INTR(sys_close(io.fd));
-
- /* Read all mappings. This requires re-opening "/proc/self/maps" */
- /* scope */ {
- struct {
- size_t start_address, end_address, offset;
- int flags;
- } mappings[num_mappings];
- io.data = io.end = 0;
- NO_INTR(io.fd = sys_open("/proc/self/maps", O_RDONLY, 0));
- if (io.fd >= 0) {
- size_t note_align;
- /* Parse entries of the form:
- * "^[0-9A-F]*-[0-9A-F]* [r-][w-][x-][p-] [0-9A-F]*.*$"
- */
- for (i = 0; i < num_mappings;) {
- static const char * const dev_zero = "/dev/zero";
- const char *dev = dev_zero;
- int j, is_device;
- size_t zeros;
-
- memset(&mappings[i], 0, sizeof(mappings[i]));
-
- /* Read start and end addresses */
- if (GetHex(&io, &mappings[i].start_address) != '-' ||
- GetHex(&io, &mappings[i].end_address) != ' ')
- goto read_error;
-
- /* Read flags */
- while ((ch = GetChar(&io)) != ' ') {
- if (ch < 0)
- goto read_error;
- mappings[i].flags = (mappings[i].flags << 1) | (ch != '-');
- }
- /* Drop the private/shared bit. This makes the flags compatible with
- * the ELF access bits
- */
- mappings[i].flags >>= 1;
-
- /* Read offset */
- if ((ch = GetHex(&io, &mappings[i].offset)) != ' ')
- goto read_error;
-
- /* Skip over device numbers, and inode number */
- for (j = 0; j < 2; j++) {
- while (ch == ' ') {
- ch = GetChar(&io);
- }
- while (ch != ' ' && ch != '\n') {
- if (ch < 0)
- goto read_error;
- ch = GetChar(&io);
- }
- while (ch == ' ') {
- ch = GetChar(&io);
- }
- if (ch < 0)
- goto read_error;
- }
-
- /* Check whether this is a mapping for a device */
- while (*dev && ch == *dev) {
- ch = GetChar(&io);
- dev++;
- }
- is_device = dev >= dev_zero + 5 &&
- ((ch != '\n' && ch != ' ') || *dev != '\000');
-
- /* Skip until end of line */
- while (ch != '\n') {
- if (ch < 0)
- goto read_error;
- ch = GetChar(&io);
- }
-
- /* Skip leading zeroed pages (as found in the stack segment) */
- if ((mappings[i].flags & PF_R) && !is_device) {
- zeros = LeadingZeros(loopback, (void *)mappings[i].start_address,
- mappings[i].end_address - mappings[i].start_address,
- pagesize);
- mappings[i].start_address += zeros;
- }
-
- /* Remove mapping, if it was not readable, or completely zero
- * anyway. The former is usually the case of stack guard pages, and
- * the latter occasionally happens for unused memory.
- * Also, be careful not to touch mapped devices.
- */
- if ((mappings[i].flags & PF_R) == 0 ||
- mappings[i].start_address == mappings[i].end_address ||
- is_device) {
- num_mappings--;
- } else {
- i++;
- }
- }
- NO_INTR(sys_close(io.fd));
-
- /* Write out the ELF header */
- /* scope */ {
- Ehdr ehdr;
- memset(&ehdr, 0, sizeof(ehdr));
- ehdr.e_ident[0] = ELFMAG0;
- ehdr.e_ident[1] = ELFMAG1;
- ehdr.e_ident[2] = ELFMAG2;
- ehdr.e_ident[3] = ELFMAG3;
- ehdr.e_ident[4] = ELF_CLASS;
- ehdr.e_ident[5] = ELFDATA2LSB;
- ehdr.e_ident[6] = EV_CURRENT;
- ehdr.e_type = ET_CORE;
- ehdr.e_machine = ELF_ARCH;
- ehdr.e_version = EV_CURRENT;
- ehdr.e_phoff = sizeof(ehdr);
- ehdr.e_ehsize = sizeof(ehdr);
- ehdr.e_phentsize= sizeof(Phdr);
- ehdr.e_phnum = num_mappings + 1;
- ehdr.e_shentsize= sizeof(Shdr);
- if (c_write(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
- goto done;
- }
- }
-
- /* Write program headers, starting with the PT_NOTE entry */
- /* scope */ {
- Phdr phdr;
- size_t offset = sizeof(Ehdr) + (num_mappings + 1)*sizeof(Phdr);
- size_t filesz = sizeof(Nhdr) + 4 + sizeof(i386_prpsinfo) +
- sizeof(Nhdr) + 4 + sizeof(i386_user) +
- num_threads*(
- + sizeof(Nhdr) + 4 + sizeof(i386_prstatus)
- + sizeof(Nhdr) + 4 + sizeof(i386_fpregs));
- #ifndef __x86_64__
- if (fpxregs) {
- filesz += num_threads*(
- sizeof(Nhdr) + 4 + sizeof(i386_fpxregs));
- }
- #endif
- memset(&phdr, 0, sizeof(phdr));
- phdr.p_type = PT_NOTE;
- phdr.p_offset = offset;
- phdr.p_filesz = filesz;
- if (c_write(fd, &phdr, sizeof(phdr)) != sizeof(phdr)) {
- goto done;
- }
-
- /* Now follow with program headers for each of the memory segments */
- phdr.p_type = PT_LOAD;
- phdr.p_align = pagesize;
- phdr.p_paddr = 0;
- note_align = phdr.p_align - ((offset+filesz) % phdr.p_align);
- if (note_align == phdr.p_align)
- note_align = 0;
- offset += note_align;
- for (i = 0; i < num_mappings; i++) {
- offset += filesz;
- filesz = mappings[i].end_address -mappings[i].start_address;
- phdr.p_offset = offset;
- phdr.p_vaddr = mappings[i].start_address;
- phdr.p_memsz = filesz;
-
- /* Do not write contents for memory segments that are read-only */
- if ((mappings[i].flags & PF_W) == 0)
- filesz = 0;
- phdr.p_filesz = filesz;
- phdr.p_flags = mappings[i].flags;
- if (c_write(fd, &phdr, sizeof(phdr)) != sizeof(phdr)) {
- goto done;
- }
- }
- }
-
- /* Write note section */
- /* scope */ {
- Nhdr nhdr;
- memset(&nhdr, 0, sizeof(nhdr));
- nhdr.n_namesz = 4;
- nhdr.n_descsz = sizeof(i386_prpsinfo);
- nhdr.n_type = NT_PRPSINFO;
- if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
- c_write(fd, "CORE", 4) != 4 ||
- c_write(fd, prpsinfo, sizeof(i386_prpsinfo)) !=
- sizeof(i386_prpsinfo)) {
- goto done;
- }
- nhdr.n_descsz = sizeof(i386_user);
- nhdr.n_type = NT_PRXREG;
- if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
- c_write(fd, "CORE", 4) != 4 ||
- c_write(fd, user, sizeof(i386_user)) != sizeof(i386_user)) {
- goto done;
- }
-
- for (i = num_threads; i-- > 0; ) {
- /* Process status and integer registers */
- nhdr.n_descsz = sizeof(i386_prstatus);
- nhdr.n_type = NT_PRSTATUS;
- prstatus->pr_pid = pids[i];
- prstatus->pr_reg = regs[i];
- if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
- c_write(fd, "CORE", 4) != 4 ||
- c_write(fd, prstatus, sizeof(i386_prstatus)) !=
- sizeof(i386_prstatus)) {
- goto done;
- }
-
- /* FPU registers */
- nhdr.n_descsz = sizeof(i386_fpregs);
- nhdr.n_type = NT_FPREGSET;
- if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
- c_write(fd, "CORE", 4) != 4 ||
- c_write(fd, fpregs+1, sizeof(i386_fpregs)) !=
- sizeof(i386_fpregs)) {
- goto done;
- }
-
- /* SSE registers */
- #ifndef __x86_64__
- /* Linux on x86-64 stores all FPU registers in the SSE structure */
- if (fpxregs) {
- nhdr.n_descsz = sizeof(i386_fpxregs);
- nhdr.n_type = NT_PRFPXREG;
- if (c_write(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr) ||
- c_write(fd, "CORE", 4) != 4 ||
- c_write(fd, fpxregs+1, sizeof(i386_fpxregs)) !=
- sizeof(i386_fpxregs)) {
- goto done;
- }
- }
- #endif
- }
- }
-
- /* Align all following segments to multiples of page size */
- if (note_align) {
- char scratch[note_align];
- memset(scratch, 0, sizeof(scratch));
- if (c_write(fd, scratch, sizeof(scratch)) != sizeof(scratch)) {
- goto done;
- }
- }
-
- /* Write all memory segments */
- for (i = 0; i < num_mappings; i++) {
- if (mappings[i].flags & PF_W &&
- c_write(fd, (void *)mappings[i].start_address,
- mappings[i].end_address - mappings[i].start_address) !=
- mappings[i].end_address - mappings[i].start_address) {
- goto done;
- }
- }
- }
- }
- }
-
-done:
- if (loopback[0] >= 0)
- NO_INTR(sys_close(loopback[0]));
- if (loopback[1] >= 0)
- NO_INTR(sys_close(loopback[1]));
- NO_INTR(sys_close(fd));
- return;
-}
-
-
-/* Internal function for generating a core file. This function works for
- * both single- and multi-threaded core files. It assumes that all threads
- * are already suspended, and will resume them before returning.
- *
- * The caller must make sure that prctl(PR_SET_DUMPABLE, 1) has been called,
- * or this function might fail.
- */
-int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids) {
- long i;
- int rc = -1, fd = -1, threads = num_threads, hasSSE = 0;
- i386_prpsinfo prpsinfo;
- i386_prstatus prstatus;
- pid_t pids[threads + 1];
- i386_regs thread_regs[threads + 1];
- i386_fpregs thread_fpregs[threads + 1];
- i386_fpxregs thread_fpxregs[threads + 1];
- int pair[2];
- int main_pid = sys_gettid();
-
- /* Get thread status */
- if (threads)
- memcpy(pids, thread_pids, threads * sizeof(pid_t));
- memset(thread_regs, 0, (threads + 1) * sizeof(i386_regs));
- memset(thread_fpregs, 0, (threads + 1) * sizeof(i386_fpregs));
- memset(thread_fpxregs, 0, (threads + 1) * sizeof(i386_fpxregs));
-
- /* Threads are already attached, read their registers now */
- for (i = 0; i < threads; i++) {
- char scratch[4096];
- memset(scratch, 0xFF, sizeof(scratch));
- if (sys_ptrace(PTRACE_GETREGS, pids[i], scratch, scratch) == 0) {
- memcpy(thread_regs + i, scratch, sizeof(i386_regs));
- memset(scratch, 0xFF, sizeof(scratch));
- if (sys_ptrace(PTRACE_GETFPREGS, pids[i], scratch, scratch) == 0) {
- memcpy(thread_fpregs + i, scratch, sizeof(i386_fpregs));
- memset(scratch, 0xFF, sizeof(scratch));
- #ifndef __x86_64__
- /* Linux on x86-64 stores all FPU registers in the SSE structure */
- if (sys_ptrace(PTRACE_GETFPXREGS, pids[i], scratch, scratch) == 0) {
- memcpy(thread_fpxregs + i, scratch, sizeof(i386_fpxregs));
- } else {
- hasSSE = 0;
- }
- #endif
- } else
- goto ptrace;
- } else {
- ptrace: /* Oh, well, undo everything and get out of here */
- ResumeAllProcessThreads(threads, pids);
- goto error;
- }
- }
-
- /* Build the PRPSINFO data structure */
- memset(&prpsinfo, 0, sizeof(prpsinfo));
- prpsinfo.pr_sname = 'R';
- prpsinfo.pr_nice = sys_getpriority(PRIO_PROCESS, 0);
- prpsinfo.pr_uid = sys_geteuid();
- prpsinfo.pr_gid = sys_getegid();
- prpsinfo.pr_pid = main_pid;
- prpsinfo.pr_ppid = sys_getppid();
- prpsinfo.pr_pgrp = sys_getpgrp();
- prpsinfo.pr_sid = sys_getsid(0);
- /* scope */ {
- char scratch[4096], *cmd = scratch, *ptr;
- ssize_t size, len;
- int cmd_fd;
- memset(&scratch, 0, sizeof(scratch));
- size = sys_readlink("/proc/self/exe", scratch, sizeof(scratch));
- len = 0;
- for (ptr = cmd; *ptr != '\000' && size-- > 0; ptr++) {
- if (*ptr == '/') {
- cmd = ptr+1;
- len = 0;
- } else
- len++;
- }
- memcpy(prpsinfo.pr_fname, cmd,
- len > sizeof(prpsinfo.pr_fname) ? sizeof(prpsinfo.pr_fname) : len);
- NO_INTR(cmd_fd = sys_open("/proc/self/cmdline", O_RDONLY, 0));
- if (cmd_fd >= 0) {
- char *ptr;
- ssize_t size = c_read(cmd_fd, &prpsinfo.pr_psargs,
- sizeof(prpsinfo.pr_psargs));
- for (ptr = prpsinfo.pr_psargs; size-- > 0; ptr++)
- if (*ptr == '\000')
- *ptr = ' ';
- NO_INTR(sys_close(cmd_fd));
- }
- }
-
- /* Build the PRSTATUS data structure */
- /* scope */ {
- int stat_fd;
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.pr_pid = prpsinfo.pr_pid;
- prstatus.pr_ppid = prpsinfo.pr_ppid;
- prstatus.pr_pgrp = prpsinfo.pr_pgrp;
- prstatus.pr_sid = prpsinfo.pr_sid;
- prstatus.pr_fpvalid = 1;
- NO_INTR(stat_fd = sys_open("/proc/self/stat", O_RDONLY, 0));
- if (stat_fd >= 0) {
- char scratch[4096];
- ssize_t size = c_read(stat_fd, scratch, sizeof(scratch) - 1);
- if (size >= 0) {
- unsigned long tms;
- char *ptr = scratch;
- scratch[size] = '\000';
-
- /* User time */
- for (i = 13; i && *ptr; ptr++) if (*ptr == ' ') i--;
- tms = 0;
- while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
- prstatus.pr_utime.tv_sec = tms / 1000;
- prstatus.pr_utime.tv_usec = (tms % 1000) * 1000;
-
- /* System time */
- if (*ptr) ptr++;
- tms = 0;
- while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
- prstatus.pr_stime.tv_sec = tms / 1000;
- prstatus.pr_stime.tv_usec = (tms % 1000) * 1000;
-
- /* Cumulative user time */
- if (*ptr) ptr++;
- tms = 0;
- while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
- prstatus.pr_cutime.tv_sec = tms / 1000;
- prstatus.pr_cutime.tv_usec = (tms % 1000) * 1000;
-
- /* Cumulative system time */
- if (*ptr) ptr++;
- tms = 0;
- while (*ptr && *ptr != ' ') tms = 10*tms + *ptr++ - '0';
- prstatus.pr_cstime.tv_sec = tms / 1000;
- prstatus.pr_cstime.tv_usec = (tms % 1000) * 1000;
-
- /* Pending signals */
- for (i = 14; i && *ptr; ptr++) if (*ptr == ' ') i--;
- while (*ptr && *ptr != ' ')
- prstatus.pr_sigpend = 10*prstatus.pr_sigpend + *ptr++ - '0';
-
- /* Held signals */
- if (*ptr) ptr++;
- while (*ptr && *ptr != ' ')
- prstatus.pr_sigpend = 10*prstatus.pr_sigpend + *ptr++ - '0';
- }
- NO_INTR(sys_close(stat_fd));
- }
- }
-
- /* Create a file descriptor that can be used for reading data from
- * our child process. This is a little complicated because we need
- * to make sure there is no race condition with other threads
- * calling fork() at the same time (this is somewhat mitigated,
- * because our threads are supposedly suspended at this time). We
- * have to avoid other processes holding our file handles open. We
- * can do this by creating the pipe in the child and passing the
- * file handle back to the parent.
- */
- if (sys_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0) {
- int openmax = sys_sysconf(_SC_OPEN_MAX);
- int pagesize = sys_sysconf(_SC_PAGESIZE);
-
- /* Block signals prior to forking. Technically, POSIX requires us to call
- * pthread_sigmask(), if this is a threaded application. When using
- * glibc, we are OK calling sigprocmask(), though. We will end up
- * blocking additional signals that libpthread uses internally, but that
- * is actually exactly what we want.
- *
- * Also, POSIX claims that this should not actually be necessarily, but
- * reality says otherwise.
- */
- sigset_t old_signals, blocked_signals;
- sigfillset(&blocked_signals);
- sys_sigprocmask(SIG_BLOCK, &blocked_signals, &old_signals);
-
- /* Create a new core dump in child process; call sys_fork() in order to
- * avoid complications with pthread_atfork() handlers. In the child
- * process, we should only ever call system calls.
- */
- if ((rc = sys_fork()) == 0) {
- i386_user user;
- int fds[2];
-
- /* All signals are blocked at this time, but we could still end up
- * executing synchronous signals (such as SIGILL, SIGFPE, SIGSEGV,
- * SIGBUS, or SIGTRAP). Reset them to SIG_DFL.
- */
- static const int signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS};
- for (i = 0; i < sizeof(signals)/sizeof(*signals); i++) {
- struct sigaction act;
- memset(&act, 0, sizeof(act));
- act.sa_handler = SIG_DFL;
- act.sa_flags = SA_RESTART;
- sys_sigaction(signals[i], &act, NULL);
- }
-
- /* Get parent's CPU registers, and user data structure */
- if (sys_ptrace(PTRACE_ATTACH, main_pid, (void *)0, (void *)0) >= 0) {
- char scratch[4096];
- while (sys_waitpid(main_pid, (void *)0, __WALL) < 0) {
- if (errno != EINTR)
- sys_exit(1);
- }
- for (i = 0; i < sizeof(user); i += sizeof(int))
- ((int *)&user)[i/sizeof(int)] = sys_ptrace(PTRACE_PEEKUSER,
- main_pid, (void *)i, (void *) i);
- memset(scratch, 0xFF, sizeof(scratch));
- if (sys_ptrace(PTRACE_GETREGS, main_pid, scratch, scratch) == 0) {
- memcpy(thread_regs + threads, scratch, sizeof(i386_regs));
- memset(scratch, 0xFF, sizeof(scratch));
- if (sys_ptrace(PTRACE_GETFPREGS, main_pid, scratch, scratch) == 0) {
- memcpy(thread_fpregs + threads, scratch, sizeof(i386_fpregs));
- memset(scratch, 0xFF, sizeof(scratch));
- #ifndef __x86_64__
- /* Linux on x86-64 stores all FPU regs in the SSE structure */
- if (sys_ptrace(PTRACE_GETFPXREGS,main_pid,scratch,scratch) == 0) {
- memcpy(thread_fpxregs +threads,scratch,sizeof(i386_fpxregs));
- } else {
- hasSSE = 0;
- }
- #endif
- } else
- sys_exit(1);
- } else
- sys_exit(1);
- } else
- sys_exit(1);
- sys_ptrace(PTRACE_DETACH, main_pid, (void *)0, (void *)0);
-
- /* Fake a somewhat reasonable looking stack frame for the
- * getCoreDump() function.
- */
- SET_FRAME(*(Frame *)frame, thread_regs[threads]);
- memcpy(&user.regs, thread_regs + threads, sizeof(i386_regs));
- pids[threads++] = main_pid;
-
- /* Create a pipe for communicating with parent */
- if (sys_pipe(fds) < 0)
- sys_exit(1);
-
- /* Pass file handle to parent */
- /* scope */ {
- char cmsg_buf[CMSG_SPACE(sizeof(int))];
- struct iovec iov;
- struct msghdr msg;
- struct cmsghdr *cmsg;
- memset(&iov, 0, sizeof(iov));
- memset(&msg, 0, sizeof(msg));
- iov.iov_base = (void *)"";
- iov.iov_len = 1;
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(sizeof(int));
- *(int *)CMSG_DATA(cmsg) = fds[0];
- while (sys_sendmsg(pair[1], &msg, 0) < 0) {
- if (errno != EINTR)
- sys_exit(1);
- }
- while (sys_shutdown(pair[1], SHUT_RDWR) < 0) {
- if (errno != EINTR)
- sys_exit(1);
- }
- }
-
- /* Close all file handles other than the write end of our pipe */
- for (i = 0; i < openmax; i++)
- if (i != fds[1])
- NO_INTR(sys_close(i));
-
- /* Turn into a daemon process, so that "init" can reap us */
- if ((rc = sys_fork()) == 0) {
- CreateElfCore(fds[1], &prpsinfo, &user, &prstatus, threads,
- pids, thread_regs, thread_fpregs,
- hasSSE ? thread_fpxregs : NULL, pagesize);
- sys_exit(0);
- } else {
- sys_exit(rc < 0 ? 1 : 0);
- }
-
- /* Make the compiler happy. We never actually get here. */
- return 0;
- }
-
- /* In the parent */
- sys_sigprocmask(SIG_SETMASK, &old_signals, (void *)0);
- NO_INTR(sys_close(pair[1]));
-
- /* Get pipe file handle from child */
- /* scope */ {
- char buffer[1], cmsg_buf[CMSG_SPACE(sizeof(int))];
- struct iovec iov;
- struct msghdr msg;
- for (;;) {
- int nbytes;
- memset(&iov, 0, sizeof(iov));
- memset(&msg, 0, sizeof(msg));
- iov.iov_base = buffer;
- iov.iov_len = 1;
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- if ((nbytes = sys_recvmsg(pair[0], &msg, 0)) > 0) {
- struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
- if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_RIGHTS)
- fd = *(int *)CMSG_DATA(cmsg);
- break;
- } else if (nbytes == 0 || errno != EINTR) {
- break;
- }
- }
- }
- sys_shutdown(pair[0], SHUT_RDWR);
- NO_INTR(sys_close(pair[0]));
- }
-
- ResumeAllProcessThreads(threads, pids);
-
- /* Wait for child to detach itself */
- if (rc > 0) {
- int status;
- while (sys_waitpid(rc, &status, 0) < 0) {
- if (errno != EINTR)
- goto error;
- }
- rc = WEXITSTATUS(status) ? -1 : 0;
- }
-
- /* Check if child process ran successfully */
- if (rc >= 0) {
- return fd;
- }
-
-error:
- if (fd > 0)
- NO_INTR(sys_close(fd));
- return -1;
-}
-#endif
diff --git a/src/base/elfcore.h b/src/base/elfcore.h
index 90dea58..34e40a7 100644
--- a/src/base/elfcore.h
+++ b/src/base/elfcore.h
@@ -37,8 +37,10 @@
/* We currently only support x86-32 and x86-64 on Linux. Porting to
* other related platforms should not be difficult.
*/
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux)
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) && \
+ defined(__linux)
+#include <stdarg.h>
#include <stdint.h>
#include <sys/types.h>
#include "config.h"
@@ -57,30 +59,40 @@
* core file.
*/
-typedef struct i386_regs { /* Normal (non-FPU) CPU registers */
-#ifdef __x86_64__
- #define BP rbp
- #define SP rsp
- #define IP rip
- uint64_t r15,r14,r13,r12,rbp,rbx,r11,r10;
- uint64_t r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
- uint64_t rip,cs,eflags;
- uint64_t rsp,ss;
- uint64_t fs_base, gs_base;
- uint64_t ds,es,fs,gs;
-#else
- #define BP ebp
- #define SP esp
- #define IP eip
- uint32_t ebx, ecx, edx, esi, edi, ebp, eax;
- uint16_t ds, __ds, es, __es;
- uint16_t fs, __fs, gs, __gs;
- uint32_t orig_eax, eip;
- uint16_t cs, __cs;
- uint32_t eflags, esp;
- uint16_t ss, __ss;
+#if defined(__i386__) || defined(__x86_64__)
+ typedef struct i386_regs { /* Normal (non-FPU) CPU registers */
+ #ifdef __x86_64__
+ #define BP rbp
+ #define SP rsp
+ #define IP rip
+ uint64_t r15,r14,r13,r12,rbp,rbx,r11,r10;
+ uint64_t r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
+ uint64_t rip,cs,eflags;
+ uint64_t rsp,ss;
+ uint64_t fs_base, gs_base;
+ uint64_t ds,es,fs,gs;
+ #else
+ #define BP ebp
+ #define SP esp
+ #define IP eip
+ uint32_t ebx, ecx, edx, esi, edi, ebp, eax;
+ uint16_t ds, __ds, es, __es;
+ uint16_t fs, __fs, gs, __gs;
+ uint32_t orig_eax, eip;
+ uint16_t cs, __cs;
+ uint32_t eflags, esp;
+ uint16_t ss, __ss;
+ #endif
+ } i386_regs;
+#elif defined(__ARM_ARCH_3__)
+ typedef struct arm_regs { /* General purpose registers */
+ #define BP uregs[11] /* Frame pointer */
+ #define SP uregs[13] /* Stack pointer */
+ #define IP uregs[15] /* Program counter */
+ #define LR uregs[14] /* Link register */
+ long uregs[18];
+ } arm_regs;
#endif
-} i386_regs;
#if defined(__i386__) && defined(__GNUC__)
/* On x86 we provide an optimized version of the FRAME() macro, if the
@@ -88,7 +100,7 @@ typedef struct i386_regs { /* Normal (non-FPU) CPU registers */
* more accurate values for CPU registers.
*/
typedef struct Frame {
- struct i386_regs regs;
+ struct i386_regs uregs;
int errno_;
} Frame;
#define FRAME(f) Frame f; \
@@ -135,7 +147,39 @@ typedef struct i386_regs { /* Normal (non-FPU) CPU registers */
#define SET_FRAME(f,r) \
do { \
errno = (f).errno_; \
- (r) = (f).regs; \
+ (r) = (f).uregs; \
+ } while (0)
+#elif defined(__ARM_ARCH_3__) && defined(__GNUC__)
+ /* ARM calling conventions are a little more tricky. A little assembly
+ * helps in obtaining an accurate snapshot of all registers.
+ */
+ typedef struct Frame {
+ struct arm_regs arm;
+ int errno_;
+ } Frame;
+ #define FRAME(f) Frame f; \
+ do { \
+ long cpsr; \
+ f.errno_ = errno; \
+ __asm__ volatile( \
+ "stmia %0, {r0-r15}\n" /* All integer regs */\
+ : : "r"(&f.arm) : "memory"); \
+ f.arm.uregs[16] = 0; \
+ __asm__ volatile( \
+ "mrs %0, cpsr\n" /* Condition code reg */\
+ : "=r"(cpsr)); \
+ f.arm.uregs[17] = cpsr; \
+ } while (0)
+ #define SET_FRAME(f,r) \
+ do { \
+ /* Don't override the FPU status register. */\
+ /* Use the value obtained from ptrace(). This*/\
+ /* works, because our code does not perform */\
+ /* any FPU operations, itself. */\
+ long fps = (f).arm.uregs[16]; \
+ errno = (f).errno_; \
+ (r) = (f).arm; \
+ (r).uregs[16] = fps; \
} while (0)
#else
/* If we do not have a hand-optimized assembly version of the FRAME()
@@ -179,7 +223,7 @@ typedef struct i386_regs { /* Normal (non-FPU) CPU registers */
* dumps. If called as
*
* FRAME(frame);
- * InternalGetCoreDump(&frame, 0, NULL);
+ * InternalGetCoreDump(&frame, 0, NULL, ap);
*
* it creates a core file that only contains information about the
* calling thread.
@@ -205,7 +249,11 @@ typedef struct i386_regs { /* Normal (non-FPU) CPU registers */
* threaded environment, but it is ultimately the caller's responsibility
* to provide locking.
*/
-int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids);
+int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids,
+ va_list ap
+ /* const char *PATH,
+ const struct CoredumperCompressor *compressors,
+ const struct CoredumperCompressor **selected_comp */);
#endif
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
new file mode 100644
index 0000000..1fa081c
--- /dev/null
+++ b/src/base/linux_syscall_support.h
@@ -0,0 +1,381 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+/* This file includes Linux-specific support functions common to the
+ * coredumper and the thread lister; primarily, this is a collection
+ * of direct system calls, and a couple of symbols missing from
+ * standard header files.
+ */
+#ifndef _LINUX_CORE_SUPPORT_H
+#define _LINUX_CORE_SUPPORT_H
+
+/* We currently only support x86-32, x86-64, and ARM on Linux. Porting
+ * to other related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) && \
+ defined(__linux)
+
+#include <asm/posix_types.h>
+#include <asm/stat.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <linux/dirent.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+
+/* Definitions missing from the standard header files */
+#ifndef O_DIRECTORY
+#if defined(__ARM_ARCH_3__)
+#define O_DIRECTORY 0040000
+#else
+#define O_DIRECTORY 0200000
+#endif
+#endif
+#ifndef NT_PRFPXREG
+#define NT_PRFPXREG 20
+#endif
+#ifndef PTRACE_GETFPXREGS
+#define PTRACE_GETFPXREGS ((enum __ptrace_request)18)
+#endif
+#ifndef PR_GET_DUMPABLE
+#define PR_GET_DUMPABLE 3
+#endif
+#ifndef PR_SET_DUMPABLE
+#define PR_SET_DUMPABLE 4
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid 224
+#endif
+
+
+/* After forking, we must make sure to only call system calls. */
+#if __BOUNDED_POINTERS__
+ #error "Need to port invocations of syscalls for bounded ptrs"
+#else
+ /* The core dumper and the thread lister get executed after threads
+ * have been suspended. As a consequence, we cannot call any functions
+ * that acquire locks. Unfortunately, libc wraps most system calls
+ * (e.g. in order to implement pthread_atfork, and to make calls
+ * cancellable), which means we cannot call these functions. Instead,
+ * we have to call syscall() directly.
+ */
+ #if defined(__i386__)
+ /* In PIC mode (e.g. when building shared libraries), gcc for i386
+ * reserves ebx. Unfortunately, most distributions ship with implementations
+ * of _syscallX() which clobber ebx.
+ * Also, most definitions of _syscallX() neglect to mark "memory" as being
+ * clobbered. This causes problems with compilers that do a better job
+ * at optimizing across __asm__ calls.
+ * So, we just have to redefine all of the _syscallX() macros.
+ */
+ #define BODY(type,args...) \
+ long __res; \
+ __asm__ __volatile__("push %%ebx\n" \
+ "movl %2,%%ebx\n" \
+ "int $0x80\n" \
+ "pop %%ebx" \
+ args \
+ : "memory"); \
+ __syscall_return(type,__res)
+ #undef _syscall0
+ #define _syscall0(type,name) \
+ type name(void) { \
+ long __res; \
+ __asm__ volatile("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name) \
+ : "memory"); \
+ __syscall_return(type,__res); \
+ }
+ #undef _syscall1
+ #define _syscall1(type,name,type1,arg1) \
+ type name(type1 arg1) { \
+ BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"r" ((long)(arg1))); \
+ }
+ #undef _syscall2
+ #define _syscall2(type,name,type1,arg1,type2,arg2) \
+ type name(type1 arg1,type2 arg2) { \
+ BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"r" ((long)(arg1)), "c" ((long)(arg2))); \
+ }
+ #undef _syscall3
+ #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+ type name(type1 arg1,type2 arg2,type3 arg3) { \
+ BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"r" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3))); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ BODY(type, \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"r" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3)),"S" ((long)(arg4))); \
+ }
+ #elif defined(__ARM_ARCH_3__)
+ /* Most definitions of _syscallX() neglect to mark "memory" as being
+ * clobbered. This causes problems with compilers that do a better job
+ * at optimizing across __asm__ calls.
+ * So, we just have to redefine all of the _syscallX() macros.
+ */
+ #define REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+ #define BODY(type,name,args...) \
+ register long __res_r0 __asm__("r0"); \
+ long __res; \
+ __asm__ __volatile__ (__syscall(name) \
+ : "=r"(__res_r0) : args : "lr", "memory"); \
+ __res = __res_r0; \
+ __syscall_return(type, __res)
+ #undef _syscall0
+ #define _syscall0(type, name) \
+ type name() { \
+ BODY(type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type, name, type1, arg1) \
+ type name(type1 arg1) { \
+ REG(0, arg1); BODY(type, name, "r"(__r0)); \
+ }
+ #undef _syscall2
+ #define _syscall2(type, name, type1, arg1, type2, arg2) \
+ type name(type1 arg1, type2 arg2) { \
+ REG(0, arg1); REG(1, arg2); BODY(type, name, "r"(__r0), "r"(__r1)); \
+ }
+ #undef _syscall3
+ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
+ type name(type1 arg1, type2 arg2, type3 arg3) { \
+ REG(0, arg1); REG(1, arg2); REG(2, arg3); \
+ BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ REG(0, arg1); REG(1, arg2); REG(2, arg3); REG(3, arg4); \
+ BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \
+ }
+ #endif
+ #if defined(__x86_64__)
+ #define __NR_sys_recvmsg __NR_recvmsg
+ #define __NR_sys_sendmsg __NR_sendmsg
+ #define __NR_sys_shutdown __NR_shutdown
+ #define __NR_sys_rt_sigaction __NR_rt_sigaction
+ #define __NR_sys_rt_sigprocmask __NR_rt_sigprocmask
+ #define __NR_sys_socket __NR_socket
+ #define __NR_sys_socketpair __NR_socketpair
+ static inline _syscall3(int, sys_recvmsg, int, s,
+ struct msghdr*, m, int, f);
+ static inline _syscall3(int, sys_sendmsg, int, s,
+ const struct msghdr*, m, int, f);
+ static inline _syscall2(int, sys_shutdown, int, s,
+ int, h);
+ static inline _syscall4(int, sys_rt_sigaction, int, s,
+ const struct sigaction*, a,
+ struct sigaction*, o, int, c);
+ static inline _syscall4(int, sys_rt_sigprocmask, int, h,
+ const sigset_t*, s, sigset_t*, o, int, c);
+ static inline _syscall3(int, sys_socket, int, d,
+ int, t, int, p);
+ static inline _syscall4(int, sys_socketpair, int, d,
+ int, t, int, p, int*, s);
+ #define sys_sigaction(s,a,o) sys_rt_sigaction((s), (a), (o), \
+ (_NSIG+7)/8)
+ #define sys_sigprocmask(h,s,o) sys_rt_sigprocmask((h), (s),(o), \
+ (_NSIG+7)/8)
+ #endif
+ #if defined(__x86_64__) || defined(__ARM_ARCH_3__)
+ #define __NR_sys_wait4 __NR_wait4
+
+ static inline _syscall4(pid_t, sys_wait4, pid_t, p,
+ int*, s, int, o,
+ struct rusage*, r);
+
+ #define sys_waitpid(p,s,o) sys_wait4((p), (s), (o), 0)
+ #endif
+ #if defined(__i386__) || defined(__ARM_ARCH_3__)
+ #define __NR_sys_sigaction __NR_sigaction
+ #define __NR_sys_sigprocmask __NR_sigprocmask
+ #define __NR_sys__socketcall __NR_socketcall
+
+ static inline _syscall3(int, sys_sigaction, int, s,
+ const struct sigaction*, a, struct sigaction*, o);
+ static inline _syscall3(int, sys_sigprocmask, int, h,
+ const sigset_t*, s, sigset_t*, o);
+ static inline _syscall2(int, sys__socketcall, int, c,
+ va_list, a);
+ static inline int sys_socketcall(int op, ...) {
+ int rc;
+ va_list ap;
+ va_start(ap, op);
+ rc = sys__socketcall(op, ap);
+ va_end(ap);
+ return rc;
+ }
+ #define sys_recvmsg(s,m,f) sys_socketcall(17, (s), (m), (f))
+ #define sys_sendmsg(s,m,f) sys_socketcall(16, (s), (m), (f))
+ #define sys_shutdown(s,h) sys_socketcall(13, (s), (h))
+ #define sys_socket(d,t,p) sys_socketcall(1, (d), (t), (p))
+ #define sys_socketpair(d,t,p,s) sys_socketcall(8, (d), (t), (p),(s))
+ #endif
+ #if defined(__i386__)
+ #define __NR_sys_waitpid __NR_waitpid
+ static inline _syscall3(pid_t, sys_waitpid, pid_t, p,
+ int*, s, int, o);
+ #endif
+ #define __NR_sys_close __NR_close
+ #define __NR_sys_dup __NR_dup
+ #define __NR_sys_dup2 __NR_dup2
+ #define __NR_sys_execve __NR_execve
+ #define __NR_sys__exit __NR_exit
+ #define __NR_sys_fcntl __NR_fcntl
+ #define __NR_sys_fork __NR_fork
+ #define __NR_sys_fstat __NR_fstat
+ #define __NR_sys_getdents __NR_getdents
+ #define __NR_sys_getegid __NR_getegid
+ #define __NR_sys_geteuid __NR_geteuid
+ #define __NR_sys_getpgrp __NR_getpgrp
+ #define __NR_sys_getpid __NR_getpid
+ #define __NR_sys_getppid __NR_getppid
+ #define __NR_sys_getpriority __NR_getpriority
+ #define __NR_sys_getrlimit __NR_getrlimit
+ #define __NR_sys_getsid __NR_getsid
+ #define __NR__gettid __NR_gettid
+ #define __NR_sys_kill __NR_kill
+ #define __NR_sys_lseek __NR_lseek
+ #define __NR_sys_open __NR_open
+ #define __NR_sys_pipe __NR_pipe
+ #define __NR_sys_prctl __NR_prctl
+ #define __NR_sys_ptrace __NR_ptrace
+ #define __NR_sys_read __NR_read
+ #define __NR_sys_readlink __NR_readlink
+ #define __NR_sys_sched_yield __NR_sched_yield
+ #define __NR_sys_stat __NR_stat
+ #define __NR_sys_write __NR_write
+ static inline _syscall1(int, sys_close, int, f);
+ static inline _syscall1(int, sys_dup, int, f);
+ static inline _syscall2(int, sys_dup2, int, s,
+ int, d);
+ static inline _syscall3(int, sys_execve, const char*, f,
+ const char*const*,a,const char*const*, e);
+ static inline _syscall1(int, sys__exit, int, e);
+ static inline _syscall3(int, sys_fcntl, int, f,
+ int, c, long, a);
+ static inline _syscall0(pid_t, sys_fork);
+ static inline _syscall2(int, sys_fstat, int, f,
+ struct stat*, b);
+ static inline _syscall3(int, sys_getdents, int, f,
+ struct dirent*, d, int, c);
+ static inline _syscall0(gid_t, sys_getegid);
+ static inline _syscall0(uid_t, sys_geteuid);
+ static inline _syscall0(pid_t, sys_getpgrp);
+ static inline _syscall0(pid_t, sys_getpid);
+ static inline _syscall0(pid_t, sys_getppid);
+ static inline _syscall2(int, sys_getpriority, int, a,
+ int, b);
+ static inline _syscall2(int, sys_getrlimit, int, r,
+ struct rlimit*, l);
+ static inline _syscall1(pid_t, sys_getsid, pid_t, p);
+ static inline _syscall0(pid_t, _gettid);
+ static inline _syscall2(int, sys_kill, pid_t, p,
+ int, s);
+ static inline _syscall3(off_t, sys_lseek, int, f,
+ off_t, o, int, w);
+ static inline _syscall3(int, sys_open, const char*, p,
+ int, f, int, m);
+ static inline _syscall1(int, sys_pipe, int*, p);
+ static inline _syscall2(int, sys_prctl, int, o,
+ long, a);
+ static inline _syscall4(long, sys_ptrace, int, r,
+ pid_t, p, void *, a, void *, d);
+ static inline _syscall3(ssize_t, sys_read, int, f,
+ void *, b, size_t, c);
+ static inline _syscall3(int, sys_readlink, const char*, p,
+ char*, b, size_t, s);
+ static inline _syscall0(int, sys_sched_yield);
+ static inline _syscall2(int, sys_stat, const char*, f,
+ struct stat*, b);
+ static inline _syscall3(ssize_t, sys_write, int, f,
+ const void *, b, size_t, c);
+
+ static inline int sys_sysconf(int name) {
+ extern int __getpagesize(void);
+ switch (name) {
+ case _SC_OPEN_MAX: {
+ struct rlimit ru;
+ return sys_getrlimit(RLIMIT_NOFILE, &ru) < 0 ? 8192 : ru.rlim_cur;
+ }
+ case _SC_PAGESIZE:
+ return __getpagesize();
+ default:
+ errno = ENOSYS;
+ return -1;
+ }
+ }
+
+ static inline pid_t sys_gettid() {
+ pid_t tid = _gettid();
+ if (tid != -1) {
+ return tid;
+ }
+ return sys_getpid();
+ }
+
+ static inline void sys_ptrace_detach(pid_t pid) {
+ /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
+ * then sends job control signals to the real parent, rather than to
+ * the tracer. We reduce the risk of this happening by starting a
+ * whole new time slice, and then quickly sending a SIGCONT signal
+ * right after detaching from the tracee.
+ */
+ sys_sched_yield();
+ sys_ptrace(PTRACE_DETACH, pid, (void *)0, (void *)0);
+ sys_kill(pid, SIGCONT);
+ }
+ #undef REG
+ #undef BODY
+#endif
+
+
+#endif
+#endif
diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.c
index adc1e8e..9e13e83 100644
--- a/src/base/linuxthreads.c
+++ b/src/base/linuxthreads.c
@@ -35,68 +35,15 @@
#ifdef THREADS
-#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/prctl.h>
-#include <sys/ptrace.h>
-#include <sys/types.h>
#include <sys/socket.h>
-#include <sys/syscall.h>
#include <sys/wait.h>
-#include <unistd.h>
+#include "base/linux_syscall_support.h"
#include "base/thread_lister.h"
-#ifndef O_DIRECTORY
-#define O_DIRECTORY 0200000
-#endif
-
-#if __BOUNDED_POINTERS__
- #error "Need to port invocations of syscalls for bounded ptrs"
-#else
- /* (Most of) the code in this file gets executed after threads have been
- * suspended. As a consequence, we cannot call any functions that acquire
- * locks. Unfortunately, libc wraps most system calls (e.g. in order to
- * implement pthread_atfork, and to make calls cancellable), which means
- * we cannot call these functions. Instead, we have to call syscall()
- * directly.
- */
- #include <asm/stat.h>
- #include <asm/posix_types.h>
- #include <asm/types.h>
- #include <linux/dirent.h>
- #include <stdarg.h>
- #include <syscall.h>
- #ifdef __x86_64__
- #define sys_socket(d,t,p) syscall(SYS_socket, (d), (t), (p))
- #define sys_waitpid(p,s,o) syscall(SYS_wait4, (p), (s), (o), (void *)0)
- #else
- static int sys_socketcall(int op, ...) {
- int rc;
- va_list ap;
- va_start(ap, op);
- rc = syscall(SYS_socketcall, op, ap);
- va_end(ap);
- return rc;
- }
- #define sys_socket(d,t,p) sys_socketcall(1, (d), (t), (p))
- #define sys_waitpid(p,s,o) syscall(SYS_waitpid, (p), (s), (o))
- #endif
-
- #define sys_close(f) syscall(SYS_close, (f))
- #define sys_fcntl(f,c,a) syscall(SYS_fcntl, (f), (c), (a))
- #define sys_fstat(f,b) syscall(SYS_fstat, (f), (b))
- #define sys_getdents(f,d,c) syscall(SYS_getdents, (f), (d), (c))
- #define sys_getpid() syscall(SYS_getpid)
- #define sys_lseek(f,o,w) syscall(SYS_lseek, (f), (o), (w))
- #define sys_open(f,p,m) syscall(SYS_open, (f), (p), (m))
- #define sys_prctl(o,a) syscall(SYS_prctl, (o), (a))
- #define sys_ptrace(r,p,a,d) syscall(SYS_ptrace, (r), (p), (a), (d))
- #define sys_stat(f,b) syscall(SYS_stat, (f), (b))
-#endif
-
/* itoa() is not a standard function, and we cannot safely call printf()
* after suspending threads. So, we just implement our own copy. A
@@ -149,13 +96,15 @@ static int c_open(const char *fname, int flags, int mode) {
* 'callback' is supposed to do or arrange for ResumeAllProcessThreads.
* We return -1 on error and the return value of 'callback' on success.
*/
-int GetAllProcessThreads(void *parameter,
- GetAllProcessThreadsCallBack callback) {
- int marker = -1, proc = -1, dumpable = 1;
- int num_threads = 0, max_threads = 0;
- char marker_name[48], *marker_path;
- struct stat proc_sb, marker_sb;
- pid_t my_pid = sys_getpid();
+int ListAllProcessThreads(void *parameter,
+ ListAllProcessThreadsCallBack callback, ...) {
+ static const char *const proc_paths[] = { "/proc/self/task/", "/proc/", 0 };
+ const char *const *proc_path = proc_paths;
+ int marker = -1, proc = -1, dumpable = 1;
+ int num_threads = 0, max_threads = 0;
+ char marker_name[48], *marker_path;
+ struct stat proc_sb, marker_sb;
+ pid_t my_pid = sys_getpid();
/* Create "marker" that we can use to detect threads sharing the same
* address space and the same file handles. By setting the FD_CLOEXEC flag
@@ -186,9 +135,12 @@ int GetAllProcessThreads(void *parameter,
* a separate "task" directory. We check there first, and then fall back
* on the older naming convention if necessary.
*/
- if (((proc = c_open("/proc/self/task/", O_RDONLY|O_DIRECTORY, 0)) < 0 &&
- (proc = c_open("/proc/", O_RDONLY|O_DIRECTORY, 0)) < 0) ||
- sys_fstat(proc, &proc_sb) < 0)
+ if ((proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) {
+ if (*++proc_path != NULL)
+ continue;
+ goto failure;
+ }
+ if (sys_fstat(proc, &proc_sb) < 0)
goto failure;
/* Since we are suspending threads, we cannot call any libc functions that
@@ -251,7 +203,7 @@ int GetAllProcessThreads(void *parameter,
if (sys_stat(fname, &tmp_sb) >= 0 &&
marker_sb.st_dev == tmp_sb.st_dev &&
marker_sb.st_ino == tmp_sb.st_ino) {
- int i, j;
+ long i, j;
/* Found one of our threads, make sure it is no duplicate */
for (i = 0; i < num_threads; i++) {
@@ -282,7 +234,7 @@ int GetAllProcessThreads(void *parameter,
}
while (sys_waitpid(pid, (void *)0, __WALL) < 0) {
if (errno != EINTR) {
- sys_ptrace(PTRACE_DETACH, pid, (void *)0, (void *)0);
+ sys_ptrace_detach(pid);
goto next_entry;
}
}
@@ -293,7 +245,7 @@ int GetAllProcessThreads(void *parameter,
* show the "marker". This is probably a forked child
* process rather than a thread.
*/
- sys_ptrace(PTRACE_DETACH, pid, (void *)0, (void *)0);
+ sys_ptrace_detach(pid);
} else {
pids[num_threads++] = pid;
added_entries++;
@@ -304,19 +256,27 @@ int GetAllProcessThreads(void *parameter,
next_entry:;
}
}
- NO_INTR(sys_close(marker));
NO_INTR(sys_close(proc));
- /* Now we are ready to call the callback,
- * which takes care of resuming the threads for us.
+ /* If we failed to find any threads, try looking somewhere else in
+ * /proc. Maybe, threads are reported differently on this system.
*/
- result = callback(parameter, num_threads, pids);
-
- /* Restore the "dumpable" state of the process */
- if (!dumpable)
- sys_prctl(PR_SET_DUMPABLE, dumpable);
- return result;
-
+ if (num_threads > 1 || !*++proc_path) {
+ va_list ap;
+ NO_INTR(sys_close(marker));
+
+ /* Now we are ready to call the callback,
+ * which takes care of resuming the threads for us.
+ */
+ va_start(ap, callback);
+ result = callback(parameter, num_threads, pids, ap);
+ va_end(ap);
+
+ /* Restore the "dumpable" state of the process */
+ if (!dumpable)
+ sys_prctl(PR_SET_DUMPABLE, dumpable);
+ return result;
+ }
detach_threads:
/* Resume all threads prior to retrying the operation */
ResumeAllProcessThreads(num_threads, pids);
@@ -336,11 +296,11 @@ failure:
}
/* This function resumes the list of all linux threads that
- * GetAllProcessThreads pauses before giving to its callback.
+ * ListAllProcessThreads pauses before giving to its callback.
*/
void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
while (num_threads-- > 0) {
- sys_ptrace(PTRACE_DETACH, thread_pids[num_threads], (void *)0, (void *)0);
+ sys_ptrace_detach(thread_pids[num_threads]);
}
}
diff --git a/src/base/linuxthreads.h b/src/base/linuxthreads.h
index 636fd6c..a6d4298 100644
--- a/src/base/linuxthreads.h
+++ b/src/base/linuxthreads.h
@@ -40,7 +40,8 @@
/* We currently only support x86-32 and x86-64 on Linux. Porting to other
* related platforms should not be difficult.
*/
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux)
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) && \
+ defined(__linux)
/* Define the THREADS symbol to make sure that there is exactly one core dumper
* built into the library.
diff --git a/src/base/thread_lister.c b/src/base/thread_lister.c
index 8de404d..7eca594 100644
--- a/src/base/thread_lister.c
+++ b/src/base/thread_lister.c
@@ -43,9 +43,15 @@
* or if the multi-threading code has not been ported, yet.
*/
-int GetAllProcessThreads(void *parameter,
- GetAllProcessThreadsCallBack callback) {
- return callback(parameter, 0, NULL);
+int ListAllProcessThreads(void *parameter,
+ ListAllProcessThreadsCallBack callback, ...) {
+ va_list extra_args;
+ int callback_result;
+
+ /* No thread list is produced here: hand the callback an empty list
+ * (0 threads, NULL pid array) together with the caller's extra
+ * arguments, and pass its return value straight back.
+ */
+ va_start(extra_args, callback);
+ callback_result = callback(parameter, 0, NULL, extra_args);
+ va_end(extra_args);
+ return callback_result;
}
void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
diff --git a/src/base/thread_lister.h b/src/base/thread_lister.h
index 6bae064..49bf3bf 100644
--- a/src/base/thread_lister.h
+++ b/src/base/thread_lister.h
@@ -34,15 +34,17 @@
#ifndef _THREAD_LISTER_H
#define _THREAD_LISTER_H
+#include <stdarg.h>
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
-typedef int (*GetAllProcessThreadsCallBack)(void *parameter,
- int num_threads,
- pid_t *thread_pids);
+typedef int (*ListAllProcessThreadsCallBack)(void *parameter,
+ int num_threads,
+ pid_t *thread_pids,
+ va_list ap);
/* This function gets the list of all linux threads of the current process
* but this one and passes them to the 'callback' along with the 'parameter'
@@ -51,11 +53,11 @@ typedef int (*GetAllProcessThreadsCallBack)(void *parameter,
* 'callback' is supposed to do or arrange for ResumeAllProcessThreads.
* We return -1 on error and the return value of 'callback' on success.
*/
-int GetAllProcessThreads(void *parameter,
- GetAllProcessThreadsCallBack callback);
+int ListAllProcessThreads(void *parameter,
+ ListAllProcessThreadsCallBack callback, ...);
/* This function resumes the list of all linux threads that
- * GetAllProcessThreads pauses before giving to its callback.
+ * ListAllProcessThreads pauses before giving to its callback.
*/
void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids);
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index ef6c343..c9607ca 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -484,9 +484,15 @@ class HeapLeakChecker {
static void DoMainHeapCheck();
// Type of task for UseProcMaps
- enum ProcMapsTask { RECORD_GLOBAL_DATA_LOCKED, DISABLE_LIBRARY_ALLOCS };
+ enum ProcMapsTask {
+ RECORD_GLOBAL_DATA_LOCKED,
+ DISABLE_LIBRARY_ALLOCS
+ };
+ // Success/Error Return codes for UseProcMaps.
+ //   PROC_MAPS_USED: /proc/self/maps was read and at least one
+ //     mapping that looks like a shared library was seen.
+ //   CANT_OPEN_PROC_MAPS: /proc/self/maps could not be opened.
+ //   NO_SHARED_LIBS_IN_PROC_MAPS: the file was read, but no mapping
+ //     looked like a shared library.
+ enum ProcMapsResult { PROC_MAPS_USED, CANT_OPEN_PROC_MAPS,
+ NO_SHARED_LIBS_IN_PROC_MAPS };
// Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line.
- static void UseProcMaps(ProcMapsTask proc_maps_task);
+ static ProcMapsResult UseProcMaps(ProcMapsTask proc_maps_task);
// A ProcMapsTask to disable allocations from 'library'
// that is mapped to [start_address..end_address)
// (only if library is a certain system library).
diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h
index 3de0955..e088154 100644
--- a/src/google/malloc_extension.h
+++ b/src/google/malloc_extension.h
@@ -79,6 +79,15 @@ class MallocExtension {
// contents of "*result" are preserved.
virtual void GetHeapSample(std::string* result);
+ // Get a string that contains the stack traces that caused growth in
+ // the address space size. The format of the returned string is
+ // equivalent to the output of the heap profiler and can therefore
+ // be passed to "pprof".
+ //
+ // The generated data is *appended* to "*result". I.e., the old
+ // contents of "*result" are preserved.
+ virtual void GetHeapGrowthStacks(std::string* result);
+
// -------------------------------------------------------------------
// Control operations for getting and setting malloc implementation
// specific parameters. Some currently useful properties:
@@ -155,6 +164,10 @@ class MallocExtension {
// This is an internal extension. Callers should use the more
// convenient "GetHeapSample(string*)" method defined above.
virtual void** ReadStackTraces();
+
+ // Like ReadStackTraces(), but returns stack traces that caused growth
+ // in the address space size.
+ virtual void** ReadHeapGrowthStackTraces();
};
#endif // _GOOGLE_MALLOC_EXTENSION_H__
diff --git a/src/google/profiler.h b/src/google/profiler.h
index 249d7be..5eea0de 100644
--- a/src/google/profiler.h
+++ b/src/google/profiler.h
@@ -32,20 +32,28 @@
//
// Module for CPU profiling based on periodic pc-sampling.
//
-// To use this module, link it into your program. To activate it
-// at runtime, set the environment variable "CPUPROFILE" to be the
-// name of the file in which the profile data should be written.
-// (If you don't set the environment variable, no profiling will
-// happen, and the program should run without any slowdowns.)
+// To use this module, link it into your program. There should
+// be no slowdown caused by this unless you activate the profiler
+// using one of the steps given below.
//
-// Once you have done this, there are two ways to determine which
-// region(s) of code should be profiled:
+// To activate the profiler, do one of the following:
//
-// 1. If you set the "PROFILESELECTED" environment variable,
-// only regions of code that are surrounded with "ProfilerEnable()"
-// and "ProfilerDisable()" will be profiled.
-// 2. Otherwise, the main thread, and any thread that has had
-// ProfilerRegisterThread() called on it, will be profiled.
+// 1. Before starting the program, set the environment variable
+// "CPUPROFILE" to be the name of the file to which the profile
+// data should be written.
+//
+// 2. Programmatically, start and stop the profiler using
+// the routines "ProfilerStart(filename)" and "ProfilerStop()".
+//
+// All threads in the program are profiled whenever profiling is on.
+// There used to be a mechanism where a subset of the threads could be
+// profiled, but that functionality no longer exists (it would not
+// work correctly in new systems since the interval timer used by the
+// profiler is a per-address-space setting in new systems instead of
+// being a per-thread setting in 2.4 and earlier systems).
+//
+// Limitation: on 2.4 and earlier kernels, just the main thread will
+// be profiled.
//
// Use pprof to view the resulting profile output. If you have dot and
// gv installed, you can also get a graphical representation of CPU usage.
@@ -56,6 +64,8 @@
#ifndef _GOOGLE_PROFILER_H
#define _GOOGLE_PROFILER_H
+#include <time.h> // For time_t
+
// Start profiling and write profile info into fname.
extern bool ProfilerStart(const char* fname);
@@ -63,24 +73,35 @@ extern bool ProfilerStart(const char* fname);
// the currently accumulated profiling data will be cleared.
extern void ProfilerStop();
+// Flush any currently buffered profiling state to the profile file.
+// Has no effect if the profiler has not been started.
+extern void ProfilerFlush();
-// These functions have no effect if profiling has not been activated
-// globally (by specifying the "CPUPROFILE" environment variable or by
-// calling ProfilerStart() ).
-
-// Profile in the given thread. This is most usefully called when a
-// new thread is first entered. Note this may not work if
-// PROFILESELECTED is set.
-extern void ProfilerRegisterThread();
-// Turn profiling on and off, if PROFILESELECTED has been called.
+// DEPRECATED: these functions were used to enable/disable profiling
+// in the current thread, but no longer do anything.
extern void ProfilerEnable();
extern void ProfilerDisable();
-// Write out the current profile information to disk.
-extern void ProfilerFlush();
+// Returns true if profile is currently enabled
+extern bool ProfilingIsEnabledForAllThreads();
+
+// Routine for registering new threads with the profiler. This is
+// most usefully called when a new thread is first entered.
+extern void ProfilerRegisterThread();
+
+// Snapshot of the profiler's current status, as filled in by
+// ProfilerGetCurrentState().
+struct ProfilerState {
+ bool enabled; // Is profiling currently enabled?
+ time_t start_time; // If enabled, when was profiling started?
+ char profile_name[1024]; // Name of profile file being written, or '\0'
+ int samples_gathered; // Number of samples gathered so far (or 0)
+};
+// Stores state about profiler's current status into "*state".
+extern void ProfilerGetCurrentState(ProfilerState* state);
// ------------------------- ProfilerThreadState -----------------------
+// DEPRECATED: this class is no longer needed.
+//
// A small helper class that allows a thread to periodically check if
// profiling has been enabled or disabled, and to react appropriately
// to ensure that activity in the current thread is included in the
@@ -92,15 +113,13 @@ extern void ProfilerFlush();
// profile_state.ThreadCheck();
// }
class ProfilerThreadState {
-public:
- ProfilerThreadState();
+ public:
+ ProfilerThreadState() { }
// Called in a thread to enable or disable profiling on the thread
// based on whether profiling is currently on or off.
- void ThreadCheck();
-
-private:
- bool was_enabled_; // True if profiling was on in our last call
+ // DEPRECATED: No longer needed
+ void ThreadCheck() { }
};
#endif /* _GOOGLE_PROFILER_H */
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index c0ea994..f081f97 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -451,12 +451,14 @@ static bool IsLibraryNamed(const char* library, const char* library_base) {
void HeapLeakChecker::DisableLibraryAllocs(const char* library,
void* start_address,
void* end_address) {
+ int depth = 0;
// TODO(maxim): maybe this should be extended to also use objdump
// and pick the text portion of the library more precisely.
if (IsLibraryNamed(library, "/libpthread") ||
- // pthread has a lot of small "system" leaks we don't care about
+ // libpthread has a lot of small "system" leaks we don't care about.
+ // In particular it allocates memory to store data supplied via
+ // pthread_setspecific (which can be the only pointer to a heap object).
IsLibraryNamed(library, "/libdl") ||
- IsLibraryNamed(library, "/ld") ||
// library loaders leak some "system" heap that we don't care about
IsLibraryNamed(library, "/libcrypto")
// Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer
@@ -464,16 +466,36 @@ void HeapLeakChecker::DisableLibraryAllocs(const char* library,
// is so important for making crypto usable). We ignore all its
// allocations because we can't see the call stacks.
) {
+ depth = 1; // only disable allocation calls directly from the library code
+ } else if (IsLibraryNamed(library, "/ld")
+ // library loader leaks some "system" heap
+ // (e.g. thread-local storage) that we don't care about
+ ) {
+ depth = 2; // disable allocation calls directly from the library code
+ // and at depth 2 from it.
+ // We need depth 2 here solely because of a libc bug that
+ // forces us to jump through __memalign_hook and MemalignOverride hoops
+ // in tcmalloc.cc.
+ // Those buggy __libc_memalign() calls are in ld-linux.so and happen for
+ // thread-local storage allocations that we want to ignore here.
+ // We go with the depth-2 hack as a workaround for this libc bug:
+ // otherwise we'd need to extend MallocHook interface
+ // so that correct stack depth adjustment can be propagated from
+ // the exceptional case of MemalignOverride.
+ // Using depth 2 here should not mask real leaks because ld-linux.so
+ // does not call user code.
+ }
+ if (depth) {
HeapProfiler::MESSAGE(1, "HeapChecker: "
- "Disabling direct allocations from %s :\n",
- library);
+ "Disabling allocations from %s at depth %d:\n",
+ library, depth);
DisableChecksFromTo(start_address, end_address,
- 1); // only disable allocation calls directly
- // from the library code
+ depth);
}
}
-void HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
+HeapLeakChecker::ProcMapsResult
+HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
FILE* const fp = fopen("/proc/self/maps", "r");
if (!fp) {
int errsv = errno;
@@ -481,27 +503,29 @@ void HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
"Could not open /proc/self/maps: errno=%d. "
"Libraries will not be handled correctly.\n",
errsv);
- return;
+ return CANT_OPEN_PROC_MAPS;
}
char proc_map_line[1024];
+ bool saw_shared_lib = false;
while (fgets(proc_map_line, sizeof(proc_map_line), fp) != NULL) {
// All lines starting like
// "401dc000-4030f000 r??p 00132000 03:01 13991972 lib/bin"
// identify a data and code sections of a shared library or our binary
uint64 start_address, end_address, file_offset, inode;
int size;
- char permissions[5];
+ char permissions[5], *filename;
if (sscanf(proc_map_line, LLX"-"LLX" %4s "LLX" %*x:%*x "LLD" %n",
&start_address, &end_address, permissions,
&file_offset, &inode, &size) != 5) continue;
proc_map_line[strlen(proc_map_line) - 1] = '\0'; // zap the newline
+ filename = proc_map_line + size;
HeapProfiler::MESSAGE(4, "HeapChecker: "
"Looking at /proc/self/maps line:\n %s\n",
proc_map_line);
if (proc_maps_task == DISABLE_LIBRARY_ALLOCS &&
strncmp(permissions, "r-xp", 4) == 0 && inode != 0) {
if (start_address >= end_address) abort();
- DisableLibraryAllocs(proc_map_line + size,
+ DisableLibraryAllocs(filename,
reinterpret_cast<void*>(start_address),
reinterpret_cast<void*>(end_address));
}
@@ -514,8 +538,21 @@ void HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
if (start_address >= end_address) abort();
RecordGlobalDataLocked(proc_map_line + size, start_address, file_offset);
}
+ // Determine if any shared libraries are present.
+ if (strstr(filename, "lib") && strstr(filename, ".so")) {
+ saw_shared_lib = true;
+ }
}
fclose(fp);
+
+ if (!saw_shared_lib) {
+ HeapProfiler::MESSAGE(-1, "HeapChecker: "
+ "No shared libs detected. "
+ "Will likely report false leak positives "
+ "for statically linked executables.\n");
+ return NO_SHARED_LIBS_IN_PROC_MAPS;
+ }
+ return PROC_MAPS_USED;
}
// Total number and size of live objects dropped from the profile.
@@ -527,11 +564,12 @@ static int64 live_bytes_total = 0;
static int last_num_threads = 0;
static pid_t* last_thread_pids = NULL;
-// Callback for GetAllProcessThreads to ignore
+// Callback for ListAllProcessThreads to ignore
// thread stacks and registers for all our threads.
static int IgnoreLiveThreads(void* parameter,
int num_threads,
- pid_t* thread_pids) {
+ pid_t* thread_pids,
+ va_list ap) {
last_num_threads = num_threads;
assert(last_thread_pids == NULL);
last_thread_pids = new pid_t[num_threads];
@@ -547,7 +585,7 @@ static int IgnoreLiveThreads(void* parameter,
i386_regs thread_regs;
#define sys_ptrace(r,p,a,d) syscall(SYS_ptrace, (r), (p), (a), (d))
// We use sys_ptrace to avoid thread locking
- // because this is called from GetAllProcessThreads
+ // because this is called from ListAllProcessThreads
// when all but this thread are suspended.
// (This does not seem to matter much though: allocations and
// logging with HeapProfiler::MESSAGE seem to work just fine.)
@@ -600,8 +638,8 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
if (HeapProfiler::ignored_objects_) abort();
HeapProfiler::ignored_objects_ = new HeapProfiler::IgnoredObjectSet;
// Record global data as live:
- // We need to do it before we stop the threads in GetAllProcessThreads below;
- // otherwise deadlocks are possible
+ // We need to do it before we stop the threads in ListAllProcessThreads
+ // below; otherwise deadlocks are possible
// when we try to fork to execute objdump in UseProcMaps.
if (FLAGS_heap_check_ignore_global_live) {
library_live_objects = new LibraryLiveObjectsStacks;
@@ -613,7 +651,7 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
// and keep them suspended for the whole time of liveness checking
// (they can't (de)allocate due to profiler's lock but they could still
// mess with the pointer graph while we walk it).
- int r = GetAllProcessThreads(NULL, IgnoreLiveThreads);
+ int r = ListAllProcessThreads(NULL, IgnoreLiveThreads);
if (r == -1) {
HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; "
"may get false leak reports\n");
@@ -1312,9 +1350,15 @@ void HeapLeakChecker::InternalInitStart(const string& heap_check_type) {
assert(heap_checker_pid == getpid());
heap_checker_on = true;
if (!HeapProfiler::is_on_) abort();
- UseProcMaps(DISABLE_LIBRARY_ALLOCS);
+ ProcMapsResult pm_result = UseProcMaps(DISABLE_LIBRARY_ALLOCS);
// might neeed to do this more than once
// if one later dynamically loads libraries that we want disabled
+ if (pm_result != HeapLeakChecker::PROC_MAPS_USED) {
+ heap_checker_on = false;
+ HeapProfiler::MESSAGE(0, "HeapChecker: Turning itself off\n");
+ HeapProfiler::StopForLeaks();
+ return;
+ }
// make a good place and name for heap profile leak dumps
profile_prefix = new string(dump_directory());
diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc
index bfee34d..45eb908 100644
--- a/src/heap-profiler.cc
+++ b/src/heap-profiler.cc
@@ -134,6 +134,7 @@ void HeapProfiler::MESSAGE(int level, const char* format, ...) {
va_start(ap, format);
char buf[600];
vsnprintf(buf, sizeof(buf), format, ap);
+ va_end(ap);
write(STDERR_FILENO, buf, strlen(buf));
}
diff --git a/src/internal_logging.cc b/src/internal_logging.cc
index 16b040e..8c403c5 100644
--- a/src/internal_logging.cc
+++ b/src/internal_logging.cc
@@ -42,6 +42,7 @@ void TCMalloc_MESSAGE(const char* format, ...) {
va_start(ap, format);
char buf[800];
vsnprintf(buf, sizeof(buf), format, ap);
+ va_end(ap);
write(STDERR_FILENO, buf, strlen(buf));
}
@@ -50,6 +51,7 @@ void TCMalloc_Printer::printf(const char* format, ...) {
va_list ap;
va_start(ap, format);
const int r = vsnprintf(buf_, left_, format, ap);
+ va_end(ap);
if (r < 0) {
// Perhaps an old glibc that returns -1 on truncation?
left_ = 0;
diff --git a/src/internal_spinlock.h b/src/internal_spinlock.h
index 2015763..79c1279 100644
--- a/src/internal_spinlock.h
+++ b/src/internal_spinlock.h
@@ -45,7 +45,7 @@
#endif
#include <stdlib.h> /* for abort() */
-#if defined __i386__ && defined __GNUC__
+#if (defined __i386__ || defined __x86_64__) && defined __GNUC__
static void TCMalloc_SlowLock(volatile unsigned int* lockword);
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index 8ca58a7..1a42e6e 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -108,6 +108,10 @@ void** MallocExtension::ReadStackTraces() {
return NULL;
}
+// Default implementation: returns NULL, which GetHeapGrowthStacks()
+// reports as "not supported by this malloc implementation".
+void** MallocExtension::ReadHeapGrowthStackTraces() {
+ return NULL;
+}
+
// The current malloc extension object. We also keep a pointer to
// the default implementation so that the heap-leak checker does not
// complain about a memory leak.
@@ -178,9 +182,37 @@ struct StackTraceEqual {
typedef HASH_NAMESPACE::hash_set<void**, StackTraceHash, StackTraceEqual> StackTraceTable;
-void DebugStringWriter(const char* str, void* arg) {
- string* result = reinterpret_cast<string*>(arg);
- *result += str;
+// Appends the profile header line for "entries" to "*result": the summed
+// count and summed size over all entries (each printed twice), followed
+// by "label". The entry list ends at the first entry whose count is 0.
+void PrintHeader(string* result, const char* label, void** entries) {
+ uintptr_t count_sum = 0;
+ uintptr_t size_sum = 0;
+ void** cursor = entries;
+ while (Count(cursor) != 0) {
+ count_sum += Count(cursor);
+ size_sum += Size(cursor);
+ cursor += 3 + Depth(cursor); // advance 3 + depth slots to the next entry
+ }
+
+ const long long printed_count = static_cast<long long>(count_sum);
+ const long long printed_size = static_cast<long long>(size_sum);
+ char line[200];
+ snprintf(line, sizeof(line),
+ "heap profile: %6lld: %8lld [%6lld: %8lld] @ %s\n",
+ printed_count, printed_size, printed_count, printed_size,
+ label);
+ *result += line;
+}
+
+// Appends one profile line for "entry" to "*result": its count and size
+// (each printed twice), then every PC of its stack trace, then a newline.
+void PrintStackEntry(string* result, void** entry) {
+ // Format counts and sizes as long long (as PrintHeader does) so that
+ // values wider than int are not truncated. Values that fit in an int
+ // print exactly as before.
+ char buf[100];
+ snprintf(buf, sizeof(buf), "%6lld: %8lld [%6lld: %8lld] @",
+ static_cast<long long>(Count(entry)),
+ static_cast<long long>(Size(entry)),
+ static_cast<long long>(Count(entry)),
+ static_cast<long long>(Size(entry)));
+ *result += buf;
+ for (int i = 0; i < Depth(entry); i++) {
+ snprintf(buf, sizeof(buf), " %p", PC(entry, i));
+ *result += buf;
+ }
+ *result += "\n";
}
}
@@ -188,18 +220,16 @@ void DebugStringWriter(const char* str, void* arg) {
void MallocExtension::GetHeapSample(string* result) {
void** entries = ReadStackTraces();
if (entries == NULL) {
- *result += "this malloc implementation does not support sampling\n";
+ *result += "This malloc implementation does not support sampling.\n"
+ "As of 2005/01/26, only tcmalloc supports sampling, and you\n"
+ "are probably running a binary that does not use tcmalloc.\n";
return;
}
// Group together all entries with same stack trace
StackTraceTable table;
- int total_count = 0;
- int total_size = 0;
for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) {
StackTraceTable::iterator iter = table.find(entry);
- total_count += Count(entry);
- total_size += Size(entry);
if (iter == table.end()) {
// New occurrence
table.insert(entry);
@@ -210,27 +240,37 @@ void MallocExtension::GetHeapSample(string* result) {
}
}
- char buf[100];
- snprintf(buf, sizeof(buf), "heap profile: %6d: %8d [%6d: %8d] @\n",
- total_count, total_size, total_count, total_size);
- *result += buf;
+ PrintHeader(result, "heap", entries);
for (StackTraceTable::iterator iter = table.begin();
iter != table.end();
++iter) {
- void** entry = *iter;
- snprintf(buf, sizeof(buf), "%6d: %8d [%6d: %8d] @",
- int(Count(entry)), int(Size(entry)),
- int(Count(entry)), int(Size(entry)));
- *result += buf;
- for (int i = 0; i < Depth(entry); i++) {
- snprintf(buf, sizeof(buf), " %p", PC(entry, i));
- *result += buf;
- }
- *result += "\n";
+ PrintStackEntry(result, *iter);
}
// TODO(menage) Get this working in google-perftools
//DumpAddressMap(DebugStringWriter, result);
+}
+
+// Appends to "*result" a heap-profile-formatted listing of the stack
+// traces returned by ReadHeapGrowthStackTraces(). If that call returns
+// NULL, an explanatory message is appended instead.
+void MallocExtension::GetHeapGrowthStacks(std::string* result) {
+ void** entries = ReadHeapGrowthStackTraces();
+ if (entries == NULL) {
+ *result += "This malloc implementation does not support "
+ "ReadHeapGrowthStackTraces().\n"
+ "As of 2005/09/27, only tcmalloc supports this, and you\n"
+ "are probably running a binary that does not use tcmalloc.\n";
+ return;
+ }
+ // Do not canonicalize the stack entries, so that we get a
+ // time-ordered list of stack traces, which may be useful if the
+ // client wants to focus on the latest stack traces.
+
+ PrintHeader(result, "growth", entries);
+ for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) {
+ PrintStackEntry(result, entry);
+ }
delete[] entries;
+
+ // TODO(menage) Get this working in google-perftools
+ //DumpAddressMap(DebugStringWriter, result);
}
diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc
index a238fd1..3047031 100644
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@@ -40,17 +40,24 @@ MallocHook::MunmapHook MallocHook::munmap_hook_ = NULL;
// On Linux/x86, we override mmap/munmap and provide support for
// calling the related hooks.
-#if defined(__i386__) && defined(__linux)
+//
+// We define mmap() and mmap64(), which somewhat reimplements libc's mmap
+// syscall stubs. Unfortunately libc only exports the stubs via weak symbols
+// (which we're overriding with our mmap64() and mmap() wrappers) so we can't
+// just call through to them.
+
+#if defined(__linux) && (defined(__i386__) || defined(__x86_64__))
#include <unistd.h>
#include <syscall.h>
#include <sys/mman.h>
#include <errno.h>
-// This somewhat reimplements libc's mmap syscall stubs. Unfortunately
-// libc only exports the stubs via weak symbols (which we're
-// overriding with our mmap64() and mmap() wrappers) so we can't just
-// call through to them.
+// The x86-32 case and the x86-64 case differ:
+// 32b has a mmap2() syscall, 64b does not.
+// 64b and 32b have different calling conventions for mmap().
+# if defined(__i386__)
+
extern "C" void* mmap64(void *start, size_t length,
int prot, int flags,
int fd, __off64_t offset) __THROW {
@@ -98,6 +105,29 @@ extern "C" void* mmap64(void *start, size_t length,
}
+//--------------------------------------------------------------------------//
+
+# elif defined(__x86_64__)
+
+// Alias the syscall numbers under the "wrapped_" names so that the
+// _syscallN invocations below can generate wrapped_mmap() and
+// wrapped_munmap() (presumably the macros look up __NR_<name>; confirm
+// against the kernel's unistd.h).
+#define __NR_wrapped_mmap __NR_mmap
+#define __NR_wrapped_munmap __NR_munmap
+static inline _syscall6(void *, wrapped_mmap, void *, start,
+ size_t, length, int, prot, int, flags, int,
+ fd, __off64_t, offset);
+static inline _syscall2(int, wrapped_munmap, void *, start, size_t, length);
+
+extern "C" void* mmap64(void *start, size_t length,
+ int prot, int flags,
+ int fd, __off64_t offset) __THROW {
+ // Perform the mapping through the wrapped syscall first, then report
+ // it to the mmap hook before handing the result back to the caller.
+ void* const mapping =
+ wrapped_mmap(start, length, prot, flags, fd, offset);
+ MallocHook::InvokeMmapHook(mapping, start, length, prot, flags, fd, offset);
+ return mapping;
+}
+
+# endif
+
extern "C" void* mmap(void *start, size_t length,
int prot, int flags,
int fd, off_t offset) __THROW {
diff --git a/src/pagemap.h b/src/pagemap.h
index 50ff1bf..1fdde99 100644
--- a/src/pagemap.h
+++ b/src/pagemap.h
@@ -76,6 +76,8 @@ class TCMalloc_PageMap1 {
return true;
}
+ void PreallocateMoreMemory() {}
+
// REQUIRES "k" is in range "[0,2^BITS-1]".
// REQUIRES "k" has been ensured before.
//
@@ -152,6 +154,11 @@ class TCMalloc_PageMap2 {
}
return true;
}
+
+ // Eagerly ensures map entries for the whole key range [0, 2^BITS).
+ // NOTE(review): the result of Ensure() is not checked here; confirm
+ // that ignoring an allocation failure is intended.
+ void PreallocateMoreMemory() {
+ // Allocate enough to keep track of all possible pages
+ Ensure(0, 1 << BITS);
+ }
};
// Three-level radix tree
@@ -236,6 +243,9 @@ class TCMalloc_PageMap3 {
}
return true;
}
+
+ void PreallocateMoreMemory() {
+ }
};
#endif // TCMALLOC_PAGEMAP_H__
diff --git a/src/pprof b/src/pprof
index 494235d..46366bc 100755
--- a/src/pprof
+++ b/src/pprof
@@ -34,7 +34,7 @@
# or by the heap profiler (common/debugallocation.cc)
#
# The profile contains a sequence of entries of the form:
-# <count> <stack trace>
+# <count> <stack trace>
# This program parses the profile, and generates user-readable
# output.
#
@@ -87,6 +87,7 @@ Usage: pprof [options] <program> <profile>
Options:
--cum Sort by cumulative data
--base=<base> Subtract <base> from <profile> before display
+ --interactive Run in interactive mode (interactive "help" gives help)
Reporting Granularity:
--addresses Report at address level
@@ -191,10 +192,14 @@ $main::opt_alloc_space = 0;
$main::opt_alloc_objects = 0;
$main::opt_show_bytes = 0;
$main::opt_drop_negative = 0;
+$main::opt_interactive = 0;
# Are we printing a heap profile?
$main::heap_profile = 0;
+# Are we printing a lock profile?
+$main::lock_profile = 0;
+
GetOptions("help!" => \$main::opt_help,
"version!" => \$main::opt_version,
"cum!" => \$main::opt_cum,
@@ -211,6 +216,7 @@ GetOptions("help!" => \$main::opt_help,
"ps!" => \$main::opt_ps,
"pdf!" => \$main::opt_pdf,
"gif!" => \$main::opt_gif,
+ "interactive!" => \$main::opt_interactive,
"nodecount=i" => \$main::opt_nodecount,
"nodefraction=f" => \$main::opt_nodefraction,
"edgefraction=f" => \$main::opt_edgefraction,
@@ -283,35 +289,43 @@ if ($modes == 0) {
}
my $prog = shift || fatal("Did not specify program");
-my $pfile = shift || fatal("Did not specify profile file");
+my $pfile_arg = shift || fatal("Did not specify profile file");
##### Main section #####
# Setup tmp-file name and handler to clean it up
$main::tmpfile_sym = "/tmp/pprof$$.sym";
-$main::tmpfile_ps = "/tmp/pprof$$.ps";
+$main::tmpfile_ps = "/tmp/pprof$$";
+$main::next_tmpfile = 0;
+$main::collected_profile = undef;
$SIG{'INT'} = \&sighandler;
# Read profile data
+my $pfile = FetchDynamicProfile($prog, $pfile_arg);
my $data = ReadProfile($prog, $pfile);
my $profile = $data->{profile};
-my $libs = $data->{libs}; # Info about main program and shared libraries
+my $libs = $data->{libs}; # Info about main program and shared libraries
# List of function names to skip
$main::skip = ();
if ($main::heap_profile) {
foreach my $name ('calloc',
- 'cfree',
- 'malloc',
- 'free',
- 'memalign',
- 'pvalloc',
- 'valloc',
- 'realloc',
- '__builtin_delete',
- '__builtin_new',
- '__builtin_vec_delete',
- '__builtin_vec_new') {
+ 'cfree',
+ 'malloc',
+ 'free',
+ 'memalign',
+ 'pvalloc',
+ 'valloc',
+ 'realloc',
+ '__builtin_delete',
+ '__builtin_new',
+ '__builtin_vec_delete',
+ '__builtin_vec_new') {
+ $main::skip{$name} = 1;
+ }
+}
+if ($main::lock_profile) {
+ foreach my $name ('Mutex::Unlock') {
$main::skip{$name} = 1;
}
}
@@ -347,41 +361,246 @@ my $flat = FlatProfile($reduced);
my $cumulative = CumulativeProfile($reduced);
# Print
-if ($main::opt_disasm) {
- PrintDisassembly($libs, $flat, $cumulative);
-} elsif ($main::opt_list) {
- PrintListing($libs, $flat, $cumulative);
-} elsif ($main::opt_text) {
- PrintText($symbols, $flat, $cumulative, $total);
-} else {
- PrintDot($prog, $symbols, $profile, $flat, $cumulative, $total);
- if ($main::opt_gv) {
- # Some versions of gv use -scale, and some use --scale. *sigh*
- # We use --help to determine if gv expects one dash or two.
- system("$GV --help >/dev/null 2>&1 " .
- "&& $GV --scale=$main::opt_scale $main::tmpfile_ps " .
- "|| $GV -scale $main::opt_scale $main::tmpfile_ps")
+if (!$main::opt_interactive) {
+ if ($main::opt_disasm) {
+ PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
+ } elsif ($main::opt_list) {
+ PrintListing($libs, $flat, $cumulative, $main::opt_list);
+ } elsif ($main::opt_text) {
+ PrintText($symbols, $flat, $cumulative, $total, -1);
+ } else {
+ if (PrintDot($prog, $symbols, $profile, $flat, $cumulative, $total)) {
+ if ($main::opt_gv) {
+ # Some versions of gv use -scale, and some use --scale. *sigh*
+ # We use --help to determine if gv expects one dash or two.
+ system("$GV --help >/dev/null 2>&1 " .
+ "&& $GV --scale=$main::opt_scale $main::tmpfile_ps " .
+ "|| $GV -scale $main::opt_scale $main::tmpfile_ps")
+ }
+ } else {
+ exit(1);
+ }
}
+} else {
+ InteractiveMode();
}
cleanup();
exit(0);
+
+##### Interactive helper routines #####
+# Read-eval-print loop used when pprof is run with --interactive.
+#
+# Prompts with "(pprof) " and reads one command per line from STDIN until
+# end-of-file or a "quit" command.  Recognized commands are:
+#   help                    print the command summary
+#   text / top[N] [args]    flat or cumulative text listing (N defaults to 10)
+#   list <regexp> [args]    annotated source listing
+#   disasm <regexp> [args]  annotated disassembly
+#   gv [focus] [args]       graphical display in a background viewer
+# The per-command option flags are reset before every command, so flags
+# such as --cum or --lines apply only to the command they are typed on.
+# Anything else that is not a blank line is reported as unknown.
+sub InteractiveMode {
+  $| = 1;  # Make output unbuffered for interactive mode
+  my $orig_profile = $profile;
+  while (1) {
+    print "(pprof) ";
+    $_ = <STDIN>;
+    if (!defined($_)) {
+      # End of input (e.g. ^D): finish the prompt line and leave
+      print "\n";
+      last;
+    }
+    if (m/^ *quit/) {
+      last;
+    }
+    if (m/^ *help/) {
+      InteractiveHelpMessage();
+      next;
+    }
+    # Clear all the options
+    $main::opt_lines = 0;
+    $main::opt_text = 0;
+    $main::opt_disasm = 0;
+    $main::opt_list = 0;
+    $main::opt_gv = 0;
+    $main::opt_cum = 0;
+
+    if (m/^ *(text|top)(\d*) *(.*)/) {
+      $main::opt_text = 1;
+
+      # Optional count directly after the command name, e.g. "top20"
+      my $line_limit = ($2 ne "") ? int($2) : 10;
+
+      my $routine;
+      my $ignore;
+      ($routine, $ignore) = ParseInteractiveArgs($3);
+
+      my $profile = ProcessProfile($orig_profile, "", $ignore);
+      my $reduced = ReduceProfile($symbols, $profile);
+
+      # Get derived profiles
+      my $flat = FlatProfile($reduced);
+      my $cumulative = CumulativeProfile($reduced);
+
+      PrintText($symbols, $flat, $cumulative, $total, $line_limit);
+      next;
+    }
+    if (m/^ *list *(.+)/) {
+      $main::opt_list = 1;
+
+      my $routine;
+      my $ignore;
+      ($routine, $ignore) = ParseInteractiveArgs($1);
+
+      my $profile = ProcessProfile($orig_profile, "", $ignore);
+      my $reduced = ReduceProfile($symbols, $profile);
+
+      # Get derived profiles
+      my $flat = FlatProfile($reduced);
+      my $cumulative = CumulativeProfile($reduced);
+
+      PrintListing($libs, $flat, $cumulative, $routine);
+      next;
+    }
+    if (m/^ *disasm *(.+)/) {
+      $main::opt_disasm = 1;
+
+      my $routine;
+      my $ignore;
+      ($routine, $ignore) = ParseInteractiveArgs($1);
+
+      # Process current profile to account for various settings
+      my $profile = ProcessProfile($orig_profile, "", $ignore);
+      my $reduced = ReduceProfile($symbols, $profile);
+
+      # Get derived profiles
+      my $flat = FlatProfile($reduced);
+      my $cumulative = CumulativeProfile($reduced);
+
+      PrintDisassembly($libs, $flat, $cumulative, $routine);
+      next;
+    }
+    if (m/^ *gv *(.*)/) {
+      $main::opt_gv = 1;
+
+      my $focus;
+      my $ignore;
+      ($focus, $ignore) = ParseInteractiveArgs($1);
+
+      # Process current profile to account for various settings
+      my $profile = ProcessProfile($orig_profile, $focus, $ignore);
+      my $reduced = ReduceProfile($symbols, $profile);
+
+      # Get derived profiles
+      my $flat = FlatProfile($reduced);
+      my $cumulative = CumulativeProfile($reduced);
+
+      if (PrintDot($prog, $symbols, $profile, $flat, $cumulative, $total)) {
+        # Launch the configured viewer ($GV), as the non-interactive path
+        # does, rather than a hard-coded "gv".  Each display gets its own
+        # numbered temp file; cleanup() removes them at exit.
+        system("$GV -scale $main::opt_scale -noresize " .
+               PsTempName($main::next_tmpfile) . " &");
+        $main::next_tmpfile++;
+      }
+      next;
+    }
+    # Nothing matched.  Tell the user instead of silently re-prompting;
+    # blank lines are still skipped quietly.
+    if (!m/^\s*$/) {
+      print STDERR "Unknown command: try 'help'.\n";
+    }
+  }
+}
+
+
+# Apply the interactive focus/ignore filters to a profile.
+#
+# Arguments:
+#   $orig_profile  profile to filter (it is not modified here)
+#   $focus         regexp passed to FocusProfile, or '' to skip focusing
+#   $ignore        regexp passed to IgnoreProfile, or '' to skip ignoring
+# Prints the total sample count and, for each filter applied, how many
+# samples remain and what percentage of the total that is.
+# Returns the filtered profile.
+sub ProcessProfile {
+  my $orig_profile = shift;
+  my $focus = shift;
+  my $ignore = shift;
+
+  # Process current profile to account for various settings
+  my $profile = $orig_profile;
+  my $total_count = TotalProfile($profile);
+
+  # A profile whose total is zero would otherwise make the percentage
+  # computation die with "Illegal division by zero"; report 0% instead.
+  my $percent_of_total = sub {
+    my $count = shift;
+    return $total_count ? ($count*100.0) / $total_count : 0;
+  };
+
+  print "Total: ", $total_count, " samples\n";
+  if ($focus ne '') {
+    $profile = FocusProfile($symbols, $profile, $focus);
+    my $focus_count = TotalProfile($profile);
+    printf "After focusing on '%s': %d samples of %d (%0.1f%%)\n",
+      $focus, $focus_count, $total_count, $percent_of_total->($focus_count);
+  }
+  if ($ignore ne '') {
+    $profile = IgnoreProfile($symbols, $profile, $ignore);
+    my $ignore_count = TotalProfile($profile);
+    printf "After ignoring '%s': %d samples of %d (%0.1f%%)\n",
+      $ignore, $ignore_count, $total_count,
+      $percent_of_total->($ignore_count);
+  }
+
+  return $profile;
+}
+
+# Print the command summary for interactive mode.
+# Takes no arguments.  The whole message is the here-document below,
+# written with a plain "print" (i.e. to the currently selected output
+# handle).
+sub InteractiveHelpMessage {
+ print <<ENDOFHELP;
+Interactive pprof mode
+
+Commands:
+ gv
+ gv [focus] [-ignore1] [-ignore2]
+ Show graphical hierarchical display of current profile. Without
+ any arguments, shows all samples in the profile. With the optional
+ "focus" argument, restricts the samples shown to just those where
+ the "focus" regular expression matches a routine name on the stack
+ trace.
+
+ list [routine_regexp] [-ignore1] [-ignore2]
+ Show source listing of routines whose names match "routine_regexp"
+
+ top [--cum] [-ignore1] [-ignore2]
+ top20 [--cum] [-ignore1] [-ignore2]
+ top37 [--cum] [-ignore1] [-ignore2]
+ Show top lines ordered by flat profile count, or cumulative count
+ if --cum is specified. If a number is present after 'top', the
+ top K routines will be shown (defaults to showing the top 10)
+
+ disasm [routine_regexp] [-ignore1] [-ignore2]
+ Show disassembly of routines whose names match "routine_regexp",
+ annotated with sample counts.
+
+ help - This listing
+ quit or ^D - End pprof
+
+For commands that accept optional -ignore tags, samples where any routine in
+the stack trace matches the regular expression in any of the -ignore
+parameters will be ignored.
+
+ENDOFHELP
+}
+# Split the argument text of an interactive command into a focus regexp
+# and an ignore regexp.
+#
+# Argument: the text following the command name.
+# Each whitespace-separated token is interpreted as follows:
+#   -lines / --lines   sets $main::opt_lines
+#   -cum / --cum       sets $main::opt_cum
+#   -<regexp>          added to the ignore alternatives
+#   <regexp>           added to the focus alternatives
+# Alternatives of the same kind are joined with "|".  Prints a notice when
+# an ignore pattern is in effect.
+# Returns the list ($focus, $ignore); either may be the empty string.
+sub ParseInteractiveArgs {
+  my $args = shift;
+  my @focus_patterns = ();
+  my @ignore_patterns = ();
+  # split(' ', ...) breaks on any run of whitespace and never yields an
+  # empty leading field.  A lexical loop variable is used so the global
+  # sort variable $a is left alone.
+  foreach my $arg (split(' ', $args)) {
+    if ($arg =~ m/^(--|-)lines$/) {
+      $main::opt_lines = 1;
+    } elsif ($arg =~ m/^(--|-)cum$/) {
+      $main::opt_cum = 1;
+    } elsif ($arg =~ m/^-(.*)/) {
+      # Skip a bare "-": an empty alternative would make the combined
+      # regexp match every sample, so everything would be ignored.
+      push(@ignore_patterns, $1) if ($1 ne "");
+    } else {
+      push(@focus_patterns, $arg);
+    }
+  }
+  my $focus = join("|", @focus_patterns);
+  my $ignore = join("|", @ignore_patterns);
+  if ($ignore ne "") {
+    print "Ignoring samples in call stacks that match '$ignore'\n";
+  }
+  return ($focus, $ignore);
+}
+
##### Output code #####
+# Build the name of the numbered PostScript temp file:
+# "<$main::tmpfile_ps>.<number>.ps".
+sub PsTempName {
+  my ($index) = @_;
+  return join(".", $main::tmpfile_ps, $index, "ps");
+}
+
# Print text output
sub PrintText {
my $symbols = shift;
my $flat = shift;
my $cumulative = shift;
my $total = shift;
+ my $line_limit = shift;
# Which profile to sort by?
my $s = $main::opt_cum ? $cumulative : $flat;
my $running_sum = 0;
+ my $lines = 0;
foreach my $k (sort { GetEntry($s,$b) <=> GetEntry($s, $a) }
- keys(%{$cumulative})) {
+ keys(%{$cumulative})) {
my $f = GetEntry($flat, $k);
my $c = GetEntry($cumulative, $k);
$running_sum += $f;
@@ -390,7 +609,7 @@ sub PrintText {
if (exists($symbols->{$k})) {
$sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1];
if ($main::opt_addresses) {
- $sym = $k . " " . $sym;
+ $sym = $k . " " . $sym;
}
}
@@ -403,6 +622,8 @@ sub PrintText {
Percent($c, $total),
$sym);
}
+ $lines++;
+ last if ($line_limit >= 0 && $lines > $line_limit);
}
}
@@ -411,9 +632,10 @@ sub PrintDisassembly {
my $libs = shift;
my $flat = shift;
my $cumulative = shift;
+ my $disasm_opts = shift;
foreach my $lib (@{$libs}) {
- my $symbol_table = GetProcedureBoundaries($lib->[0], $main::opt_disasm);
+ my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
my $offset = $lib->[1] - $lib->[3];
foreach my $routine (keys(%{$symbol_table})) {
my $start_addr = $symbol_table->{$routine}->[0];
@@ -422,8 +644,8 @@ sub PrintDisassembly {
my $total_flat = 0;
my $total_cum = 0;
for (my $addr = $start_addr; $addr < $end_addr; $addr++) {
- $total_flat += GetEntry($flat, sprintf("0x%x", $addr+$offset));
- $total_cum += GetEntry($cumulative, sprintf("0x%x", $addr+$offset));
+ $total_flat += GetEntry($flat, sprintf("0x%x", $addr+$offset));
+ $total_cum += GetEntry($cumulative, sprintf("0x%x", $addr+$offset));
}
# Skip disassembly if there are no samples in routine
@@ -431,26 +653,26 @@ sub PrintDisassembly {
print "ROUTINE ====================== $routine\n";
printf "%6s %6s Total samples (flat / cumulative)\n",
- Unparse($total_flat), Unparse($total_cum);
+ Unparse($total_flat), Unparse($total_cum);
my @instructions = Disassemble($lib->[0], $offset,
- $start_addr, $end_addr);
+ $start_addr, $end_addr);
foreach my $e (@instructions) {
- my $location = ($e->[2] >= 0) ? "$e->[1]:$e->[2]" : "";
- $location =~ s|.*/||; # Remove directory portion, if any
- if (length($location) >= 20) {
- # For long locations, just show the last 20 characters
- $location = substr($location, -20);
- }
- my $f = GetEntry($flat, $e->[0]);
- my $c = GetEntry($cumulative, $e->[0]);
- my $address = $e->[0]; $address =~ s/^0x//;
- printf("%6s %6s %-20s %8s: %6s\n",
- UnparseAlt($f),
- UnparseAlt($c),
- $location,
- $address,
- $e->[3]);
+ my $location = ($e->[2] >= 0) ? "$e->[1]:$e->[2]" : "";
+ $location =~ s|.*/||; # Remove directory portion, if any
+ if (length($location) >= 20) {
+ # For long locations, just show the last 20 characters
+ $location = substr($location, -20);
+ }
+ my $f = GetEntry($flat, $e->[0]);
+ my $c = GetEntry($cumulative, $e->[0]);
+ my $address = $e->[0]; $address =~ s/^0x//;
+ printf("%6s %6s %-20s %8s: %6s\n",
+ UnparseAlt($f),
+ UnparseAlt($c),
+ $location,
+ $address,
+ $e->[3]);
}
close(OBJDUMP);
}
@@ -458,9 +680,9 @@ sub PrintDisassembly {
}
# Return reference to array of tuples of the form:
-# [address, filename, linenumber, instruction]
+# [address, filename, linenumber, instruction]
# E.g.,
-# ["0x806c43d", "/foo/bar.cc", 131, "ret"]
+# ["0x806c43d", "/foo/bar.cc", 131, "ret"]
sub Disassemble {
my $prog = shift;
my $offset = shift;
@@ -468,8 +690,8 @@ sub Disassemble {
my $end_addr = shift;
my $cmd = sprintf("$OBJDUMP -d -l --no-show-raw-insn " .
- "--start-address=%d --stop-address=%d $prog",
- $start_addr, $end_addr);
+ "--start-address=%d --stop-address=%d $prog",
+ $start_addr, $end_addr);
open(OBJDUMP, "$cmd |") || error("$OBJDUMP: $!\n");
my @result = ();
my $filename = "";
@@ -501,26 +723,38 @@ sub PrintListing {
my $libs = shift;
my $flat = shift;
my $cumulative = shift;
+ my $list_opts = shift;
foreach my $lib (@{$libs}) {
- my $symbol_table = GetProcedureBoundaries($lib->[0], $main::opt_list);
+ my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
my $offset = $lib->[1] - $lib->[3];
foreach my $routine (sort ByName keys(%{$symbol_table})) {
# Print if there are any samples in this routine
my $start_addr = $symbol_table->{$routine}->[0];
my $end_addr = $symbol_table->{$routine}->[1];
for (my $addr = $start_addr; $addr < $end_addr; $addr++) {
- if (defined($cumulative->{sprintf("0x%x", $addr+$offset)})) {
- PrintSource($lib->[0], $offset,
- $routine, $flat, $cumulative,
- $start_addr, $end_addr);
- last;
- }
+ if (defined($cumulative->{sprintf("0x%x", $addr+$offset)})) {
+ PrintSource($lib->[0], $offset,
+ $routine, $flat, $cumulative,
+ $start_addr, $end_addr);
+ last;
+ }
}
}
}
}
+# Returns the indentation of the line, if it has any non-whitespace
+# characters.  Otherwise, returns -1.
+#
+# Argument: the text of the line to inspect.  The indentation is the
+# length of the leading whitespace run.
+sub Indentation {
+  my $line = shift;
+  # Match against the argument itself rather than whatever $_ happens to
+  # hold, so the result does not depend on the caller's $_.
+  if ($line =~ m/^(\s*)\S/) {
+    return length($1);
+  } else {
+    return -1;
+  }
+}
+
# Print source-listing for one routine
sub PrintSource {
my $prog = shift;
@@ -560,15 +794,45 @@ sub PrintSource {
}
}
+ # Hack 3: Extend last line forward until its indentation is less than
+ # the indentation we saw on $firstline
+ my $oldlastline = $lastline;
+ {
+ if (!open(FILE, "<$filename")) {
+ print STDERR "$filename: $!\n";
+ return;
+ }
+ my $l = 0;
+ my $first_indentation = -1;
+ while (<FILE>) {
+ $l++;
+ my $indent = Indentation($_);
+ if ($l >= $firstline) {
+ if ($first_indentation < 0 && $indent >= 0) {
+ $first_indentation = $indent;
+ last if ($first_indentation == 0);
+ }
+ }
+ if ($l >= $lastline && $indent >= 0) {
+ if ($indent >= $first_indentation) {
+ $lastline = $l+1;
+ } else {
+ last;
+ }
+ }
+ }
+ close(FILE);
+ }
+
# Assign all samples to the range $firstline,$lastline,
- # Hack 3: If an instruction does not occur in the range, its samples
+ # Hack 4: If an instruction does not occur in the range, its samples
# are moved to the next instruction that occurs in the range.
my $samples1 = {};
my $samples2 = {};
- my $running1 = 0; # Unassigned flat counts
- my $running2 = 0; # Unassigned cumulative counts
- my $total1 = 0; # Total flat counts
- my $total2 = 0; # Total cumulative counts
+ my $running1 = 0; # Unassigned flat counts
+ my $running2 = 0; # Unassigned cumulative counts
+ my $total1 = 0; # Total flat counts
+ my $total2 = 0; # Total cumulative counts
foreach my $e (@instructions) {
my $c1 = GetEntry($flat, $e->[0]);
my $c2 = GetEntry($cumulative, $e->[0]);
@@ -594,12 +858,12 @@ sub PrintSource {
AddEntry($samples2, $lastline, $running2);
printf("ROUTINE ====================== %s in %s\n" .
- "%6s %6s Total %s (flat / cumulative)\n",
- ShortFunctionName($routine),
- $filename,
- Units(),
- Unparse($total1),
- Unparse($total2));
+ "%6s %6s Total %s (flat / cumulative)\n",
+ ShortFunctionName($routine),
+ $filename,
+ Units(),
+ Unparse($total1),
+ Unparse($total2));
if (!open(FILE, "<$filename")) {
print STDERR "$filename: $!\n";
return;
@@ -607,14 +871,15 @@ sub PrintSource {
my $l = 0;
while (<FILE>) {
$l++;
- if ($l >= $firstline - 5 && $l <= $lastline + 5) {
+ if ($l >= $firstline - 5 &&
+ (($l <= $oldlastline + 5) || ($l <= $lastline))) {
chop;
my $text = $_;
printf("%6s %6s %4d: %s\n",
- UnparseAlt(GetEntry($samples1, $l)),
- UnparseAlt(GetEntry($samples2, $l)),
- $l,
- $text);
+ UnparseAlt(GetEntry($samples1, $l)),
+ UnparseAlt(GetEntry($samples2, $l)),
+ $l,
+ $text);
};
}
close(FILE);
@@ -638,7 +903,7 @@ sub PrintDot {
# Find nodes to include
my @list = (sort { abs(GetEntry($cumulative, $b)) <=>
abs(GetEntry($cumulative, $a)) }
- keys(%{$cumulative}));
+ keys(%{$cumulative}));
my $last = $nodecount - 1;
if ($last > $#list) {
$last = $#list;
@@ -649,17 +914,20 @@ sub PrintDot {
}
if ($last < 0) {
print STDERR "No nodes to print\n";
- exit(1);
+ cleanup();
+ return 0;
}
- printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
- Unparse($nodelimit), Units(),
- Unparse($edgelimit), Units());
+ if ($nodelimit > 0 || $edgelimit > 0) {
+ printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
+ Unparse($nodelimit), Units(),
+ Unparse($edgelimit), Units());
+ }
# Open DOT output file
my $output;
if ($main::opt_gv) {
- $output = "| $DOT -Tps >$main::tmpfile_ps";
+ $output = "| $DOT -Tps >" . PsTempName($main::next_tmpfile);
} elsif ($main::opt_ps) {
$output = "| $DOT -Tps";
} elsif ($main::opt_pdf) {
@@ -673,26 +941,26 @@ sub PrintDot {
# Title
printf DOT ("digraph \"%s; %s %s\" {\n",
- $prog,
- Unparse($overall_total),
- Units());
+ $prog,
+ Unparse($overall_total),
+ Units());
if ($main::opt_pdf) {
- # The output is more printable if we set the page size for dot.
+ # The output is more printable if we set the page size for dot.
printf DOT ("size=\"8,11\"\n");
- }
+ }
printf DOT ("node [width=0.375,height=0.25];\n");
# Print legend
printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," .
- "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n",
- $prog,
- sprintf("Total %s: %s", Units(), Unparse($overall_total)),
- sprintf("Focusing on: %s", Unparse($local_total)),
- sprintf("Dropped nodes with <= %s abs(%s)",
- Unparse($nodelimit), Units()),
- sprintf("Dropped edges with <= %s %s",
- Unparse($edgelimit), Units())
- );
+ "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n",
+ $prog,
+ sprintf("Total %s: %s", Units(), Unparse($overall_total)),
+ sprintf("Focusing on: %s", Unparse($local_total)),
+ sprintf("Dropped nodes with <= %s abs(%s)",
+ Unparse($nodelimit), Units()),
+ sprintf("Dropped edges with <= %s %s",
+ Unparse($edgelimit), Units())
+ );
# Print nodes
my %node = ();
@@ -739,13 +1007,13 @@ sub PrintDot {
for (my $i = 1; $i <= $#addrs; $i++) {
my $src = OutputKey($symbols, $addrs[$i]);
my $dst = OutputKey($symbols, $addrs[$i-1]);
- #next if ($src eq $dst); # Avoid self-edges?
+ #next if ($src eq $dst); # Avoid self-edges?
if (exists($node{$src}) && exists($node{$dst})) {
- my $e = "$src\001$dst";
- if (!exists($edge{$e})) {
- $edge{$e} = 0;
- }
- $edge{$e} += $n;
+ my $e = "$src\001$dst";
+ if (!exists($edge{$e})) {
+ $edge{$e} = 0;
+ }
+ $edge{$e} += $n;
}
}
}
@@ -757,25 +1025,26 @@ sub PrintDot {
if (abs($n) > $edgelimit) {
# Compute line width based on edge count
- my $fraction = $local_total ? (3 * ($n / $local_total)) : 0;
+ my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
if ($fraction > 1) { $fraction = 1; }
my $w = $fraction * 2;
#if ($w < 1) { $w = 1; }
# Use a slightly squashed function of the edge count as the weight
printf DOT ("N%s -> N%s [label=%s, weight=%d, " .
- "style=\"setlinewidth(%f)\"];\n",
- $node{$x[0]},
- $node{$x[1]},
- Unparse($n),
- int($n ** 0.7),
- $w);
+ "style=\"setlinewidth(%f)\"];\n",
+ $node{$x[0]},
+ $node{$x[1]},
+ Unparse($n),
+ int(abs($n) ** 0.7),
+ $w);
}
}
print DOT ("}\n");
close(DOT);
+ return 1;
}
# Generate the key under which a given address should be counted
@@ -919,14 +1188,14 @@ sub ReduceProfile {
my @addrs = split(/\n/, $k);
my @path = ();
my %seen = ();
- $seen{''} = 1; # So that empty keys are skipped
+ $seen{''} = 1; # So that empty keys are skipped
foreach my $a (@addrs) {
# To avoid double-counting due to recursion, skip a stack-trace
# entry if it has already been seen
my $key = OutputKey($symbols, $a);
if (!$seen{$key}) {
- $seen{$key} = 1;
- push(@path, $key);
+ $seen{$key} = 1;
+ push(@path, $key);
}
}
my $reduced_path = join("\n", @path);
@@ -946,12 +1215,12 @@ sub FocusProfile {
my @addrs = split(/\n/, $k);
foreach my $a (@addrs) {
# Reply if it matches either the address/shortname/fileline
- if (($a =~ m/$focus/o) ||
- (exists($symbols->{$a}) &&
- (($symbols->{$a}->[0] =~ m/$focus/o) ||
- ($symbols->{$a}->[1] =~ m/$focus/o)))) {
- AddEntry($result, $k, $count);
- last;
+ if (($a =~ m/$focus/) ||
+ (exists($symbols->{$a}) &&
+ (($symbols->{$a}->[0] =~ m/$focus/) ||
+ ($symbols->{$a}->[1] =~ m/$focus/)))) {
+ AddEntry($result, $k, $count);
+ last;
}
}
}
@@ -970,12 +1239,12 @@ sub IgnoreProfile {
my $matched = 0;
foreach my $a (@addrs) {
# Reply if it matches either the address/shortname/fileline
- if (($a =~ m/$ignore/o) ||
- (exists($symbols->{$a}) &&
- (($symbols->{$a}->[0] =~ m/$ignore/o) ||
- ($symbols->{$a}->[1] =~ m/$ignore/o)))) {
- $matched = 1;
- last;
+ if (($a =~ m/$ignore/) ||
+ (exists($symbols->{$a}) &&
+ (($symbols->{$a}->[0] =~ m/$ignore/) ||
+ ($symbols->{$a}->[1] =~ m/$ignore/)))) {
+ $matched = 1;
+ last;
}
}
if (!$matched) {
@@ -1041,6 +1310,16 @@ sub AddEntry {
$profile->{$k} += $n;
}
+##### Code to profile a server dynamically #####
+
+# Placeholder for fetching a profile from a running server.
+# Takes the binary name and the profile name; for now the profile name is
+# handed back unchanged and the binary name is not used.
+sub FetchDynamicProfile {
+  my ($binary_name, $profile_name) = @_;
+
+  # TODO: Add support for fetching profiles dynamically from a server
+  return $profile_name;
+}
+
##### Parsing code #####
# Parse profile generated by common/profiler.cc and return a reference
@@ -1054,19 +1333,30 @@ sub ReadProfile {
my $prog = shift;
my $fname = shift;
+ $main::heap_profile = 0;
+ $main::lock_profile = 0;
+
# Look at first line to see if it is a heap or a CPU profile
open(PROFILE, "<$fname") || error("$fname: $!\n");
- binmode PROFILE; # New perls do UTF-8 processing
+ binmode PROFILE; # New perls do UTF-8 processing
my $header = <PROFILE>;
if ($header =~ m/^heap profile:/) {
$main::heap_profile = 1;
- return ReadHeapProfile($prog, $fname);
+ return ReadHeapProfile($prog, $fname, $header);
+ } elsif ($header =~ m/^--- *contentionz/ ) {
+ $main::lock_profile = 1;
+ return ReadSynchProfile($prog, $fname);
+ } elsif ($header =~ m/^--- *Stacks:/ ) {
+ print STDERR
+ "Old format contention profile: mistakenly reports " .
+ "condition variable signals as lock contentions.\n";
+ $main::lock_profile = 1;
+ return ReadSynchProfile($prog, $fname);
} else {
# Need to unread the line we just read
close(PROFILE);
open(PROFILE, "<$fname") || error("$fname: $!\n");
- binmode PROFILE; # New perls do UTF-8 processing
- $main::heap_profile = 0;
+ binmode PROFILE; # New perls do UTF-8 processing
return ReadCPUProfile($prog, $fname);
}
}
@@ -1143,6 +1433,7 @@ sub ReadCPUProfile {
sub ReadHeapProfile {
my $prog = shift;
my $fname = shift;
+ my $header = shift;
my $index = 1;
if ($main::opt_inuse_space) {
@@ -1155,6 +1446,15 @@ sub ReadHeapProfile {
$index = 2;
}
+ # Find the type of this profile
+ chomp($header);
+ my $type = "unknown";
+ if ($header =~ m/^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*(.*))?/) {
+ if (defined($6) && ($6 ne '')) {
+ $type = $6;
+ }
+ }
+
my $profile = {};
my $pcs = {};
my $map = "";
@@ -1162,15 +1462,24 @@ sub ReadHeapProfile {
if (/^MAPPED_LIBRARIES:/) {
# Read the /proc/self/maps data
while (<PROFILE>) {
- $map .= $_;
+ $map .= $_;
}
last;
}
if (/^--- Memory map:/) {
# Read /proc/self/maps data as formatted by DumpAddressMap()
+ my $buildvar = "";
while (<PROFILE>) {
- $map .= $_;
+ # Parse "build=<dir>" specification if supplied
+ if (m/^\s*build=(.*)\n/) {
+ $buildvar = $1;
+ }
+
+ # Expand "$build" variable if available
+ $_ =~ s/\$build\b/$buildvar/g;
+
+ $map .= $_;
}
last;
}
@@ -1181,7 +1490,9 @@ sub ReadHeapProfile {
s/\s*$//;
if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) {
my $stack = $5;
- my @counts = ($1, $2, $3, $4);
+ my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+
+ my @counts = ($n1, $s1, $n2, $s2);
my $n = $counts[$index];
my $k = "";
foreach my $e (split(/\s+/, $stack)) {
@@ -1203,6 +1514,46 @@ sub ReadHeapProfile {
return $r;
}
+# Parse a contention (lock) profile from the already-open PROFILE handle.
+#
+# Each input line is classified as one of:
+#   - a sample: "slow release ... thread N @ pc pc ..." (counted once) or
+#     "<count> @ pc pc ...";
+#   - a "sampling period = N" line;
+#   - anything else, which is accumulated as memory-map text.
+# Closes PROFILE and returns a hash reference with the keys 'version',
+# 'period', 'profile', 'libs' and 'pcs'.
+sub ReadSynchProfile {
+  my ($prog, $fname, $header) = @_;
+
+  my $map_text = '';
+  my $profile = {};
+  my $pcs = {};
+  my $period = 1;
+
+  while (defined(my $line = <PROFILE>)) {
+    if ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
+         $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
+      # Sample entry
+      my ($count, $stack) = ($1, $2);
+      # The "slow release" form carries no number; treat it as one sample
+      if ($count !~ /^\d+$/) {
+        $count = 1;
+      }
+
+      my @frames = split /\s+/, $stack;
+      foreach my $frame (@frames) {
+        $pcs->{hex($frame)} = 1;
+      }
+      AddEntry($profile, join("\n", @frames), $count);
+      next;
+    }
+    if ( $line =~ /sampling period = (\d+)/ ) {
+      $period = $1;
+      next;
+    }
+    # Memory map entry
+    $map_text .= $line;
+  }
+  close PROFILE;
+
+  my $libs = ParseLibraries($prog, $map_text, $pcs);
+
+  my $result = {};
+  $result->{'version'} = 0;
+  $result->{'period'} = $period;
+  $result->{'profile'} = $profile;
+  $result->{'libs'} = $libs;
+  $result->{'pcs'} = $pcs;
+  return $result;
+}
+
##### Symbol extraction #####
# Split /proc/pid/maps dump into a list of libraries
@@ -1227,7 +1578,7 @@ sub ParseLibraries {
$lib = $4;
} elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) {
# Cooked line from DumpAddressMap. Example:
- # 40000000-40015000: /lib/ld-2.3.2.so
+ # 40000000-40015000: /lib/ld-2.3.2.so
$start = hex($1);
$finish = hex($2);
$offset = 0;
@@ -1244,10 +1595,10 @@ sub ParseLibraries {
# 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4
my @x = split;
if (($#x >= 6) && ($x[1] eq '.text')) {
- my $vma = hex($x[3]);
- my $file_offset = hex($x[5]);
- $offset += $vma - $file_offset;
- last;
+ my $vma = hex($x[3]);
+ my $file_offset = hex($x[5]);
+ $offset += $vma - $file_offset;
+ last;
}
}
close(OBJDUMP);
@@ -1285,8 +1636,8 @@ sub ExtractSymbols {
my $contained = [];
foreach my $pc (keys(%{$pcset})) {
if (!$seen{$pc} && ($pc >= $start) && ($pc <= $finish)) {
- $seen{$pc} = 1;
- push(@{$contained}, $pc);
+ $seen{$pc} = 1;
+ push(@{$contained}, $pc);
}
}
# Map to symbols
@@ -1307,7 +1658,10 @@ sub MapToSymbols {
if ($#{$pclist} < 0) { return; }
MapSymbolsWithNM($image, $offset, $pclist, $symbols);
- if ($main::opt_lines || $main::opt_files || $main::opt_list) {
+ if ($main::opt_interactive ||
+ $main::opt_lines ||
+ $main::opt_files ||
+ $main::opt_list) {
GetLineNumbers($image, $offset, $pclist, $symbols);
}
}
@@ -1396,7 +1750,7 @@ sub ShortFunctionName {
my $function = shift;
while ($function =~ s/\([^()]*\)//g) { } # Remove argument types
while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments
- $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type
+ $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type
return $function;
}
@@ -1404,7 +1758,20 @@ sub ShortFunctionName {
sub cleanup {
unlink($main::tmpfile_sym);
- unlink($main::tmpfile_ps);
+ for (my $i = 0; $i < $main::next_tmpfile; $i++) {
+ unlink(PsTempName($i));
+ }
+ # We leave any collected profiles in $HOME/pprof in case the user wants
+ # to look at them later. We print a message informing them of this.
+ if (defined($main::collected_profile)) {
+ print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
+ print STDERR "If you want to investigate this profile further, you can do:\n";
+ print STDERR "\n";
+ print STDERR " pprof \\\n";
+ print STDERR " $prog \\\n";
+ print STDERR " $main::collected_profile\n";
+ print STDERR "\n";
+ }
}
sub sighandler {
@@ -1421,11 +1788,11 @@ sub error {
# Return a list of all routines that match $regexp.
# For each routine, the following list is returned:
-# $result->[i]->[0] Routine name
-# $result->[i]->[1] Start address
-# $result->[i]->[2] Finish address
-# $result->[i]->[3] Image file name (program or shared library)
-# $result->[i]->[4] Offset for image in address space
+# $result->[i]->[0] Routine name
+# $result->[i]->[1] Start address
+# $result->[i]->[2] Finish address
+# $result->[i]->[3] Image file name (program or shared library)
+# $result->[i]->[4] Offset for image in address space
sub GetMatchingRoutines {
}
@@ -1446,7 +1813,7 @@ sub GetProcedureBoundaries {
my $start_val = $1;
my $this_routine = $2;
if (defined($routine) && $routine =~ m/$regexp/) {
- $symbol_table->{$routine} = [hex($last_start), hex($start_val)];
+ $symbol_table->{$routine} = [hex($last_start), hex($start_val)];
}
$last_start = $start_val;
$routine = $this_routine;
diff --git a/src/profiler.cc b/src/profiler.cc
index 2597917..7197d42 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -31,6 +31,11 @@
// Author: Sanjay Ghemawat
//
// Profile current program by sampling stack-trace every so often
+//
+// TODO: Detect whether or not setitimer() applies to all threads in
+// the process. If so, instead of starting and stopping by changing
+// the signal handler, start and stop by calling setitimer() and
+// do nothing in the per-thread registration code.
#include "config.h"
#include <assert.h>
@@ -137,9 +142,6 @@ class ProfileData {
// Is profiling turned on at all
inline bool enabled() { return out_ >= 0; }
- // Should we automatically profile all threads
- inline bool profile_all() { return (out_ >= 0) && profile_all_; }
-
// What is the frequency of interrupts (ticks per second)
inline int frequency() { return frequency_; }
@@ -152,6 +154,8 @@ class ProfileData {
bool Start(const char* fname);
// Stop profiling and flush the data
void Stop();
+
+ void GetCurrentState(ProfilerState* state);
private:
static const int kMaxStackDepth = 64; // Max stack depth profiled
@@ -177,20 +181,24 @@ class ProfileData {
};
#ifdef HAVE_PTHREAD
- pthread_mutex_t lock_; // Cannot use "Mutex" in signal handlers
- pthread_mutex_t flush_lock_; // Acquired during explicit flushes
+ // Invariant: table_lock_ is only grabbed by handler, or by other code
+ // when the signal is being ignored (via SIG_IGN).
+ //
+ // Locking order is "state_lock_" first, and then "table_lock_"
+ pthread_mutex_t state_lock_; // Protects filename, etc.(not used in handler)
+ pthread_mutex_t table_lock_; // Cannot use "Mutex" in signal handlers
#endif
Bucket* hash_; // hash table
Slot* evict_; // evicted entries
int num_evicted_; // how many evicted entries?
int out_; // fd for output file
- bool profile_all_; // profile all threads automatically?
int count_; // How many interrupts recorded
int evictions_; // How many evictions
size_t total_bytes_; // How much output
char* fname_; // Profile file name
int frequency_; // Interrupts per second
+ time_t start_time_; // Start time, or 0
// Add "pc -> count" to eviction buffer
void Evict(const Entry& entry);
@@ -226,19 +234,15 @@ ProfileData::ProfileData() :
evict_(0),
num_evicted_(0),
out_(-1),
- profile_all_(false),
count_(0),
evictions_(0),
total_bytes_(0),
fname_(0),
- frequency_(0) {
+ frequency_(0),
+ start_time_(0) {
- PCALL(pthread_mutex_init(&lock_, NULL));
- PCALL(pthread_mutex_init(&flush_lock_, NULL));
-
- if (getenv("PROFILESELECTED") == NULL) {
- profile_all_ = true;
- }
+ PCALL(pthread_mutex_init(&state_lock_, NULL));
+ PCALL(pthread_mutex_init(&table_lock_, NULL));
// Get frequency of interrupts (if specified)
char junk;
@@ -251,7 +255,12 @@ ProfileData::ProfileData() :
frequency_ = kDefaultFrequency;
}
- // Should profiling be enabled?
+ // Ignore signals until we decide to turn profiling on
+ SetHandler(SIG_IGN);
+
+ ProfilerRegisterThread();
+
+ // Should profiling be enabled automatically at start?
char* cpuprofile = getenv("CPUPROFILE");
if (!cpuprofile || cpuprofile[0] == '\0') {
return;
@@ -294,10 +303,10 @@ ProfileData::ProfileData() :
}
bool ProfileData::Start(const char* fname) {
- LOCK(&lock_);
+ LOCK(&state_lock_);
if (enabled()) {
// profiling is already enabled
- UNLOCK(&lock_);
+ UNLOCK(&state_lock_);
return false;
}
@@ -305,19 +314,23 @@ bool ProfileData::Start(const char* fname) {
int fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0666);
if (fd < 0) {
// Can't open outfile for write
- UNLOCK(&lock_);
+ UNLOCK(&state_lock_);
return false;
}
+
+ start_time_ = time(NULL);
+ fname_ = strdup(fname);
+
+ LOCK(&table_lock_);
// Reset counters
num_evicted_ = 0;
count_ = 0;
evictions_ = 0;
total_bytes_ = 0;
- // But leave profile_all_ and frequency_ alone (i.e., ProfilerStart()
- // doesn't affect their values originally set in the constructor)
+ // But leave frequency_ alone (i.e., ProfilerStart() doesn't affect
+ // the value originally set in the constructor)
- fname_ = strdup(fname);
out_ = fd;
hash_ = new Bucket[kBuckets];
@@ -331,13 +344,12 @@ bool ProfileData::Start(const char* fname) {
evict_[num_evicted_++] = 1000000 / frequency_; // Period (microseconds)
evict_[num_evicted_++] = 0; // Padding
+ UNLOCK(&table_lock_);
+
// Setup handler for SIGPROF interrupts
SetHandler((void (*)(int)) prof_handler);
- // Start profiling on this thread if automatic profiling is on
- ProfilerRegisterThread();
-
- UNLOCK(&lock_);
+ UNLOCK(&state_lock_);
return true;
}
@@ -348,15 +360,18 @@ ProfileData::~ProfileData() {
// Stop profiling and write out any collected profile data
void ProfileData::Stop() {
+ LOCK(&state_lock_);
+
// Prevent handler from running anymore
SetHandler(SIG_IGN);
// This lock prevents interference with signal handlers in other threads
- LOCK(&lock_);
+ LOCK(&table_lock_);
if (out_ < 0) {
// Profiling is not enabled
- UNLOCK(&lock_);
+ UNLOCK(&table_lock_);
+ UNLOCK(&state_lock_);
return;
}
@@ -401,15 +416,35 @@ void ProfileData::Stop() {
evict_ = 0;
free(fname_);
fname_ = 0;
+ start_time_ = 0;
out_ = -1;
- UNLOCK(&lock_);
+ UNLOCK(&table_lock_);
+ UNLOCK(&state_lock_);
+}
+
+void ProfileData::GetCurrentState(ProfilerState* state) {  // Fills *state with a snapshot of the profiler's current status
+ LOCK(&state_lock_);  // Start() writes fname_ and start_time_ while holding this same lock
+ if (enabled()) {
+ state->enabled = true;
+ state->start_time = start_time_;
+ state->samples_gathered = count_;  // NOTE(review): Add() increments count_ under table_lock_, which is not held here -- confirm an approximate count is acceptable
+ int buf_size = sizeof(state->profile_name);
+ strncpy(state->profile_name, fname_, buf_size);  // Copies at most buf_size bytes; longer names are truncated
+ state->profile_name[buf_size-1] = '\0';  // strncpy leaves the buffer unterminated on truncation, so terminate explicitly
+ } else {
+ state->enabled = false;  // Not profiling: report zeroed fields and an empty name
+ state->start_time = 0;
+ state->samples_gathered = 0;
+ state->profile_name[0] = '\0';
+ }
+ UNLOCK(&state_lock_);
}
void ProfileData::SetHandler(void (*handler)(int)) {
struct sigaction sa;
sa.sa_handler = handler;
- sa.sa_flags = 0;
+ sa.sa_flags = SA_RESTART;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGPROF, &sa, NULL) != 0) {
perror("sigaction(SIGPROF)");
@@ -423,9 +458,9 @@ void ProfileData::FlushTable() {
return;
}
- LOCK(&flush_lock_); {
+ LOCK(&state_lock_); {
SetHandler(SIG_IGN); // Disable timer interrupts while we're flushing
- LOCK(&lock_); {
+ LOCK(&table_lock_); {
// Move data from hash table to eviction buffer
for (int b = 0; b < kBuckets; b++) {
Bucket* bucket = &hash_[b];
@@ -440,9 +475,9 @@ void ProfileData::FlushTable() {
// Write out all pending data
FlushEvicted();
- } UNLOCK(&lock_);
+ } UNLOCK(&table_lock_);
SetHandler((void (*)(int)) prof_handler);
- } UNLOCK(&flush_lock_);
+ } UNLOCK(&state_lock_);
}
// Record the specified "pc" in the profile data
@@ -456,12 +491,12 @@ void ProfileData::Add(unsigned long pc) {
// Make hash-value
Slot h = 0;
for (int i = 0; i < depth; i++) {
- Slot pc = reinterpret_cast<Slot>(stack[i]);
+ Slot slot = reinterpret_cast<Slot>(stack[i]);
h = (h << 8) | (h >> (8*(sizeof(h)-1)));
- h += (pc * 31) + (pc * 7) + (pc * 3);
+ h += (slot * 31) + (slot * 7) + (slot * 3);
}
- LOCK(&lock_);
+ LOCK(&table_lock_);
count_++;
// See if table already has an entry for this stack trace
@@ -505,7 +540,7 @@ void ProfileData::Add(unsigned long pc) {
e->stack[i] = reinterpret_cast<Slot>(stack[i]);
}
}
- UNLOCK(&lock_);
+ UNLOCK(&table_lock_);
}
// Write all evicted data to the profile file
@@ -538,49 +573,29 @@ void ProfileData::prof_handler(int sig, SigStructure sig_structure) {
errno = saved_errno;
}
-// Start interval timer for the current thread
-void ProfilerEnable() {
- // Generate periodic interrupts
- if (pdata.enabled()) {
- // TODO: Randomize the initial interrupt value?
- // TODO: Randmize the inter-interrupt period on every interrupt?
- struct itimerval timer;
- timer.it_interval.tv_sec = 0;
- timer.it_interval.tv_usec = 1000000 / pdata.frequency();
- timer.it_value = timer.it_interval;
- setitimer(ITIMER_PROF, &timer, 0);
- }
-}
-
-static void ProfilerTurnOffIntervalTimer() {
+// Start interval timer for the current thread. We do this for
+// every known thread. If profiling is off, the generated signals
+// are ignored, otherwise they are captured by prof_handler().
+void ProfilerRegisterThread() {
+ // TODO: Randomize the initial interrupt value?
+ // TODO: Randomize the inter-interrupt period on every interrupt?
struct itimerval timer;
timer.it_interval.tv_sec = 0;
- timer.it_interval.tv_usec = 0;
+ timer.it_interval.tv_usec = 1000000 / pdata.frequency();
timer.it_value = timer.it_interval;
setitimer(ITIMER_PROF, &timer, 0);
}
-// Stop interval timer for the current thread
-void ProfilerDisable() {
- if (pdata.enabled()) {
- ProfilerTurnOffIntervalTimer();
- }
-}
+// DEPRECATED routines
+void ProfilerEnable() { }
+void ProfilerDisable() { }
void ProfilerFlush() {
- if (pdata.enabled()) {
- pdata.FlushTable();
- }
-}
-
-void ProfilerRegisterThread() {
- if (pdata.profile_all()) {
- ProfilerEnable();
- }
+ pdata.FlushTable();
}
bool ProfilingIsEnabledForAllThreads() {
- return pdata.profile_all();
+ return pdata.enabled();
}
bool ProfilerStart(const char* fname) {
@@ -591,24 +606,10 @@ void ProfilerStop() {
pdata.Stop();
}
-
-ProfilerThreadState::ProfilerThreadState() {
- was_enabled_ = pdata.profile_all();
+void ProfilerGetCurrentState(ProfilerState* state) {
+ pdata.GetCurrentState(state);
}
-void ProfilerThreadState::ThreadCheck() {
- bool is_enabled = pdata.profile_all();
- if (was_enabled_ != is_enabled) {
- if (is_enabled) {
- LOG("Enabling profiling in thread");
- ProfilerRegisterThread();
- } else {
- LOG("Profiling disabled in thread");
- ProfilerTurnOffIntervalTimer();
- }
- was_enabled_ = is_enabled;
- }
-}
REGISTER_MODULE_INITIALIZER(profiler, {
if (!FLAGS_cpu_profile.empty()) {
diff --git a/src/stacktrace.cc b/src/stacktrace.cc
index dcc8bc3..708d6ce 100644
--- a/src/stacktrace.cc
+++ b/src/stacktrace.cc
@@ -46,6 +46,26 @@
#include <stdint.h> // for uintptr_t
+// Given a pointer to a stack frame (old_sp), follow the saved frame pointer
+// stored at *old_sp and return the caller's frame, or NULL when the
+// candidate fails any of the sanity checks below and looks like a bad pointer.
+static void **NextStackFrame(void **old_sp) {
+ void **new_sp = (void **) *old_sp;  // The word at old_sp is treated as the caller's frame pointer
+
+ // Check that the transition from frame pointer old_sp to frame
+ // pointer new_sp isn't clearly bogus
+ if (new_sp <= old_sp) return NULL;  // Caller's frame must lie at a strictly higher address (also rejects a frame pointing at itself)
+ if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;  // Must be aligned to the pointer size
+#ifdef __i386__
+ // Original note: on 64-bit machines the stack pointer can be very close to
+ // 0xffffffff (presumably 32-bit code on a 64-bit host -- TODO confirm), so
+ // explicitly reject a pointer into the last two pages (>= 0xffffe000)
+ if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
+#endif
+ if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;  // A jump of more than 100000 bytes between frames is treated as bogus
+ return new_sp;
+}
+
// Note: the code for GetStackExtent below is pretty similar to this one;
// change both if changing one.
int GetStackTrace(void** result, int max_depth, int skip_count) {
@@ -68,18 +88,18 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
int n = 0;
skip_count++; // Do not include the "GetStackTrace" frame
while (sp && n < max_depth) {
+ if (*(sp+1) == (void *)0) {
+ // In 64-bit code, we often see a frame that
+ // points to itself and has a return address of 0.
+ break;
+ }
if (skip_count > 0) {
skip_count--;
} else {
result[n++] = *(sp+1);
}
- void** new_sp = (void**) *sp;
-
- // A little bit of sanity checking to avoid crashes
- if (new_sp < sp ||
- (uintptr_t)new_sp - (uintptr_t)sp > 100000) {
- break;
- }
+ void** new_sp = NextStackFrame(sp);
+ if (!new_sp) break;
sp = new_sp;
}
return n;
@@ -112,16 +132,14 @@ bool GetStackExtent(void* sp, void** stack_top, void** stack_bottom) {
}
while (cur_sp) {
- void** new_sp = (void**)*cur_sp;
- // A little bit of sanity checking to avoid crashes
- if (new_sp < cur_sp ||
- (uintptr_t)new_sp - (uintptr_t)cur_sp > 100000) {
+ void** new_sp = NextStackFrame(cur_sp);
+ if (!new_sp) {
*stack_bottom = (void*)cur_sp;
return true;
}
cur_sp = new_sp;
if (*stack_top == NULL) *stack_top = (void*)cur_sp;
- // get out of the stack frame for this call
+ // get out of the stack frame for this call
}
return false;
}
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 40f0046..dee2490 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -83,6 +83,10 @@ DEFINE_int32(malloc_devmem_limit, 0,
#ifdef HAVE_SBRK
static void* TrySbrk(size_t size, size_t alignment) {
+ // sbrk will release memory if passed a negative number, so we do
+ // a strict check here
+ if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL;
+
size = ((size + alignment - 1) / alignment) * alignment;
void* result = sbrk(size);
if (result == reinterpret_cast<void*>(-1)) {
@@ -131,6 +135,11 @@ static void* TryMmap(size_t size, size_t alignment) {
if (alignment > pagesize) {
extra = alignment - pagesize;
}
+
+ // Note: size + extra does not overflow since:
+ // size + alignment < (1<<NBITS).
+ // and extra <= alignment
+ // therefore size + extra < (1<<NBITS)
void* result = mmap(NULL, size + extra,
PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS,
@@ -200,10 +209,16 @@ static void* TryDevMem(size_t size, size_t alignment) {
}
// check to see if we have any memory left
- if (physmem_limit != 0 && physmem_base + size + extra > physmem_limit) {
+ if (physmem_limit != 0 &&
+ ((size + extra) > (physmem_limit - physmem_base))) {
devmem_failure = true;
return NULL;
}
+
+ // Note: size + extra does not overflow since:
+ // size + alignment < (1<<NBITS).
+ // and extra <= alignment
+ // therefore size + extra < (1<<NBITS)
void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ,
MAP_SHARED, physmem_fd, physmem_base);
if (result == reinterpret_cast<void*>(MAP_FAILED)) {
@@ -233,6 +248,9 @@ static void* TryDevMem(size_t size, size_t alignment) {
}
void* TCMalloc_SystemAlloc(size_t size, size_t alignment) {
+ // Discard requests that overflow
+ if (size + alignment < size) return NULL;
+
if (TCMallocDebug::level >= TCMallocDebug::kVerbose) {
MESSAGE("TCMalloc_SystemAlloc(%" PRIuS ", %" PRIuS")\n",
size, alignment);
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 748f8f5..2eb9ef4 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -110,6 +110,10 @@ static const size_t kAlignShift = 3;
static const size_t kAlignment = 1 << kAlignShift;
static const size_t kNumClasses = 170;
+// Allocates a big block of memory for the pagemap once we reach more than
+// 128MB
+static const size_t kPageMapBigAllocationThreshold = 128 << 20;
+
// Minimum number of pages to fetch from system at a time. Must be
// significantly bigger than kBlockSize to amortize system-call
// overhead, and also to reduce external fragmentation. Also, we
@@ -167,12 +171,12 @@ static size_t class_to_size[kNumClasses];
static size_t class_to_pages[kNumClasses];
// Return floor(log2(n)) for n > 0.
-#if defined __i386__ && defined __GNUC__
+#if (defined __i386__ || defined __x86_64__) && defined __GNUC__
static inline int LgFloor(size_t n) {
// "ro" for the input spec means the input can come from either a
// register ("r") or offsetable memory ("o").
- int result;
- __asm__("bsrl %1, %0"
+ size_t result;
+ __asm__("bsr %1, %0"
: "=r" (result) // Output spec
: "ro" (n) // Input spec
: "cc" // Clobbers condition-codes
@@ -307,7 +311,7 @@ template <class T>
class PageHeapAllocator {
private:
// How much to allocate from system at a time
- static const int kAllocIncrement = 32 << 10;
+ static const int kAllocIncrement = 128 << 10;
// Aligned size of T
static const size_t kAlignedSize
@@ -330,6 +334,8 @@ class PageHeapAllocator {
free_area_ = NULL;
free_avail_ = 0;
free_list_ = NULL;
+ // Reserve some space at the beginning to avoid fragmentation.
+ Delete(New());
}
T* New() {
@@ -517,6 +523,12 @@ struct StackTrace {
static PageHeapAllocator<StackTrace> stacktrace_allocator;
static Span sampled_objects;
+// Linked list of stack traces recorded every time we allocated memory
+// from the system. Useful for finding allocation sites that cause
+// increase in the footprint of the system. The linked list pointer
+// is stored in trace->stack[kMaxStackDepth-1].
+static StackTrace* growth_stacks = NULL;
+
// -------------------------------------------------------------------------
// Map from page-id to per-page data
// -------------------------------------------------------------------------
@@ -548,6 +560,8 @@ class TCMalloc_PageHeap {
TCMalloc_PageHeap();
// Allocate a run of "n" pages. Returns zero if out of memory.
+ // Caller should not pass "n == 0" -- instead, n should have
+ // been rounded up already.
Span* New(Length n);
// Delete the span "[p, p+n-1]".
@@ -635,10 +649,12 @@ TCMalloc_PageHeap::TCMalloc_PageHeap() : pagemap_(MetaDataAlloc),
Span* TCMalloc_PageHeap::New(Length n) {
ASSERT(Check());
- if (n == 0) n = 1;
+
+ // n==0 occurs iff pages() overflowed when we added kPageSize-1 to n
+ if (n == 0) return NULL;
// Find first size >= n that has a non-empty list
- for (int s = n; s < kMaxPages; s++) {
+ for (Length s = n; s < kMaxPages; s++) {
if (!DLL_IsEmpty(&free_[s])) {
Span* result = free_[s].next;
Carve(result, n);
@@ -815,6 +831,14 @@ void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
(cumulative << kPageShift) / 1048576.0);
}
+static void RecordGrowth(size_t growth) {  // Records the current stack trace for a heap growth of "growth" bytes at the head of growth_stacks
+ StackTrace* t = stacktrace_allocator.New();  // NOTE(review): used without a NULL check -- confirm New() cannot fail here
+ t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 4);  // Captures at most kMaxStackDepth-1 frames so the last slot stays free; the 4 is the skip_count argument
+ t->size = growth;
+ t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(growth_stacks);  // Last stack slot doubles as the "next" pointer of the linked list
+ growth_stacks = t;  // The new trace becomes the list head
+}
+
bool TCMalloc_PageHeap::GrowHeap(Length n) {
ASSERT(kMaxPages >= kMinSystemAlloc);
Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
@@ -827,10 +851,22 @@ bool TCMalloc_PageHeap::GrowHeap(Length n) {
}
if (ptr == NULL) return false;
}
+ RecordGrowth(ask << kPageShift);
+
+ uint64_t old_system_bytes = system_bytes_;
system_bytes_ += (ask << kPageShift);
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
ASSERT(p > 0);
+ // If we already have a lot of pages allocated, just preallocate a bunch of
+ // memory for the page map. This prevents fragmentation by pagemap metadata
+ // when a program keeps allocating and freeing large blocks.
+
+ if (old_system_bytes < kPageMapBigAllocationThreshold
+ && system_bytes_ >= kPageMapBigAllocationThreshold) {
+ pagemap_.PreallocateMoreMemory();
+ }
+
// Make sure pagemap_ has entries for all of the new pages.
// Plus ensure one before and one after so coalescing code
// does not need bounds-checking.
@@ -928,7 +964,7 @@ class TCMalloc_ThreadCache {
size_t size_; // Combined size of data
pthread_t tid_; // Which thread owns it
- bool setspecific_; // Called pthread_setspecific?
+ bool in_setspecific_; // In call to pthread_setspecific?
FreeList list_[kNumClasses]; // Array indexed by size-class
// We sample allocations, biased by the size of the allocation
@@ -1193,7 +1229,7 @@ void TCMalloc_ThreadCache::Init(pthread_t tid) {
next_ = NULL;
prev_ = NULL;
tid_ = tid;
- setspecific_ = false;
+ in_setspecific_ = false;
for (size_t cl = 0; cl < kNumClasses; ++cl) {
list_[cl].Init();
}
@@ -1409,12 +1445,13 @@ void* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
}
// We call pthread_setspecific() outside the lock because it may
- // call malloc() recursively. The recursive call will never get
- // here again because it will find the already allocated heap in the
- // linked list of heaps.
- if (!heap->setspecific_ && tsd_inited) {
- heap->setspecific_ = true;
+ // call malloc() recursively. We check for the recursive call using
+ // the "in_setspecific_" flag so that we can avoid calling
+ // pthread_setspecific() if we are already inside pthread_setspecific().
+ if (!heap->in_setspecific_ && tsd_inited) {
+ heap->in_setspecific_ = true;
perftools_pthread_setspecific(heap_key, heap);
+ heap->in_setspecific_ = false;
}
return heap;
}
@@ -1600,6 +1637,50 @@ static void** DumpStackTraces() {
return result;
}
+static void** DumpHeapGrowthStackTraces() {  // Returns a new[]-allocated, 0-terminated array describing every trace on growth_stacks, or NULL if allocation fails
+ // Count how much space we need
+ int needed_slots = 0;
+ {
+ SpinLockHolder h(&pageheap_lock);  // Held only for this inner scope, so it is released before the allocation below
+ for (StackTrace* t = growth_stacks;
+ t != NULL;
+ t = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth-1])) {  // The next pointer lives in the last stack slot
+ needed_slots += 3 + t->depth;  // Three header slots plus one slot per frame
+ }
+ needed_slots += 100; // Slop in case list grows
+ needed_slots += needed_slots/8; // An extra 12.5% slop
+ }
+
+ void** result = new void*[needed_slots];
+ if (result == NULL) {
+ MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
+ needed_slots);
+ return NULL;
+ }
+
+ SpinLockHolder h(&pageheap_lock);  // Re-acquired for the copy pass; the list may have changed since the count above
+ int used_slots = 0;
+ for (StackTrace* t = growth_stacks;
+ t != NULL;
+ t = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth-1])) {
+ ASSERT(used_slots < needed_slots); // Need to leave room for terminator
+ if (used_slots + 3 + t->depth >= needed_slots) {  // ">=" keeps one slot spare for the terminator
+ // No more room
+ break;
+ }
+
+ result[used_slots+0] = reinterpret_cast<void*>(1);  // First header slot is always 1 (presumably a count -- TODO confirm against the reader)
+ result[used_slots+1] = reinterpret_cast<void*>(t->size);  // Second header slot: recorded growth size
+ result[used_slots+2] = reinterpret_cast<void*>(t->depth);  // Third header slot: number of frames that follow
+ for (int d = 0; d < t->depth; d++) {
+ result[used_slots+3+d] = t->stack[d];
+ }
+ used_slots += 3 + t->depth;
+ }
+ result[used_slots] = reinterpret_cast<void*>(0);  // Terminator entry
+ return result;
+}
+
// TCMalloc's support for extra malloc interfaces
class TCMallocImplementation : public MallocExtension {
public:
@@ -1619,6 +1700,10 @@ class TCMallocImplementation : public MallocExtension {
return DumpStackTraces();
}
+ virtual void** ReadHeapGrowthStackTraces() {
+ return DumpHeapGrowthStackTraces();
+ }
+
virtual bool GetNumericProperty(const char* name, size_t* value) {
ASSERT(name != NULL);
@@ -1681,15 +1766,6 @@ class TCMallocImplementation : public MallocExtension {
}
};
-// RedHat 9's pthread manager allocates an object directly by calling
-// a __libc_XXX() routine. This memory block is not known to tcmalloc.
-// At cleanup time, the pthread manager calls free() on this
-// pointer, which then crashes.
-//
-// We hack around this problem by disabling all deallocations
-// after a global object destructor in this module has been called.
-static bool tcmalloc_is_destroyed = false;
-
//-------------------------------------------------------------------
// Helpers for the exported routines below
//-------------------------------------------------------------------
@@ -1744,22 +1820,11 @@ static inline void* do_malloc(size_t size) {
static inline void do_free(void* ptr) {
if (TCMallocDebug::level >= TCMallocDebug::kVerbose)
MESSAGE("In tcmalloc do_free(%p)\n", ptr);
- if (ptr == NULL || tcmalloc_is_destroyed) return;
+ if (ptr == NULL) return;
ASSERT(pageheap != NULL); // Should not call free() before malloc()
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
Span* span = pageheap->GetDescriptor(p);
- if (span == NULL) {
- // We've seen systems where a piece of memory allocated using the
- // allocator built in to libc is deallocated using free() and
- // therefore ends up inside tcmalloc which can't find the
- // corresponding span. We silently throw this object on the floor
- // instead of crashing.
- MESSAGE("tcmalloc: ignoring potential glibc-2.3.5 induced free "
- "of an unknown object %p\n", ptr);
- return;
- }
-
ASSERT(span != NULL);
ASSERT(!span->free);
const size_t cl = span->sizeclass;
@@ -1796,6 +1861,8 @@ static inline void do_free(void* ptr) {
static void* do_memalign(size_t align, size_t size) {
ASSERT((align & (align - 1)) == 0);
ASSERT(align > 0);
+ if (size + align < size) return NULL; // Overflow
+
if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
// Allocate at least one byte to avoid boundary conditions below
@@ -1920,11 +1987,15 @@ extern "C" void free(void* ptr) {
}
extern "C" void* calloc(size_t n, size_t elem_size) {
- void* result = do_malloc(n * elem_size);
+ // Overflow check
+ const size_t size = n * elem_size;
+ if (elem_size != 0 && size / elem_size != n) return NULL;
+
+ void* result = do_malloc(size);
if (result != NULL) {
- memset(result, 0, n * elem_size);
+ memset(result, 0, size);
}
- MallocHook::InvokeNewHook(result, n * elem_size);
+ MallocHook::InvokeNewHook(result, size);
return result;
}
@@ -2118,3 +2189,17 @@ extern "C" {
}
#endif
}
+
+// Override __libc_memalign in libc on linux boxes specially.
+// They have a bug in libc that causes them to (very rarely) allocate
+// with __libc_memalign() yet deallocate with free(), and the
+// definitions above don't catch it.
+// This function is an exception to the rule of calling the MallocHook
+// method from the stack frame of the allocation function;
+// heap-checker handles this special case explicitly.
+static void *MemalignOverride(size_t align, size_t size, const void *caller) {  // "caller" is accepted to match the hook signature but is not used
+ void* result = do_memalign(align, size);
+ MallocHook::InvokeNewHook(result, size);  // Invoked unconditionally, including when result is NULL
+ return result;
+}
+void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
diff --git a/src/tests/tcmalloc_large_unittest.cc b/src/tests/tcmalloc_large_unittest.cc
new file mode 100644
index 0000000..fd9f885
--- /dev/null
+++ b/src/tests/tcmalloc_large_unittest.cc
@@ -0,0 +1,137 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Michael Chastain
+//
+// This is a unit test for large allocations in malloc and friends.
+// "Large" means "so large that they overflow the address space".
+// For 32 bits, this means allocations near 2^32 bytes and 2^31 bytes.
+// For 64 bits, this means allocations near 2^64 bytes and 2^63 bytes.
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <set>
+
+// Abort the test with a diagnostic on stderr when condition b is false.
+// The stringified condition is passed as a "%s" argument rather than spliced
+// into the format string, so a '%' in the expression is printed literally.
+#define CHECK(b) do { \
+ if (b) {} else { fprintf(stderr, "TEST FAILED: %s\n", #b); exit(1); } \
+} while (0)
+
+// Request "size" bytes via malloc and via realloc; both must fail (return NULL).
+
+void TryAllocExpectFail(size_t size) {
+ void* p1 = malloc(size);
+ CHECK(p1 == NULL);  // The direct allocation must fail
+
+ void* p2 = malloc(1);  // A small allocation that must succeed; it is the block we then try to grow
+ CHECK(p2 != NULL);
+
+ void* p3 = realloc(p2, size);
+ CHECK(p3 == NULL);  // Growing to "size" must fail as well
+
+ free(p2);  // p2 is still the live block once realloc has failed, so release it here
+}
+
+// Request "size" bytes, which may or may not succeed.
+// On success, write and read back bytes spread across the block.
+
+void TryAllocMightFail(size_t size) {
+ unsigned char* p = static_cast<unsigned char*>(malloc(size));
+ if ( p != NULL ) {
+ unsigned char volatile* vp = p; // prevent optimizations
+ static const size_t kPoints = 1024;  // Number of evenly spaced offsets to touch
+
+ for ( size_t i = 0; i < kPoints; ++i ) {
+ vp[i * (size / kPoints)] = static_cast<unsigned char>(i);  // Largest offset is (kPoints-1)*(size/kPoints), which is below size
+ }
+
+ for ( size_t i = 0; i < kPoints; ++i ) {
+ CHECK(vp[i * (size / kPoints)] == static_cast<unsigned char>(i));  // NOTE(review): if size < kPoints every offset is 0 and this fails for i == 0 -- callers here pass sizes near max/2
+ }
+
+ vp[size-1] = 'M';  // Also touch the very last byte of the block
+ CHECK(vp[size-1] == 'M');
+ }
+
+ free(p);  // p may be NULL here when the allocation failed
+}
+
+int main (int argc, char** argv) {  // Runs each large-allocation scenario in turn; prints PASS and returns 0 if no CHECK fails
+ // Allocate some 0-byte objects. They better be unique.
+ // 0 bytes is not large but it exercises some paths related to
+ // large-allocation code.
+ {
+ static const int kZeroTimes = 1024;
+ printf("Test malloc(0) x %d\n", kZeroTimes);
+ std::set<char*> p_set;  // Every pointer seen so far, used to detect duplicates
+ for ( int i = 0; i < kZeroTimes; ++i ) {
+ char* p = new char;  // NOTE(review): this is a one-byte "new char", not malloc(0) as the message says -- confirm intent
+ CHECK(p != NULL);
+ CHECK(p_set.find(p) == p_set.end());  // Each allocation must return a distinct address
+ p_set.insert(p_set.end(), p);
+ }
+ // Just leak the memory.
+ }
+
+ // Grab some memory so that some later allocations are guaranteed to fail.
+ printf("Test small malloc\n");
+ void* p_small = malloc(4*1048576);  // 4MB; never freed in this function
+ CHECK(p_small != NULL);
+
+ // Test sizes up near the maximum size_t.
+ // These allocations test the wrap-around code.
+ printf("Test malloc(0 - N)\n");
+ const size_t zero = 0;
+ static const size_t kMinusNTimes = 16384;
+ for ( size_t i = 1; i < kMinusNTimes; ++i ) {
+ TryAllocExpectFail(zero - i);  // Unsigned wrap-around: the i-th largest size_t value
+ }
+
+ // Test sizes a bit smaller.
+ // The small malloc above guarantees that all these return NULL.
+ printf("Test malloc(0 - 1048576 - N)\n");
+ static const size_t kMinusMBMinusNTimes = 16384;
+ for ( size_t i = 0; i < kMinusMBMinusNTimes; ++i) {
+ TryAllocExpectFail(zero - 1048576 - i);  // Sizes starting 1MB below the size_t wrap point, going downward
+ }
+
+ // Test sizes at half of size_t.
+ // These might or might not fail to allocate.
+ printf("Test malloc(max/2 +- N)\n");
+ static const size_t kHalfPlusMinusTimes = 64;
+ const size_t half = (zero - 2) / 2 + 1;  // Half of the size_t range: (max-1)/2 + 1
+ for ( size_t i = 0; i < kHalfPlusMinusTimes; ++i) {
+ TryAllocMightFail(half - i);
+ TryAllocMightFail(half + i);
+ }
+
+ printf("PASS\n");
+ return 0;
+}
diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc
index 91da8b3..22a6c29 100644
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@@ -35,13 +35,12 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <stdint.h>
#include "google/malloc_extension.h"
#define BUFSIZE (100 << 10)
int main(int argc, char **argv) {
-
-
char *buf1 = (char *)malloc(BUFSIZE);
memset(buf1, 0, BUFSIZE);
printf("Allocated buf1 via malloc() at %p\n", buf1);