author     Mark Benvenuto <mark.benvenuto@mongodb.com>   2016-04-12 08:41:55 -0400
committer  Mark Benvenuto <mark.benvenuto@mongodb.com>   2016-04-12 08:41:55 -0400
commit     53a594d4a683a523d9223229bcf3c7ad3ef8b103 (patch)
tree       dd44726aeab63fa0394a4f18099c9b4b32de1a9a
parent     ccb7a4d80f9243a917b9a6182bcc9243f61d3057 (diff)
download   mongo-53a594d4a683a523d9223229bcf3c7ad3ef8b103.tar.gz
SERVER-17788 GPerfTools 2.5
179 files changed, 59336 insertions, 0 deletions
diff --git a/src/third_party/gperftools-2.5/AUTHORS b/src/third_party/gperftools-2.5/AUTHORS new file mode 100644 index 00000000000..3995ed4cf57 --- /dev/null +++ b/src/third_party/gperftools-2.5/AUTHORS @@ -0,0 +1,2 @@ +google-perftools@googlegroups.com + diff --git a/src/third_party/gperftools-2.5/COPYING b/src/third_party/gperftools-2.5/COPYING new file mode 100644 index 00000000000..e4956cfd9fd --- /dev/null +++ b/src/third_party/gperftools-2.5/COPYING @@ -0,0 +1,28 @@ +Copyright (c) 2005, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/third_party/gperftools-2.5/ChangeLog b/src/third_party/gperftools-2.5/ChangeLog new file mode 100644 index 00000000000..4b334beaade --- /dev/null +++ b/src/third_party/gperftools-2.5/ChangeLog @@ -0,0 +1,646 @@ +Fri Feb 03 15:40:45 2012 Google Inc. <google-perftools@googlegroups.com> + + * gperftools: version 2.0 + * Renamed the project from google-perftools to gperftools (csilvers) + * Renamed the .deb/.rpm packagse from google-perftools to gperftools too + * Renamed include directory from google/ to gperftools/ (csilvers) + * Changed the 'official' perftools email in setup.py/etc + * Renamed google-perftools.sln to gperftools.sln + * PORTING: Removed bash-isms & grep -q in heap-checker-death_unittest.sh + * Changed copyright text to reflect Google's relinquished ownership + +Tue Jan 31 10:43:50 2012 Google Inc. <opensource@google.com> + + * google-perftools: version 1.10 release + * PORTING: Support for patching assembly on win x86_64! (scott.fr...) 
+ * PORTING: Work around atexit-execution-order bug on freebsd (csilvers) + * PORTING: Patch _calloc_crt for windows (roger orr) + * PORTING: Add C++11 compatibility method for stl allocator (jdennett) + * PORTING: use MADV_FREE, not MADV_DONTNEED, on freebsd (csilvers) + * PORTING: Don't use SYS_open when not supported on solaris (csilvers) + * PORTING: Do not assume uname() returns 0 on success (csilvers) + * LSS: Improved ARM support in linux-syscall-support (dougkwan) + * LSS: Get rid of unused syscalls in linux-syscall-support (csilvers) + * LSS: Fix broken mmap wrapping for ppc (markus) + * LSS: Emit .cfi_adjust_cfa_offset when appropriate (ppluzhnikov) + * LSS: Be more accurate in register use in __asm__ (markus) + * LSS: Fix __asm__ calls to compile under clang (chandlerc) + * LSS: Fix ARM inline assembly bug around r7 and swi (lcwu) + * No longer log when an allocator fails (csilvers) + * void* -> const void* for MallocExtension methods (llib) + * Improve HEAP_PROFILE_MMAP and fix bugs with it (dmikurube) + * Replace int-based abs with more correct fabs in a test (pmurin) + +Thu Dec 22 16:22:45 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.9 release + * Lightweight check for double-frees (blount) + * BUGFIX: Fix pprof to exit properly if run with no args (dagitses) + * Suggest ASan as a way to diagnose buggy code (ppluzhnikov) + * Get rid of unused CACHELINE_SIZE (csilvers) + * Replace atexit() calls with global dtors; helps freebsd (csilvers) + * Disable heap-checker under AddressSanitizer (kcc) + * Fix bug in powerpc stacktracing (ppluzhnikov) + * PERF: Use exponential backoff waiting for spinlocks (m3b) + * Fix 64-bit nm on 32-bit binaries in pprof (csilvers) + * Add ProfileHandlerDisallowForever (rsc) + * BUGFIX: Shell escape when forking in pprof (csilvers) + * No longer combine overloaded functions in pprof (csilvers) + * Fix address-normalizing bug in pprof (csilvers) + * More consistently call abort() instead of exit() on failure (csilvers) + * Allow NoGlobalLeaks to be safely called more than once (csilvers) + * PORTING/BUGFIX: Fix ARM cycleclock to use volatile asm (dougkwan) + * PORTING: 64-bit atomic ops for ARMv7 (dougkwan) + * PORTING: Implement stacktrace for ARM (dougkwan) + * PORTING: Fix malloc_hook_mmap_linux for ARM (dougkwan) + * PORTING: Update linux_syscall_support.h for ARM/etc (evannier, sanek) + * PORTING: Fix freebsd to work on x86_64 (chapp...@gmail.com) + * PORTING: Added additional SYS_mmap fixes for FreeBSD (chappedm) + * PORTING: Allow us to compile on OS X 10.6 and run on 10.5 (raltherr) + * PORTING: Check for mingw compilers that *do* define timespec + * PORTING: Add "support" for MIPS cycletimer + * PORTING: Fix fallback cycle-timer to work with Now (dougkwan) + * PERF: Move stack trace collecting out of the mutex (taylorc) + * PERF: Get the deallocation stack trace outside the mutex (sean) + * Make PageHeap dynamically allocated for leak checks (maxim) + * BUGFIX: Fix probing of nm -f behavior in pprof (dpeng) + * BUGFIX: Fix a race with the CentralFreeList lock before main (sanjay) + * Support /pprof/censusprofile url arguments (rajatjain) + * Change IgnoreObject to return its argument (nlewycky) + * Update malloc-hook files to support more CPUs + * BUGFIX: write our own strstr to avoid libc problems (csilvers) + * Use simple callgrind compression facility in pprof + * Print an error message when we can't run pprof to symbolize (csilvers) + * Die in configure when g++ is't installed (csilvers) + * DOC: Beef up the 
documentation a bit about using libunwind (csilvers) + +Fri Aug 26 13:29:25 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8.3 release + * Added back the 'pthreads unsafe early' #define, needed for FreeBSD + +Thu Aug 11 15:01:47 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8.2 release + * Fixed calculation of patchlevel, 'make check' should all pass again + +Tue Jul 26 20:57:51 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8.1 release + * Added an #include to fix compile breakage on latest gcc's + * Removed an extra , in the configure.ac script + +Fri Jul 15 16:10:51 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8 release + * PORTING: (Disabled) support for patching mmap on freebsd (chapp...) + * PORTING: Support volatile __malloc_hook for glibc 2.14 (csilvers) + * PORTING: Use _asm rdtsc and __rdtsc to get cycleclock in windows (koda) + * PORTING: Fix fd vs. HANDLE compiler error on cygwin (csilvers) + * PORTING: Do not test memalign or double-linking on OS X (csilvers) + * PORTING: Actually enable TLS on windows (jontra) + * PORTING: Some work to compile under Native Client (krasin) + * PORTING: deal with pthread_once w/o -pthread on freebsd (csilvers) + * Rearrange libc-overriding to make it easier to port (csilvers) + * Display source locations in pprof disassembly (sanjay) + * BUGFIX: Actually initialize allocator name (mec) + * BUGFIX: Keep track of 'overhead' bytes in malloc reporting (csilvers) + * Allow ignoring one object twice in the leak checker (glider) + * BUGFIX: top10 in pprof should print 10 lines, not 11 (rsc) + * Refactor vdso source files (tipp) + * Some documentation cleanups + * Document MAX_TOTAL_THREAD_CACHE_SIZE <= 1Gb (nsethi) + * Add MallocExtension::GetOwnership(ptr) (csilvers) + * BUGFIX: We were leaving out a needed $(top_srcdir) in the Makefile + * PORTING: Support getting argv0 on OS X + * Add 'weblist' command to pprof: like 'list' but html (sanjay) + * Improve source listing in pprof (sanjay) + * Cap cache sizes to reduce fragmentation (ruemmler) + * Improve performance by capping or increasing sizes (ruemmler) + * Add M{,un}mapReplacmenet hooks into MallocHook (ribrdb) + * Refactored system allocator logic (gangren) + * Include cleanups (csilvers) + * Add TCMALLOC_SMALL_BUT_SLOW support (ruemmler) + * Clarify that tcmalloc stats are MiB (robinson) + * Remove support for non-tcmalloc debugallocation (blount) + * Add a new test: malloc_hook_test (csilvers) + * Change the configure script to be more crosstool-friendly (mcgrathr) + * PORTING: leading-underscore changes to support win64 (csilvers) + * Improve debugallocation tc_malloc_size (csilvers) + * Extend atomicops.h and cyceclock to use ARM V6+ optimized code (sanek) + * Change malloc-hook to use a list-like structure (llib) + * Add flag to use MAP_PRIVATE in memfs_malloc (gangren) + * Windows support for pprof: nul and /usr/bin/file (csilvers) + * TESTING: add test on strdup to tcmalloc_test (csilvers) + * Augment heap-checker to deal with no-inode maps (csilvers) + * Count .dll/.dylib as shared libs in heap-checker (csilvers) + * Disable sys_futex for arm; it's not always reliable (sanek) + * PORTING: change lots of windows/port.h macros to functions + * BUGFIX: Generate correct version# in tcmalloc.h on windows (csilvers) + * PORTING: Some casting to make solaris happier about types (csilvers) + * TESTING: Disable debugallocation_test in 'minimal' mode (csilvers) + * Rewrite 
debugallocation to be more modular (csilvers) + * Don't try to run the heap-checker under valgrind (ppluzhnikov) + * BUGFIX: Make focused stat %'s relative, not absolute (sanjay) + * BUGFIX: Don't use '//' comments in a C file (csilvers) + * Quiet new-gcc compiler warnings via -Wno-unused-result, etc (csilvers) + +Fri Feb 04 15:54:31 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.7 release + * Reduce page map key size under x86_64 by 4.4MB (rus) + * Remove a flaky malloc-extension test (fdabek) + * Improve the performance of PageHeap::New (ond..., csilvers) + * Improve sampling_test with no-inline additions/etc (fdabek) + * 16-byte align debug allocs (jyasskin) + * Change FillProcSelfMaps to detect out-of-buffer-space (csilvers) + * Document the need for sampling to use GetHeapSample (csilvers) + * Try to read TSC frequency from tsc_freq_khs (adurbin) + * Do better at figuring out if tests are running under gdb (ppluzhnikov) + * Improve spinlock contention performance (ruemmler) + * Better internal-function list for pprof's /contention (ruemmler) + * Speed up GoogleOnce (m3b) + * Limit number of incoming/outgoing edges in pprof (sanjay) + * Add pprof --evince to go along with --gv (csilvers) + * Document the various ways to get heap-profiling information (csilvers) + * Separate out synchronization profiling routines (ruemmler) + * Improve malloc-stats output to be more understandable (csilvers) + * Add support for census profiler in pporf (nabeelmian) + * Document how pprof's /symbol must support GET requests (csilvers) + * Improve acx_pthread.m4 (ssuomi, liujisi) + * Speed up pprof's ExtractSymbols (csilvers) + * Ignore some known-leaky (java) libraries in the heap checker (davidyu) + * Make kHideMask use all 64 bits in tests (ppluzhnikov) + * Clean up pprof input-file handling (csilvers) + * BUGFIX: Don't crash if __environ is NULL (csilvers) + * BUGFIX: Fix totally broken debugallocation tests (csilvers) + * BUGFIX: Fix up fake_VDSO handling for unittest (ppluzhnikov) + * BUGFIX: Suppress all large allocs when report threshold is 0 (lexie) + * BUGFIX: mmap2 on i386 takes an off_t, not off64_t (csilvers) + * PORTING: Add missing PERFTOOLS_DLL_DECL (csilvers) + * PORTING: Add stddef.h to make newer gcc's happy (csilvers) + * PORTING: Document some tricks for working under OS X (csilvers) + * PORTING: Don't try to check valgrind for windows (csilvers) + * PORTING: Make array-size a var to compile under clang (chandlerc) + * PORTING: No longer hook _aligned_malloc and _aligned_free (csilvers) + * PORTING: Quiet some gcc warnings (csilvers) + * PORTING: Replace %PRIxPTR with %p to be more portable (csilvers) + * PORTING: Support systems that capitalize /proc weirdly (sanek) + * PORTING: Treat arm3 the same as arm5t in cycletimer (csilvers) + * PORTING: Update windows logging to not allocate memory (csilvers) + * PORTING: avoid double-patching newer windows DLLs (roger.orr) + * PORTING: get dynamic_annotations.c to work on windows (csilvers) + * Add pkg-config .pc files for the 5 libraries we produce (csilvers) + * Added proper libtool versioning, so this lib will be 0.1.0 (csilvers) + * Moved from autoconf 2.64 to 2.65 + +Thu Aug 5 12:48:03 PDT 2010 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.6 release + * Add tc_malloc_usable_size for compatibility with glibc (csilvers) + * Override malloc_usable_size with tc_malloc_usable_size (csilvers) + * Default to no automatic heap sampling in tcmalloc (csilvers) + * Add -DTCMALLOC_LARGE_PAGES, a possibly faster tcmalloc (rus) + * Make some functions extern "C" to avoid false ODR warnings (jyasskin) + * pprof: Add SVG-based output (rsc) + * pprof: Extend pprof --tools to allow per-tool configs (csilvers) + * pprof: Improve support of 64-bit and big-endian profiles (csilvers) + * pprof: Add interactive callgrind suport (weidenri...) + * pprof: Improve address->function mapping a bit (dpeng) + * Better detection of when we're running under valgrind (csilvers) + * Better CPU-speed detection under valgrind (saito) + * Use, and recommend, -fno-builtin-malloc when compiling (csilvers) + * Avoid false-sharing of memory between caches (bmaurer) + * BUGFIX: Fix heap sampling to use correct alloc size (bmauer) + * BUGFIX: Avoid gcc 4.0.x bug by making hook-clearing atomic (csilvers) + * BUGFIX: Avoid gcc 4.5.x optimization bug (csilvers) + * BUGFIX: Work around deps-determining bug in libtool 1.5.26 (csilvers) + * BUGFIX: Fixed test to use HAVE_PTHREAD, not HAVE_PTHREADS (csilvers) + * BUGFIX: Fix tls callback behavior on windows when using wpo (wtc) + * BUGFIX: properly align allocation sizes on Windows (antonm) + * BUGFIX: Fix prototypes for tcmalloc/debugalloc wrt throw() (csilvers) + * DOC: Updated heap-checker doc to match reality better (fischman) + * DOC: Document ProfilerFlush, ProfilerStartWithOptions (csilvers) + * DOC: Update docs for heap-profiler functions (csilvers) + * DOC: Clean up documentation around tcmalloc.slack_bytes (fikes) + * DOC: Renamed README.windows to README_windows.txt (csilvers) + * DOC: Update the NEWS file to be non-empty (csilvers) + * PORTING: Fix windows addr2line and nm with proper rc code (csilvers) + * PORTING: Add CycleClock and atomicops support for arm 5 (sanek) + * PORTING: Improve PC finding on cygwin and redhat 7 (csilvers) + * PORTING: speed up function-patching under windows (csilvers) + +Tue Jan 19 14:46:12 2010 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.5 release + * Add tc_set_new_mode (willchan) + * Make memalign functions + realloc respect tc_set_new_mode (willchan) + * Add ReleaseToSystem(num_bytes) (kash) + * Handle zero-length symbols a bit better in pprof (csilvers) + * Prefer __environ to /proc/self/environ in cpu profiler (csilvers) + * Add HEAP_CHECK_MAX_LEAKS flag to control #leaks to report (glider) + * Add two new numeric pageheap properties to MallocExtension (fikes) + * Print alloc size when mmap fails (hakon) + * Add ITIMER_REAL support to cpu profiler (csilvers, nabeelmian) + * Speed up symbolizer in heap-checker reporting (glider) + * Speed up futexes with FUTEX_PRIVATE_FLAG (m3b) + * Speed up tcmalloc but doing better span coalescing (sanjay) + * Better support for different wget's and addr2maps in pprof (csilvres) + * Implement a nothrow version of delete and delete[] (csilvers) + * BUGFIX: fix a race on module_libcs[i] in windows patching (csilvers) + * BUGFIX: Fix debugallocation to call cpp_alloc for new (willchan) + * BUGFIX: A simple bugfix for --raw mode (mrabkin) + * BUGFIX: Fix C shims to actually be valid C (csilvers) + * BUGFIX: Fix recursively-unmapped-region accounting (ppluzhnikov) + * BUGFIX: better distinguish real and fake vdso (ppluzhnikov) + * WINDOWS: replace debugmodule with more reliable psai (andrey) + * PORTING: Add .bundle as another shared library extension (csilvers) + * PORTING: Fixed a typo bug in the ocnfigure PRIxx m4 macro (csilvers) + * PORTING: Augment sysinfo to work on 64-bit OS X (csilvers) + * PORTING: Use sys/ucontext.h to fix compiing on OS X 10.6 (csilvers) + * PORTING: Fix sysinfo libname reporting for solaris x86 (jeffrey) + * PORTING: Use libunwind for i386 when using --omitfp (ppluzhnikov) + +Thu Sep 10 13:51:15 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.4 release + * Add debugallocation library, to catch memory leaks, stomping, etc + * Add --raw mode to allow for delayed processing of pprof files + * Use less memory when reading CPU profiles + * New environment variables to control kernel-allocs (sbrk, memfs, etc) + * Add MarkThreadBusy(): performance improvement + * Remove static thread-cache-size code; all is dynamic now + * Add new HiddenPointer class to heap checker + * BUGFIX: pvalloc(0) allocates now (found by new debugalloc library) + * BUGFIX: valloc test (not implementation) no longer overruns memory + * BUGFIX: GetHeapProfile no longer deadlocks + * BUGFIX: Support unmapping memory regions before main + * BUGFIX: Fix some malloc-stats formatting + * BUGFIX: Don't crash as often when freeing libc-allocated memory + * BUGFIX: Deal better with incorrect PPROF_PATH when symbolizing + * BUGFIX: weaken new/delete/etc in addition to malloc/free/etc + * BUGFIX: Fix return value of GetAllocatedSize + * PORTING: Fix mmap-#define problem on some 64-bit systems + * PORTING: Call ranlib again (some OS X versions need it) + * PORTING: Fix a leak when building with LLVM + * PORTING: Remove some unneeded bash-ishs from testing scripts + * WINDOWS: Support library unloading as well as loading + * WINDOWS/BUGFIX: Set page to 'xrw' instead of 'rw' when patching + +Tue Jun 9 18:19:06 2009 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.3 release + * Provide our own name for memory functions: tc_malloc, etc (csilvers) + * Weaken memory-alloc functions so user can override them (csilvers) + * Remove meaningless delete(nothrow) and delete[](nothrow) (csilvers) + * BUILD: replace clever libtcmalloc/profiler.a with a new .a (csilvers) + * PORTING: improve windows port by using google spinlocks (csilvers) + * PORTING: Fix RedHat 9 memory allocation in heapchecker (csilvers) + * PORTING: Rename OS_WINDOWS macro to PLATFORM_WINDOWS (mbelshe) + * PORTING/BUGFIX: Make sure we don't clobber GetLastError (mbelshe) + * BUGFIX: get rid of useless data for callgrind (weidenrinde) + * BUGFIX: Modify windows patching to deadlock sometimes (csilvers) + * BUGFIX: an improved fix for hook handling during fork (csilvers) + * BUGFIX: revamp profiler_unittest.sh, which was very broken (csilvers) + +Fri Apr 17 16:40:48 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.2 release + * Allow large_alloc_threshold=0 to turn it off entirely (csilvers) + * Die more helpfully when out of memory for internal data (csilvers) + * Refactor profile-data gathering, add a new unittest (cgd, nabeelmian) + * BUGFIX: fix rounding errors with static thread-size caches (addi) + * BUGFIX: disable hooks better when forking in leak-checker (csilvers) + * BUGFIX: fix realloc of crt pointers on windows (csilvers) + * BUGFIX: do a better job of finding binaries in .sh tests (csilvers) + * WINDOWS: allow overriding malloc/etc instead of patching (mbelshe) + * PORTING: fix compilation error in a ppc-specific file (csilvers) + * PORTING: deal with quirks in cygwin's /proc/self/maps (csilvers) + * PORTING: use 'A' version of functions for ascii input (mbelshe) + * PORTING: generate .so's on cygwin and mingw (ajenjo) + * PORTING: disable profiler methods on cygwin (jperkins) + * Updated autoconf version to 2.61 and libtool version to 1.5.26 + +Wed Mar 11 11:25:34 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.1 release + * Dynamically resize thread caches -- nice perf. improvement (kash) + * Add VDSO support to give better stacktraces in linux (ppluzhnikov) + * Improve heap-profiling sampling algorithm (ford) + * Rewrite leak-checking code: should be faster and more robust (sanjay) + * Use ps2 instead of ps for dot: better page cropping for gv (csilvers) + * Disable malloc-failure warning messages by default (csilvers) + * Update config/Makefile to disable tests on a per-OS basis (csilvers) + * PORTING: Get perftools compiling under MSVC 7.1 again (csilvers) + * PORTING: Get perftools compiling under cygwin again (csilvers) + * PORTING: automatically set library flags for solaris x86 (csilvers) + * Add TCMALLOC_SKIP_SBRK to mirror TCMALLOC_SKIP_MMAP (csilvers) + * Add --enable flags to allow selective building (csilvers) + * Put addr2line-pdb and nm-pdb in proper output directory (csilvers) + * Remove deprecated DisableChecksIn (sanjay) + * DOCUMENTATION: Document most MallocExtension routines (csilvers) + +Tue Jan 6 13:58:56 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.0 release + * Exactly the same as 1.0rc2 + +Sun Dec 14 17:10:35 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 1.0rc2 release + * Fix compile error on 64-bit systems (casting ptr to int) (csilvers) + +Thu Dec 11 16:01:32 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.0rc1 release + * Replace API for selectively disabling heap-checker in code (sanjay) + * Add a pre-mmap hook (daven, adlr) + * Add MallocExtension interface to set memory-releasing rate (fikes) + * Augment pprof to allow any string ending in /pprof/profile (csilvers) + * PORTING: Rewrite -- and fix -- malloc patching for windows (dvitek) + * PORTING: Add nm-pdb and addr2line-pdb for use by pprof (dvitek) + * PORTING: Improve cygwin and mingw support (jperkins, csilvers) + * PORTING: Fix pprof for mac os x, other pprof improvements (csilvers) + * PORTING: Fix some PPC bugs in our locking code (anton.blanchard) + * A new unittest, smapling_test, to verify tcmalloc-profiles (csilvers) + * Turn off TLS for gcc < 4.1.2, due to a TLS + -fPIC bug (csilvers) + * Prefer __builtin_frame_address to assembly for stacktraces (nlewycky) + * Separate tcmalloc.cc out into multiple files -- finally! (kash) + * Make our locking code work with -fPIC on 32-bit x86 (aruns) + * Fix an initialization-ordering bug for tcmalloc/profiling (csilvers) + * Use "initial exec" model of TLS to speed up tcmalloc (csilvers) + * Enforce 16-byte alignment for tcmalloc, for SSE (sanjay) + +Tue Sep 23 08:56:31 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.99.2 release + * COMPILE FIX: add #include needed for FreeBSD and OS X (csilvers) + +Sat Sep 20 09:37:18 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.99.1 release + * BUG FIX: look for nm, etc in /usr/bin, not /usr/crosstool (csilvers) + +Thu Sep 18 16:00:27 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.99 release + * Add IsHeapProfileRunning (csilvers) + * Add C shims for some of the C++ header files (csilvers) + * Fix heap profile file clean-up logic (maxim) + * Rename linuxthreads.c to .cc for better compiler support (csilvers) + * Add source info to disassembly in pprof (sanjay) + * Use open instead of fopen to avoid memory alloc (csilvers) + * Disable malloc extensions when running under valgrind (kcc) + * BUG FIX: Fix out-of-bound error by reordering a check (larryz) + * Add Options struct to ProfileData (cgd) + * Correct PC-handling of --base in pprof (csilvers) + * Handle 1 function occurring twice in an image (sanjay) + * Improve stack-data cleaning (maxim) + * Use 'struct Foo' to make header C compatible (csilvers) + * Add 'total' line to pprof --text (csilvers) + * Pre-allocate buffer for heap-profiler to avoid OOM errors (csilvers) + * Allow a few more env-settings to control tcmalloc (csilvers) + * Document some of the issues involving thread-local storage (csilvers) + * BUG FIX: Define strtoll and friends for windows (csilvers) + +Mon Jun 9 16:47:03 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.98 release + * Add ProfilerStartWithOptions() (cgd) + * Change tcmalloc_minimal to not do any stack-tracing at all (csilvers) + * Prefer mmap to sbrk for 64-buit debug mode (sanjay) + * Fix accounting for some tcmalloc stats (sanjay) + * Use setrlimit() to keep unittests from killing the machine (odo) + * Fix a bug when sbrk-ing near address 4G (csilvers) + * Make MallocHook thread-safe (jyasskin) + * Fix windows build for MemoryBarrier (jyasskin) + * Fix CPU-profiler docs to mention correct libs (csilvers) + * Fix for GetHeapProfile() when heap-profiling is off (maxim) + * Avoid realloc resizing ping-pongs using hysteresis (csilvers) + * Add --callgrind output support to pprof (klimek) + * Fix profiler.h and heap-profiler.h to be C-compatible (csilvers) + * Break malloc_hook.h into two parts to reduce dependencies (csilvers) + * Better handle systems that don't implement mmap (csilvers) + * PORTING: disable system_alloc_unittest for msvc (csilvers) + * PORTING: Makefile tweaks to build better on cygwin (csilvers) + +Mon Apr 21 15:20:52 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.97 release + * Refactor GetHeapProfile to avoid using malloc (maxim) + * Fix heap-checker and heap-profiler hook interactions (maxim) + * Fix a data race in MemoryRegionMap::Lock (jyasskin) + * Improve thread-safety of leak checker (maxim) + * Fix mmap profile to no longer deadlock (maxim) + * Fix rpm to have devel package depend on non-devel (csilvers) + * PORTING: Fix clock-speed detection for Mac OS X (csilvers) + +Tue Mar 18 14:30:44 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.96 release + * major atomicops rewrite; fixed atomic ops code for linux/ppc (vchen) + * nix the stacktrace library; now build structure is simpler (csilvers) + * Speed up heap-checker, and reduce extraneous logging (maxim) + * Improve itimer code for NPTL case (cgd) + * Add source code annotations for use by valgrind, etc (kcc) + * PORTING: Fix high resolution timers for Mac OS X (adlr) + +Tue Feb 19 12:01:31 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.95.1 release (bugfix release) + * x86_64 compile-fix: nix pread64 and pwrite64 (csilvers) + * more heap-checker debug logging (maxim) + * minor improvement to x86_64 CycleClock (gpike) + +Tue Feb 12 12:28:32 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.95 release + * Better -- not perfect -- support for linux-ppc (csilvers) + * Fix race condition in libunwind stacktrace (aruns) + * Speed up x86 spinlock locking (m3b) + * Improve heap-checker performance (maxim) + * Heap checker traverses more ptrs inside heap-alloced objects (maxim) + * Remove deprecated ProfilerThreadState function (cgd) + * Update libunwind documentation for statically linked binaries (aruns) + +Mon Dec 3 23:51:54 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.94.1 release (bugfix release) + * Fix missing #includes for x86_64 compile using libunwind (csilvers) + +Thu Nov 29 07:59:43 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.94 release + * PORTING: MinGW/Msys support -- runs same code as MSVC does (csilvers) + * PORTING: Add NumCPUs support for Mac OS X (csilvers) + * Work around a sscanf bug in glibc(?) 
(waldemar) + * Fix Windows MSVC bug triggered by thread deletion (csilvers) + * Fix bug that triggers in MSVC /O2: missing volatile (gpike) + * March-of-time support: quiet warnings/errors for gcc 4.2, OS X 10.5 + * Modify pprof so it works without nm: useful for windows (csilvers) + * pprof: Support filtering for CPU profiles (cgd) + * Bugfix: have realloc report to hooks in all situations (maxim) + * Speed improvement: replace slow memcpy with std::copy (soren) + * Speed: better iterator efficiency in RecordRegionRemoval (soren) + * Speed: minor speed improvements via better bitfield alignment (gpike) + * Documentation: add documentation of binary profile output (cgd) + +Fri Aug 17 12:32:56 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.93 release + * PORTING: everything compiles on Solaris, OS X, FreeBSD (see INSTALL) + * PORTING: cpu-profiler works on most platforms (much better GetPC()) + * PORTING: heap-profiler works on most platforms + * PORTING: improved windows support, including release builds + * No longer build or run ptmalloc tests by default + * Add support for using memfs filesystem to allocate memory in linux + * WINDOWS: give debug library and release library different names + +Tue Jul 17 22:26:27 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.92 release + * PERFORMANCE: use a packed cache to speed up tcmalloc + * PORTING: preliminary windows support! (see README.windows) + * PORTING: better support for solaris, OS X, FreeBSD (see INSTALL) + * Envvar support for running the heap-checker under gdb + * Add weak declarations to maybe_threads to fix no-pthreads compile bugs + * Some 64bit fixes, especially with pprof + * Better heap-checker support for some low-level allocations + * Fix bug where heap-profiles would sometimes get truncated + * New documentation about how to handle common heap leak situations + * Use computed includes for hash_map/set: easier config + * Added all used .m4 templates to the distribution + +Wed Apr 18 16:43:55 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.91 release + * Brown-paper-bag bugfix: compilation error on some x86-64 machines + +Fri Apr 13 14:50:51 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.90 release + * (As the version-number jump hints, this is a major new release: + almost every piece of functionality was rewritten. I can't do + justice to all the changes, but will concentrate on highlights.) + *** USER-VISIBLE CHANGES: + * Ability to "release" unused memory added to tcmalloc + * Exposed more tweaking knobs via environment variables (see docs) + * pprof tries harder to map addresses to functions + * tcmalloc_minimal compiles and runs on FreeBSD 6.0 and Solaris 10 + *** INTERNAL CHANGES: + * Much better 64-bit support + * Better multiple-processor support (e.g. multicore contention tweaks) + * Support for recent kernel ABI changes (e.g. new arg to mremap) + * Addition of spinlocks to tcmalloc to reduce contention cost + * Speed up tcmalloc by using __thread on systems that support TLS + * Total redesign of heap-checker to improve liveness checking + * More portable stack-frame analysis -- no more hard-coded constants! + * Disentangled heap-profiler code and heap-checker code + * Several new unittests to test, e.g., thread-contention costs + * Lots of small (but important!) bug fixes: e.g., fixing GetPC on amd64 + *** KNOWN PROBLEMS: + * CPU-profiling may crash on x86_64 (64-bit) systems. 
See the README + * Profiling/heap-checking may deadlock on x86_64 systems. See README + +Wed Jun 14 15:11:14 2006 Google Inc. <opensource@google.com> + + * google-perftools: version 0.8 release + * Experimental support for remote profiling added to pprof (many) + * Fixed race condition in ProfileData::FlushTable (etune) + * Better support for weird /proc maps (maxim, mec) + * Fix heap-checker interaction with gdb (markus) + * Better 64-bit support in pprof (aruns) + * Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay) + * Cast syscall(SYS_mmap); works on more 64-bit systems now (menage) + * Document the text output of pprof! (csilvers) + * Better compiler support for no-THREADS and for old compilers (csilvers) + * Make libunwind the default stack unwinder for x86-64 (aruns) + * Somehow the COPYING file got erased. Regenerate it (csilvers) + +Thu Apr 13 20:59:09 2006 Google Inc. <opensource@google.com> + + * google-perftools: version 0.7 release + * Major rewrite of thread introspection for new kernels (markus) + * Major rewrite of heap-checker to use new thread tools (maxim) + * Add proper support for following data in thread registers (maxim) + * Syscall support for older kernels, including _syscall6 (markus) + * Support PIC mode (markus, mbland, iant) + * Better support for running in non-threaded contexts (csilvers) + +Fri Jan 27 14:04:27 2006 Google Inc. <opensource@google.com> + + * google-perftools: version 0.6 release + * More sophisticated stacktrace usage, possibly using libunwind (aruns) + * Update pprof to handle 64-bit profiles (dehnert) + * Fix GetStackTrace to correctly return top stackframe (sanjay) + * Add ANSI compliance for new and new[], including new_handler (jkearney) + * More accuracy by reading ELF files directly rather than objdump (mec) + * Add readline support for pprof (addi) + * Add #includes for PPC (csilvers) + * New PC-detection routine for ibook powerpc (asbestoshead) + * Vastly improved tcmalloc unittest (csilvers) + * Move documentation from /usr/doc to /usr/share/doc + +Mon Nov 14 17:28:59 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.5 release + * Add va_start/va_end calls around vsnprintf() (csilvers) + * Write our own __syscall_return(), since it's not defined + consistently on all 64-bit linux distros (markus) + +Wed Oct 26 15:19:16 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.4 release + * Decrease fragmentation in tcmalloc (lefevere) + * Support for ARM in some of the thread-specific code (markus) + * Turn off heap-checker for statically-linked binaries, which + cause error leak reports now (etune) + * Many pprof improvements, including a command-line interface (jeff) + * CPU profiling now automatically affects all threads in linux 2.6. + (Kernel bugs break CPU profiling and threads in linux 2.4 a bit.) + ProfilerEnable() and ProfilerDisable() are deprecated. (sanjay) + * tcmalloc now correctly intercepts memalign (m3b, maxim) + * Syntax fix: added missing va_end()s. 
Helps non-gcc compiling (etune) + * Fixed a few coredumper bugs: race condition after PTRACE_DETACH, + ignore non-aligned stackframe pointers (markus, menage) + * 64-bit cleanup, especially for spinlock code (etune) and mmap (sanjay) + * Better support for finding threads in linux (markus) + * tcmalloc now tracks those stack traces that allocate memory (sanjay) + * Work around a weird setspecific problem (sanjay) + * Fix tcmalloc overflow problems when an alloc is close to 2G/4G (sanjay) + +Fri Jun 24 18:02:26 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.3 release + * Add missing errno include for one of the unittests (csilvers) + * Reduce tcmalloc startup memory from 5M to 256K (sanjay) + * Add support for mallopt() and mallinfo (sanjay) + * Improve stacktrace's performance on some 64-bit systems (etune) + * Improve the stacktrace unittest (etune) + +Tue May 31 08:14:38 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.2 release + * Use mmap2() instead of mmap(), to map more memory (menage) + * Do correct pthread-local checking in heap-checker! (maxim) + * Avoid overflow on 64-bit machines in pprof (sanjay) + * Add a few more GetPC() functions, including for AMD (csilvers) + * Better method for overriding pthread functions (menage) + * (Hacky) fix to avoid overwriting profile files after fork() (csilvers) + * Crashing bugfix involving dumping heaps on small-stack threads (tudor) + * Allow library versions with letters at the end (csilvers) + * Config fixes for systems that don't define PATH_MAX (csilvers) + * Confix fixes so we no longer need config.h after install (csilvers) + * Fix to pprof to correctly read very big cpu profiles (csilvers) + * Fix to pprof to deal with new commandline flags in modern gv's + * Better error reporting when we can't access /proc/maps (etune) + * Get rid of the libc-preallocate code (which could crash on some + systems); no longer needed with local-threads fix (csilvers) + +Tue Feb 8 09:57:17 2005 Google Inc. <opensource@google.com> + + * google-perftools: initial release: + The google-perftools package contains some utilities to improve + and analyze the performance of C++ programs. This includes an + optimized thread-caching malloc() and cpu and heap profiling + utilities. diff --git a/src/third_party/gperftools-2.5/INSTALL b/src/third_party/gperftools-2.5/INSTALL new file mode 100644 index 00000000000..f9a6a117289 --- /dev/null +++ b/src/third_party/gperftools-2.5/INSTALL @@ -0,0 +1,563 @@ +Copyright 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software +Foundation, Inc. + + This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + + +Perftools-Specific Install Notes +================================ + +*** Building from source repository + +As of 2.1 gperftools does not have configure and other autotools +products checked into it's source repository. This is common practice +for projects using autotools. + +NOTE: Source releases (.tar.gz that you download from +code.google.com/p/gperftools) still have all required files just as +before. Nothing has changed w.r.t. building from .tar.gz releases. + +But, in order to build gperftools checked out from subversion +repository you need to have autoconf, automake and libtool +installed. And before running ./configure you have to generate it (and +a bunch of other files) by running ./autogen.sh script. That script +will take care of calling correct autotools programs in correct order. 
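As a concrete sketch of that sequence (the checkout directory name is illustrative; autoconf, automake and libtool are assumed to be installed already):

   % cd gperftools        # a fresh checkout of the source repository
   % ./autogen.sh         # generates ./configure and friends via autotools
   % ./configure          # from here on it behaves like a .tar.gz release
   % make && make check   # build, then optionally run the self-tests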
+ +If you're maintainer then it's business as usual too. Just run make +dist (or, preferably, make distcheck) and it'll produce .tar.gz or +.tar.bz2 with all autotools magic already included. So that users can +build our software without having autotools. + + +*** NOTE FOR 64-BIT LINUX SYSTEMS + +The glibc built-in stack-unwinder on 64-bit systems has some problems +with the perftools libraries. (In particular, the cpu/heap profiler +may be in the middle of malloc, holding some malloc-related locks when +they invoke the stack unwinder. The built-in stack unwinder may call +malloc recursively, which may require the thread to acquire a lock it +already holds: deadlock.) + +For that reason, if you use a 64-bit system, we strongly recommend you +install libunwind before trying to configure or install gperftools. +libunwind can be found at + + http://download.savannah.gnu.org/releases/libunwind/libunwind-0.99-beta.tar.gz + +Even if you already have libunwind installed, you should check the +version. Versions older than this will not work properly; too-new +versions introduce new code that does not work well with perftools +(because libunwind can call malloc, which will lead to deadlock). + +There have been reports of crashes with libunwind 0.99 (see +http://code.google.com/p/gperftools/issues/detail?id=374). +Alternately, you can use a more recent libunwind (e.g. 1.0.1) at the +cost of adding a bit of boilerplate to your code. For details, see +http://groups.google.com/group/google-perftools/msg/2686d9f24ac4365f + + CAUTION: if you install libunwind from the url above, be aware that + you may have trouble if you try to statically link your binary with + perftools: that is, if you link with 'gcc -static -lgcc_eh ...'. + This is because both libunwind and libgcc implement the same C++ + exception handling APIs, but they implement them differently on + some platforms. This is not likely to be a problem on ia64, but + may be on x86-64. + + Also, if you link binaries statically, make sure that you add + -Wl,--eh-frame-hdr to your linker options. This is required so that + libunwind can find the information generated by the compiler + required for stack unwinding. + + Using -static is rare, though, so unless you know this will affect + you it probably won't. + +If you cannot or do not wish to install libunwind, you can still try +to use the built-in stack unwinder. The built-in stack unwinder +requires that your application, the tcmalloc library, and system +libraries like libc, all be compiled with a frame pointer. This is +*not* the default for x86-64. + +If you are on x86-64 system, know that you have a set of system +libraries with frame-pointers enabled, and compile all your +applications with -fno-omit-frame-pointer, then you can enable the +built-in perftools stack unwinder by passing the +--enable-frame-pointers flag to configure. + +Even with the use of libunwind, there are still known problems with +stack unwinding on 64-bit systems, particularly x86-64. See the +"64-BIT ISSUES" section in README. + +If you encounter problems, try compiling perftools with './configure +--enable-frame-pointers'. Note you will need to compile your +application with frame pointers (via 'gcc -fno-omit-frame-pointer +...') in this case. + + +*** TCMALLOC LARGE PAGES: TRADING TIME FOR SPACE + +You can set a compiler directive that makes tcmalloc faster, at the +cost of using more space (due to internal fragmentation). + +Internally, tcmalloc divides its memory into "pages." 
The default +page size is chosen to minimize memory use by reducing fragmentation. +The cost is that keeping track of these pages can cost tcmalloc time. +We've added a new flag to tcmalloc that enables a larger page size. +In general, this will increase the memory needs of applications using +tcmalloc. However, in many cases it will speed up the applications +as well, particularly if they allocate and free a lot of memory. We've +seen average speedups of 3-5% on Google applications. + +To build libtcmalloc with large pages you need to use the +--with-tcmalloc-pagesize=ARG configure flag, e.g.: + + ./configure <other flags> --with-tcmalloc-pagesize=32 + +The ARG argument can be 8, 32 or 64 which sets the internal page size to +8K, 32K and 64K repectively. The default is 8K. + + +*** SMALL TCMALLOC CACHES: TRADING SPACE FOR TIME + +You can set a compiler directive that makes tcmalloc use less memory +for overhead, at the cost of some time. + +Internally, tcmalloc keeps information about some of its internal data +structures in a cache. This speeds memory operations that need to +access this internal data. We've added a new, experimental flag to +tcmalloc that reduces the size of this cache, decresaing the memory +needs of applications using tcmalloc. + +This feature is still very experimental; it's not even a configure +flag yet. To build libtcmalloc with smaller internal caches, run + + ./configure <normal flags> CXXFLAGS=-DTCMALLOC_SMALL_BUT_SLOW + +(or add -DTCMALLOC_SMALL_BUT_SLOW to your existing CXXFLAGS argument). + + +*** NOTE FOR ___tls_get_addr ERROR + +When compiling perftools on some old systems, like RedHat 8, you may +get an error like this: + ___tls_get_addr: symbol not found + +This means that you have a system where some parts are updated enough +to support Thread Local Storage, but others are not. The perftools +configure script can't always detect this kind of case, leading to +that error. To fix it, just comment out the line + #define HAVE_TLS 1 +in your config.h file before building. + + +*** TCMALLOC AND DLOPEN + +To improve performance, we use the "initial exec" model of Thread +Local Storage in tcmalloc. The price for this is the library will not +work correctly if it is loaded via dlopen(). This should not be a +problem, since loading a malloc-replacement library via dlopen is +asking for trouble in any case: some data will be allocated with one +malloc, some with another. If, for some reason, you *do* need to use +dlopen on tcmalloc, the easiest way is to use a version of tcmalloc +with TLS turned off; see the ___tls_get_addr note above. 
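Tying the 64-bit notes above together, here is a hedged sketch of configuring perftools to use the built-in unwinder and of compiling an application against it (myapp.c is a hypothetical program; depending on the system, extra libraries such as -lpthread may also be needed on the link line):

   % ./configure --enable-frame-pointers    # built-in stack unwinder, no libunwind
   % make && make install
   % gcc -fno-omit-frame-pointer -fno-builtin-malloc \
         -o myapp myapp.c -ltcmalloc        # myapp.c is a placeholder name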
+ + +*** COMPILING ON NON-LINUX SYSTEMS + +Perftools has been tested on the following systems: + FreeBSD 6.0 (x86) + FreeBSD 8.1 (x86_64) + Linux CentOS 5.5 (x86_64) + Linux Debian 4.0 (PPC) + Linux Debian 5.0 (x86) + Linux Fedora Core 3 (x86) + Linux Fedora Core 4 (x86) + Linux Fedora Core 5 (x86) + Linux Fedora Core 6 (x86) + Linux Fedora Core 13 (x86_64) + Linux Fedora Core 14 (x86_64) + Linux RedHat 9 (x86) + Linux Slackware 13 (x86_64) + Linux Ubuntu 6.06.1 (x86) + Linux Ubuntu 6.06.1 (x86_64) + Linux Ubuntu 10.04 (x86) + Linux Ubuntu 10.10 (x86_64) + Mac OS X 10.3.9 (Panther) (PowerPC) + Mac OS X 10.4.8 (Tiger) (PowerPC) + Mac OS X 10.4.8 (Tiger) (x86) + Mac OS X 10.5 (Leopard) (x86) + Mac OS X 10.6 (Snow Leopard) (x86) + Solaris 10 (x86_64) + Windows XP, Visual Studio 2003 (VC++ 7.1) (x86) + Windows XP, Visual Studio 2005 (VC++ 8) (x86) + Windows XP, Visual Studio 2005 (VC++ 9) (x86) + Windows XP, Visual Studio 2005 (VC++ 10) (x86) + Windows XP, MinGW 5.1.3 (x86) + Windows XP, Cygwin 5.1 (x86) + +It works in its full generality on the Linux systems +tested (though see 64-bit notes above). Portions of perftools work on +the other systems. The basic memory-allocation library, +tcmalloc_minimal, works on all systems. The cpu-profiler also works +fairly widely. However, the heap-profiler and heap-checker are not +yet as widely supported. In general, the 'configure' script will +detect what OS you are building for, and only build the components +that work on that OS. + +Note that tcmalloc_minimal is perfectly usable as a malloc/new +replacement, so it is possible to use tcmalloc on all the systems +above, by linking in libtcmalloc_minimal. + +** FreeBSD: + + The following binaries build and run successfully (creating + libtcmalloc_minimal.so and libprofile.so in the process): + % ./configure + % make tcmalloc_minimal_unittest tcmalloc_minimal_large_unittest \ + addressmap_unittest atomicops_unittest frag_unittest \ + low_level_alloc_unittest markidle_unittest memalign_unittest \ + packed_cache_test stacktrace_unittest system_alloc_unittest \ + thread_dealloc_unittest profiler_unittest.sh + % ./tcmalloc_minimal_unittest # to run this test + % [etc] # to run other tests + + Three caveats: first, frag_unittest tries to allocate 400M of memory, + and if you have less virtual memory on your system, the test may + fail with a bad_alloc exception. + + Second, profiler_unittest.sh sometimes fails in the "fork" test. + This is because stray SIGPROF signals from the parent process are + making their way into the child process. (This may be a kernel + bug that only exists in older kernels.) The profiling code itself + is working fine. This only affects programs that call fork(); for + most programs, the cpu profiler is entirely safe to use. + + Third, perftools depends on /proc to get shared library + information. If you are running a FreeBSD system without proc, + perftools will not be able to map addresses to functions. Some + unittests will fail as a result. + + Finally, the new test introduced in perftools-1.2, + profile_handler_unittest, fails on FreeBSD. It has something to do + with how the itimer works. The cpu profiler test passes, so I + believe the functionality is correct and the issue is with the test + somehow. If anybody is an expert on itimers and SIGPROF in + FreeBSD, and would like to debug this, I'd be glad to hear the + results! 
+ + libtcmalloc.so successfully builds, and the "advanced" tcmalloc + functionality all works except for the leak-checker, which has + Linux-specific code: + % make heap-profiler_unittest.sh maybe_threads_unittest.sh \ + tcmalloc_unittest tcmalloc_both_unittest \ + tcmalloc_large_unittest # THESE WORK + % make -k heap-checker_unittest.sh \ + heap-checker-death_unittest.sh # THESE DO NOT + + Note that unless you specify --enable-heap-checker explicitly, + 'make' will not build the heap-checker unittests on a FreeBSD + system. + + I have not tested other *BSD systems, but they are probably similar. + +** Mac OS X: + + I've tested OS X 10.5 [Leopard], OS X 10.4 [Tiger] and OS X 10.3 + [Panther] on both intel (x86) and PowerPC systems. For Panther + systems, perftools does not work at all: it depends on a header + file, OSAtomic.h, which is new in 10.4. (It's possible to get the + code working for Panther/i386 without too much work; if you're + interested in exploring this, drop an e-mail.) + + For the other seven systems, the binaries and libraries that + successfully build are exactly the same as for FreeBSD. See that + section for a list of binaries and instructions on building them. + + In addition, it appears OS X regularly fails profiler_unittest.sh + in the "thread" test (in addition to occassionally failing in the + "fork" test). It looks like OS X often delivers the profiling + signal to the main thread, even when it's sleeping, rather than + spawned threads that are doing actual work. If anyone knows + details of how OS X handles SIGPROF (via setitimer()) events with + threads, and has insight into this problem, please send mail to + google-perftools@googlegroups.com. + +** Solaris 10 x86: + + I've only tested using the GNU C++ compiler, not the Sun C++ + compiler. Using g++ requires setting the PATH appropriately when + configuring. + + % PATH=${PATH}:/usr/sfw/bin/:/usr/ccs/bin ./configure + % PATH=${PATH}:/usr/sfw/bin/:/usr/ccs/bin make [...] + + Again, the binaries and libraries that successfully build are + exactly the same as for FreeBSD. (However, while libprofiler.so can + be used to generate profiles, pprof is not very successful at + reading them -- necessary helper programs like nm don't seem + to be installed by default on Solaris, or perhaps are only + installed as part of the Sun C++ compiler package.) See that + section for a list of binaries, and instructions on building them. + +** Windows (MSVC, Cygwin, and MinGW): + + Work on Windows is rather preliminary: only tcmalloc_minimal is + supported. + + We haven't found a good way to get stack traces in release mode on + windows (that is, when FPO is enabled), so the heap profiling may + not be reliable in that case. Also, heap-checking and CPU profiling + do not yet work at all. But as in other ports, the basic tcmalloc + library functionality, overriding malloc and new and such (and even + windows-specific functions like _aligned_malloc!), is working fine, + at least with VC++ 7.1 (Visual Studio 2003) through VC++ 10.0, + in both debug and release modes. See README.windows for + instructions on how to install on Windows using Visual Studio. + + Cygwin can compile some but not all of perftools. Furthermore, + there is a problem with exception-unwinding in cygwin (it can call + malloc, which can call the exception-unwinding-setup code, which + can lead to an infinite loop). I've comitted a workaround to the + exception unwinding problem, but it only works in debug mode and + when statically linking in tcmalloc. 
I hope to have a more proper + fix in a later release. To configure under cygwin, run + + ./configure --disable-shared CXXFLAGS=-g && make + + Most of cygwin will compile (cygwin doesn't allow weak symbols, so + the heap-checker and a few other pieces of functionality will not + compile). 'make' will compile those libraries and tests that can + be compiled. You can run 'make check' to make sure the basic + functionality is working. I've heard reports that some versions of + cygwin fail calls to pthread_join() with EINVAL, causing several + tests to fail. If you have any insight into this, please mail + google-perftools@googlegroups.com. + + This Windows functionality is also available using MinGW and Msys, + In this case, you can use the regular './configure && make' + process. 'make install' should also work. The Makefile will limit + itself to those libraries and binaries that work on windows. + + +Basic Installation +================== + + These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. (Caching is +disabled by default to prevent problems with accidental use of stale +cache files.) + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You only need +`configure.ac' if you want to change it or regenerate `configure' using +a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. 
If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. Run `./configure --help' +for details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a +time in the source code directory. After you have installed the +package for one architecture, use `make distclean' before reconfiguring +for another architecture. + +Installation Names +================== + + By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PATH'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +give `configure' the option `--exec-prefix=PATH', the package will use +PATH as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=PATH' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + + There may be some features `configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. 
Usually, assuming the package is built to be run on the
+_same_ architecture, `configure' can figure that out, but if it prints
+a message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the `--target=TYPE' option to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+ Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+will cause the specified gcc to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+`configure' Invocation
+======================
+
+ `configure' recognizes the following options to control how it
+operates.
+
+`--help'
+`-h'
+ Print a summary of the options to `configure', and exit.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details. diff --git a/src/third_party/gperftools-2.5/NEWS b/src/third_party/gperftools-2.5/NEWS new file mode 100644 index 00000000000..fc81a7a6922 --- /dev/null +++ b/src/third_party/gperftools-2.5/NEWS @@ -0,0 +1,724 @@ +== 12 Mar 2016 ==
+
+gperftools 2.5 is out!
+
+Just a single bugfix was merged after rc2, which was a fix for issue #777.
+
+== 5 Mar 2016 ==
+
+gperftools 2.5rc2 is out!
+
+The new release contains just a few commits on top of the first release
+candidate. One of them is a build fix for Visual Studio.
Another
+significant change is that dynamic sized delete is now disabled by
+default. It turned out that IFUNC relocations do not support our
+advanced use case on all platforms and in all cases.
+
+== 21 Feb 2016 ==
+
+gperftools 2.5rc is out!
+
+Here are major changes since 2.4:
+
+* we've moved to github!
+
+* Bryan Chan has contributed s390x support
+
+* stacktrace capturing via libgcc's _Unwind_Backtrace was implemented
+ (for architectures with missing or broken libunwind).
+
+* "emergency malloc" was implemented, which unbreaks recursive calls
+ to malloc/free from stacktrace capturing functions (such as glibc's
+ backtrace() or libunwind on arm). It is enabled by the
+ --enable-emergency-malloc configure flag or by default on arm when
+ --enable-stacktrace-via-backtrace is given. It is another fix for a
+ number of common issues people had on platforms with missing or broken
+ libunwind.
+
+* C++14 sized-deallocation is now supported (on gcc 5 and recent
+ clangs). It is off by default and can be enabled at configure time
+ via --enable-sized-delete. On GNU/Linux it can also be enabled at
+ run-time, either by the TCMALLOC_ENABLE_SIZED_DELETE environment variable
+ or by defining a tcmalloc_sized_delete_enabled function which should
+ return 1 to enable it.
+
+* we've lowered the default value of the transfer batch size to 512. The
+ previous value (bumped up in 2.1) was too high and caused performance
+ regression for some users. 512 should still give us a performance
+ boost for workloads that need a higher transfer batch size while not
+ penalizing other workloads too much.
+
+* Brian Silverman's patch finally stopped arming the profiling timer
+ unless profiling is started.
+
+* Andrew Morrow has contributed support for obtaining the cache size of the
+ current thread and softer idling (for use in MongoDB).
+
+* we've implemented a few minor performance improvements, particularly
+ on the malloc fast-path.
+
+A number of smaller fixes were made. Many of them were contributed:
+
+* the issue that caused spurious profiler_unittest.sh failures was fixed.
+
+* Jonathan Lambrechts contributed improved callgrind format support to
+ pprof.
+
+* Matt Cross contributed better support for debug symbols in separate
+ files to pprof.
+
+* Matt Cross contributed support for printing collapsed stack frames
+ from pprof, aimed at producing flame graphs.
+
+* Angus Gratton has contributed a documentation fix mentioning that on
+ windows only tcmalloc_minimal is supported.
+
+* Anton Samokhvalov has made tcmalloc use mi_force_{un,}lock on OSX
+ instead of pthread_atfork, which apparently fixes the forking
+ issues tcmalloc had on OSX.
+
+* Milton Chiang has contributed support for building 32-bit gperftools
+ on arm8.
+
+* Patrick LoPresti has contributed support for specifying an alternative
+ profiling signal via the CPUPROFILE_TIMER_SIGNAL environment variable.
+
+* Paolo Bonzini has contributed support for configuring the filename for
+ sending malloc tracing output via the TCMALLOC_TRACE_FILE environment
+ variable.
+
+* user spotrh has enabled use of futex on arm.
+
+* user mitchblank has contributed a better declaration for arg-less
+ profiler functions.
+
+* Tom Conerly contributed proper freeing of memory allocated in
+ HeapProfileTable::FillOrderedProfile on error paths.
+
+* user fdeweerdt has contributed a curl arguments handling fix in pprof
+
+* Fredrik Mellbin fixed tcmalloc's idea of mangled new and delete
+ symbols on windows x64
+
+* Dair Grant has contributed cacheline alignment for ThreadCache
+ objects
+
+* Fredrik Mellbin has contributed an updated windows/config.h for Visual
+ Studio 2015 and other windows fixes.
+
+* we're not linking libpthread to libtcmalloc_minimal anymore. Instead,
+ libtcmalloc_minimal links to pthread symbols weakly. As a result,
+ single-threaded programs remain single-threaded when linking to or
+ preloading libtcmalloc_minimal.so.
+
+* Boris Sazonov has contributed a mips compilation fix and fixed a printf
+ misuse in pprof.
+
+* Adhemerval Zanella has contributed alignment fixes for statically
+ allocated variables.
+
+* Jens Rosenboom has contributed fixes for heap-profiler_unittest.sh
+
+* gshirishfree has contributed a better description for the GetStats method.
+
+* cyshi has contributed a spinlock pause fix.
+
+* Chris Mayo has contributed --docdir argument support for configure.
+
+* Duncan Sands has contributed a fix for function aliases.
+
+* Simon Que contributed a better include for malloc_hook_c.h
+
+* user wmamrak contributed a struct timespec fix for Visual Studio 2015.
+
+* user ssubotin contributed a fix for a typo in the PrintAvailability code.
+
+
+== 10 Jan 2015 ==
+
+gperftools 2.4 is out! The code is exactly the same as 2.4rc.
+
+== 28 Dec 2014 ==
+
+gperftools 2.4rc is out!
+
+Here are changes since 2.3:
+
+* enabled the aggressive decommit option by default. It was found to
+ significantly improve memory fragmentation with negligible impact on
+ performance. (Thanks to investigation work performed by Adhemerval
+ Zanella)
+
+* added ./configure flags for tcmalloc pagesize and tcmalloc
+ allocation alignment. Larger page sizes have been reported to
+ improve performance occasionally. (Patch by Raphael Moreira Zinsly)
+
+* sped-up the hot-path of malloc/free. By about 5% on the static library and
+ about 10% on the shared library. Mainly due to more efficient checking
+ of malloc hooks.
+
+* improved stacktrace capturing in the cpu profiler (due to an issue found by
+ Arun Sharma). As part of that issue, pprof's handling of cpu profiles
+ was also improved.
+
+== 7 Dec 2014 ==
+
+gperftools 2.3 is out!
+
+Here are changes since 2.3rc:
+
+* (issue 658) correctly close socketpair fds on failure (patch by glider)
+
+* libunwind integration can be disabled at configure time (patch by
+ Raphael Moreira Zinsly)
+
+* libunwind integration is disabled by default for ppc64 (patch by
+ Raphael Moreira Zinsly)
+
+* libunwind integration is force-disabled for OSX. It was not used by
+ default anyway. This fixes a compilation issue I saw.
+
+== 2 Nov 2014 ==
+
+gperftools 2.3rc is out!
+
+Most small improvements in this release were made to the pprof tool.
+
+A new experimental Linux-only (for now) cpu profiling mode is a notable
+big improvement.
+
+Here are notable changes since 2.2.1:
+
+* (issue-631) fixed debugallocation miscompilation on mmap-less
+ platforms (courtesy of user iamxujian)
+
+* (issue-630) a reference to the wrong PROFILE (vs.
the correct CPUPROFILE)
+environment variable was fixed (courtesy of WenSheng He)
+
+* pprof now has an option to display stack traces in output for the heap
+ checker (courtesy of Michael Pasieka)
+
+* (issue-636) the pprof web command now works on mingw
+
+* (issue-635) pprof now handles library paths that contain spaces
+ (courtesy of user mich...@sebesbefut.com)
+
+* (issue-637) pprof now has an option to not strip template arguments
+ (patch by jiakai)
+
+* (issue-644) a possible out-of-bounds access in GetenvBeforeMain was
+ fixed (thanks to user abyss.7)
+
+* (issue-641) pprof now has an option --show_addresses (thanks to user
+ yurivict). The new option prints the instruction address in addition to
+ the function name in stack traces
+
+* (issue-646) pprof now works around some issues of addr2line,
+ reportedly seen when the DWARF v4 format is used (patch by Adam McNeeney)
+
+* (issue-645) the heap profiler exit message now includes info on the
+ remaining allocated memory (patch by user yurivict)
+
+* the pprof code that finds the location of /proc/<pid>/maps in cpu profile
+ files is now fixed (patch by Ricardo M. Correia)
+
+* (issue-654) pprof now handles the "split text segments" feature of
+ Chromium for Android. (patch by simonb)
+
+* (issue-655) a potential deadlock on windows caused by an early call to
+ getenv in the malloc initialization code was fixed (bug reported and fix
+ proposed by user zndmitry)
+
+* incorrect detection of arm 6zk instruction set support
+ (-mcpu=arm1176jzf-s) was fixed. (Reported by pedronavf on old
+ issue-493)
+
+* a new cpu profiling mode on Linux is now implemented. It sets up
+ separate profiling timers for separate threads, which improves the
+ accuracy of profiling on Linux a lot. It is off by default, and is
+ enabled if both librt is loaded and the CPUPROFILE_PER_THREAD_TIMERS
+ environment variable is set. But note that all threads need to be
+ registered via ProfilerRegisterThread.
+
+== 21 Jun 2014 ==
+
+gperftools 2.2.1 is out!
+
+Here's the list of fixes:
+
+* issue-626 was closed. It fixes initialization of statically linked
+ tcmalloc.
+
+* issue 628 was closed. It adds a missing header file to the source
+ tarball. This fixes compilation on PPC Linux.
+
+== 3 May 2014 ==
+
+gperftools 2.2 is out!
+
+Here are notable changes since 2.2rc:
+
+* issue 620 (crash on windows when the c runtime dll is reloaded) was
+ fixed
+
+== 19 Apr 2014 ==
+
+gperftools 2.2rc is out!
+
+Here are notable changes since 2.1:
+
+* a number of fixes for a number of compilers and platforms. Notably
+ Visual Studio 2013, recent mingw with c++ threads, and some OSX
+ fixes.
+
+* we now have mips and mips64 support! (courtesy of Jovan Zelincevic,
+ Jean Lee, user xiaoyur347 and others)
+
+* we now have aarch64 (aka arm64) support! (contributed by Riku
+ Voipio)
+
+* there's now support for ppc64-le (by Raphael Moreira Zinsly and
+ Adhemerval Zanella)
+
+* there's now some support for uclibc (contributed by user xiaoyur347)
+
+* google/ headers will now give you a deprecation warning. They have been
+ deprecated since 2.0
+
+* there's now a new api: tc_malloc_skip_new_handler (ported from the
+ chromium fork)
+
+* issue-557: added support for dumping the heap profile via a signal (by
+ Jean Lee)
+
+* issue-567: Petr Hosek contributed SysAllocator support for windows
+
+* Joonsoo Kim contributed several speedups for the central freelist code
+
+* the TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES environment variable now works
+
+* configure scripts are now using AM_MAINTAINER_MODE.
It'll only
+ affect folks who modify source from the .tar.gz and want automake to
+ automatically rebuild Makefiles. See the automake documentation for
+ that.
+
+* issue-586: detect the main executable even if PIE is active (based on a
+ patch by user themastermind1). Notably, it fixes profiler use with
+ ruby.
+
+* there is now support for switching the backtrace capturing method at
+ runtime (via the TCMALLOC_STACKTRACE_METHOD and
+ TCMALLOC_STACKTRACE_METHOD_VERBOSE environment variables)
+
+* there is a new backtrace capturing method using -finstrument-functions
+ prologues, contributed by user xiaoyur347
+
+* a few cases of crashes/deadlocks in the profiler were addressed. See
+ the (famous) issue-66, issue-547 and issue-579.
+
+* issue-464 (memory corruption in debugalloc's realloc after
+ memalign) is now fixed
+
+* tcmalloc is now able to release memory back to the OS on windows
+ (issue-489). The code was ported from the chromium fork (by a number of
+ authors).
+
+* Together with issue-489 we ported chromium's "aggressive decommit"
+ mode. In this mode (settable via the malloc extension and via the
+ environment variable TCMALLOC_AGGRESSIVE_DECOMMIT), free pages are
+ returned to the OS immediately.
+
+* MallocExtension::instance() is now faster (based on a patch by
+ Adhemerval Zanella)
+
+* issue-610 (hangs on windows in multibyte locales) is now fixed
+
+The following people helped with ideas or patches (based on the git log;
+some contributions purely in the bugtracker might be missing): Andrew
+C. Morrow, yurivict, Wang YanQing, Thomas Klausner,
+davide.italiano@10gen.com, Dai MIKURUBE, Joon-Sung Um, Jovan
+Zelincevic, Jean Lee, Petr Hosek, Ben Avison, drussel, Joonsoo Kim,
+Hannes Weisbach, xiaoyur347, Riku Voipio, Adhemerval Zanella, Raphael
+Moreira Zinsly
+
+== 30 July 2013 ==
+
+gperftools 2.1 is out!
+
+Just a few fixes were merged after the rc. Most notably:
+
+* Some fixes for debug allocation on POWER/Linux
+
+== 20 July 2013 ==
+
+gperftools 2.1rc is out!
+
+As a result of more than a year of contributions we're ready for the 2.1
+release.
+
+But before making that step I'd like to create an RC and make sure people
+have a chance to test it.
+
+Here are notable changes since 2.0:
+
+* fixes for building on newer platforms. Notably, there's now initial
+ support for the x32 ABI (--enable-minimal only at this time)
+
+* new getNumericProperty stats for cache sizes
+
+* added the HEAP_PROFILER_TIME_INTERVAL variable (see documentation)
+
+* added an environment variable to control the heap size (TCMALLOC_HEAP_LIMIT_MB)
+
+* added an environment variable to disable release of memory back to the OS
+ (TCMALLOC_DISABLE_MEMORY_RELEASE)
+
+* the cpu profiler can now be switched on and off by sending it a signal
+ (specified in CPUPROFILESIGNAL)
+
+* (issue 491) fixed race-ful spinlock wake-ups
+
+* (issue 496) added some support for forking of a process that is using
+ tcmalloc
+
+* (issue 368) improved memory fragmentation when large chunks of
+ memory are allocated/freed
+
+== 03 February 2012 ==
+
+I've just released gperftools 2.0.
+
+The `google-perftools` project has been renamed to `gperftools`. I
+(csilvers) am stepping down as maintainer, to be replaced by
+David Chappelle. Welcome to the team, David! David has been an
+active contributor to perftools in the past -- in fact, he's the
+only person other than me that already has commit status. I am
+pleased to have him take over as maintainer.
+
+I have both renamed the project (the Google Code site was renamed a few
+weeks ago), and bumped the major version number up to 2, to reflect
+the new community ownership of the project. Almost all the
+[http://gperftools.googlecode.com/svn/tags/gperftools-2.0/ChangeLog changes]
+are related to the renaming.
+
+The main functional change from google-perftools 1.10 is that
+I've renamed the `google/` include-directory to be `gperftools/`
+instead. New code should `#include <gperftools/tcmalloc.h>`/etc.
+(Most users of perftools don't need any perftools-specific includes at
+all, so this is mostly directed to "power users.") I've kept the old
+names around as forwarding headers to the new, so `#include
+<google/tcmalloc.h>` will continue to work.
+
+(The other functional change which I snuck in is getting rid of some
+bash-isms in one of the unittest driver scripts, so it could run on
+Solaris.)
+
+Note that some internal names still contain the text `google`, such as
+the `google_malloc` internal linker section. I think that's a
+trickier transition, and can happen in a future release (if at all).
+
+
+=== 31 January 2012 ===
+
+I've just released perftools 1.10
+
+There is an API-incompatible change: several of the methods in the
+`MallocExtension` class have changed from taking a `void*` to taking a
+`const void*`. You should not be affected by this API change
+unless you've written your own custom malloc extension that derives
+from `MallocExtension`, but since it is a user-visible change, I have
+upped the `.so` version number for this release.
+
+This release focuses on improvements to linux-syscall-support.h,
+including ARM and PPC fixups and general cleanups. I hope this will
+magically fix an array of bugs people have been seeing.
+
+There is also exciting news on the porting front, with support for
+patching win64 assembly contributed by IBM Canada! This is an
+important step -- perhaps the most difficult -- toward getting perftools
+to work on 64-bit windows using the patching technique (it doesn't
+affect the libc-modification technique). `preamble_patcher_test` has
+been added to help test these changes; it is meant to compile under
+x86_64, and won't work under win32.
+
+For the full list of changes, including improved `HEAP_PROFILE_MMAP`
+support, see the
+[http://gperftools.googlecode.com/svn/tags/google-perftools-1.10/ChangeLog ChangeLog].
+
+
+=== 24 January 2012 ===
+
+The `google-perftools` Google Code page has been renamed to
+`gperftools`, in preparation for the project being renamed to
+`gperftools`. In the coming weeks, I'll be stepping down as
+maintainer for the perftools project, and as part of that Google is
+relinquishing ownership of the project; it will now be entirely
+community run. The name change reflects that shift. The 'g' in
+'gperftools' stands for 'great'. :-)
+
+=== 23 December 2011 ===
+
+I've just released perftools 1.9.1
+
+I missed including a file in the tarball that is needed to compile on
+ARM. If you are not compiling on ARM, or have successfully compiled
+perftools 1.9, there is no need to upgrade.
+
+
+=== 22 December 2011 ===
+
+I've just released perftools 1.9
+
+This release has a slew of improvements, from better ARM and freebsd
+support, to improved performance by moving some code outside of locks,
+to better pprof reporting of code with overloaded functions.
+
+The full list of changes is in the
+[http://google-perftools.googlecode.com/svn/tags/google-perftools-1.9/ChangeLog ChangeLog].
+
+
+=== 26 August 2011 ===
+
+I've just released perftools 1.8.3
+
+The star-crossed 1.8 series continues; in 1.8.1, I had accidentally
+removed some code that was needed for FreeBSD. (Without this code
+many apps would crash at startup.) This release re-adds that code.
+If you are not on FreeBSD, or are using FreeBSD with perftools 1.8 or
+earlier, there is no need to upgrade.
+
+=== 11 August 2011 ===
+
+I've just released perftools 1.8.2
+
+I was incorrectly calculating the patch-level in the configuration
+step, meaning the TC_VERSION_PATCH #define in tcmalloc.h was wrong.
+Since the testing framework checks for this, it was failing. Now it
+should work again. This time, I was careful to re-run my tests after
+upping the version number. :-)
+
+If you don't care about the TC_VERSION_PATCH #define, there's no
+reason to upgrade.
+
+=== 26 July 2011 ===
+
+I've just released perftools 1.8.1
+
+I was missing an #include that caused the build to break under some
+compilers, especially newer gcc's, that wanted it. This only affects
+people who build from source, so only the .tar.gz file is updated from
+perftools 1.8. If you didn't have any problems compiling perftools
+1.8, there's no reason to upgrade.
+
+=== 15 July 2011 ===
+
+I've just released perftools 1.8
+
+Of the many changes in this release, a good number pertain to porting.
+I've revamped OS X support to use the malloc-zone framework; it should
+now Just Work to link in tcmalloc, without needing
+`DYLD_FORCE_FLAT_NAMESPACE` or the like. (This is a pretty major
+change, so please feel free to report feedback at
+google-perftools@googlegroups.com.) 64-bit Windows support is also
+improved, as is ARM support, and the hooks are in place to improve
+FreeBSD support as well.
+
+On the other hand, I'm seeing hanging tests on Cygwin. I see the same
+hanging even with (the old) perftools 1.7, so I'm guessing this is
+either a problem specific to my Cygwin installation, or nobody is
+trying to use perftools under Cygwin. If you can reproduce the
+problem, and even better have a solution, you can report it at
+google-perftools@googlegroups.com.
+
+Internal changes include several performance and space-saving tweaks.
+One is user-visible (but in "stealth mode", and otherwise
+undocumented): you can compile with `-DTCMALLOC_SMALL_BUT_SLOW`. In
+this mode, tcmalloc will use less memory overhead, at the cost of
+running (likely not noticeably) slower.
+
+There are many other changes as well, too numerous to recount here,
+but present in the
+[http://google-perftools.googlecode.com/svn/tags/google-perftools-1.8/ChangeLog ChangeLog].
+
+
+=== 7 February 2011 ===
+
+Thanks to endlessr..., who
+[http://code.google.com/p/google-perftools/issues/detail?id=307 identified]
+why some tests were failing under MSVC 10 in release mode. It does not look
+like these failures point toward any problem with tcmalloc itself; rather, the
+problem is with the test, which made some assumptions that broke under
+some aggressive optimizations used in MSVC 10. I'll fix the test, but in
+the meantime, feel free to use perftools even when compiled under MSVC
+10.
+
+=== 4 February 2011 ===
+
+I've just released perftools 1.7
+
+I apologize for the delay since the last release; so many great new
+patches and bugfixes kept coming in (and are still coming in; I also
+apologize to those folks who have to slip until the next release). I
+picked this arbitrary time to make a cut.
+
+Among the many new features in this release are a multi-megabyte
+reduction in the amount of tcmalloc overhead under x86_64, improved
+performance in the case of contention, and many many bugfixes,
+especially architecture-specific bugfixes. See the
+[http://google-perftools.googlecode.com/svn/tags/google-perftools-1.7/ChangeLog ChangeLog]
+for full details.
+
+One architecture-specific change of note is added comments in the
+[http://google-perftools.googlecode.com/svn/tags/perftools-1.7/README README]
+for using tcmalloc under OS X. I'm trying to get my head around the
+exact behavior of the OS X linker, and hope to have more improvements
+for the next release, but I hope these notes help folks who have been
+having trouble with tcmalloc on OS X.
+
+*Windows users*: I've heard reports that some unittests fail on
+Windows when compiled with MSVC 10 in Release mode. All tests pass in
+Debug mode. I've not heard of any problems with earlier versions of
+MSVC. I don't know if this is a problem with the runtime patching (so
+the static patching discussed in README_windows.txt will still work),
+a problem with perftools more generally, or a bug in MSVC 10. Anyone
+with windows expertise that can debug this, I'd be glad to hear from!
+
+
+=== 5 August 2010 ===
+
+I've just released perftools 1.6
+
+This version also has a large number of minor changes, including
+support for `malloc_usable_size()` as a glibc-compatible alias to
+`malloc_size()`, the addition of SVG-based output to `pprof`, and
+experimental support for tcmalloc large pages, which may speed up
+tcmalloc at the cost of greater memory use. To use tcmalloc large
+pages, see the
+[http://google-perftools.googlecode.com/svn/tags/perftools-1.6/INSTALL
+INSTALL file]; for all changes, see the
+[http://google-perftools.googlecode.com/svn/tags/perftools-1.6/ChangeLog
+ChangeLog].
+
+OS X NOTE: improvements in the profiler unittest have turned up an OS
+X issue: in multithreaded programs, it seems that OS X often delivers
+the profiling signal (from setitimer()) to the main thread, even when
+it's sleeping, rather than to the spawned threads that are doing actual
+work. If anyone knows details of how OS X handles SIGPROF events (from
+setitimer) in threaded programs, and has insight into this problem,
+please send mail to google-perftools@googlegroups.com.
+
+To see if you're affected by this, look for profiling time that pprof
+attributes to `___semwait_signal`. This is work being done in other
+threads that is being attributed to sleeping-time in the main thread.
+
+
+=== 20 January 2010 ===
+
+I've just released perftools 1.5
+
+This version has a slew of changes, leading to somewhat faster
+performance and improvements in portability. It adds features like
+`ITIMER_REAL` support to the cpu profiler, and `tc_set_new_mode` to
+mimic the windows function of the same name. Full details are in the
+[http://google-perftools.googlecode.com/svn/tags/perftools-1.5/ChangeLog
+ChangeLog].
+
+
+=== 11 September 2009 ===
+
+I've just released perftools 1.4
+
+The major change in this release is the addition of a debugging malloc
+library! If you link with `libtcmalloc_debug.so` instead of
+`libtcmalloc.so` (and likewise for the `minimal` variants) you'll get
+a debugging malloc, which will catch double-frees, writes to freed
+data, `free`/`delete` and `delete`/`delete[]` mismatches, and even
+(optionally) writes past the end of an allocated block.
+ +We plan to do more with this library in the future, including +supporting it on Windows, and adding the ability to use the debugging +library with your default malloc in addition to using it with +tcmalloc. + +There are also the usual complement of bug fixes, documented in the +ChangeLog, and a few minor user-tunable knobs added to components like +the system allocator. + + +=== 9 June 2009 === + +I've just released perftools 1.3 + +Like 1.2, this has a variety of bug fixes, especially related to the +Windows build. One of my bugfixes is to undo the weird `ld -r` fix to +`.a` files that I introduced in perftools 1.2: it caused problems on +too many platforms. I've reverted back to normal `.a` files. To work +around the original problem that prompted the `ld -r` fix, I now +provide `libtcmalloc_and_profiler.a`, for folks who want to link in +both. + +The most interesting API change is that I now not only override +`malloc`/`free`/etc, I also expose them via a unique set of symbols: +`tc_malloc`/`tc_free`/etc. This enables clients to write their own +memory wrappers that use tcmalloc: +{{{ + void* malloc(size_t size) { void* r = tc_malloc(size); Log(r); return r; } +}}} + + +=== 17 April 2009 === + +I've just released perftools 1.2. + +This is mostly a bugfix release. The major change is internal: I have +a new system for creating packages, which allows me to create 64-bit +packages. (I still don't do that for perftools, because there is +still no great 64-bit solution, with libunwind still giving problems +and --disable-frame-pointers not practical in every environment.) + +Another interesting change involves Windows: a +[http://code.google.com/p/google-perftools/issues/detail?id=126 new +patch] allows users to choose to override malloc/free/etc on Windows +rather than patching, as is done now. This can be used to create +custom CRTs. + +My fix for this +[http://groups.google.com/group/google-perftools/browse_thread/thread/1ff9b50043090d9d/a59210c4206f2060?lnk=gst&q=dynamic#a59210c4206f2060 +bug involving static linking] ended up being to make libtcmalloc.a and +libperftools.a a big .o file, rather than a true `ar` archive. This +should not yield any problems in practice -- in fact, it should be +better, since the heap profiler, leak checker, and cpu profiler will +now all work even with the static libraries -- but if you find it +does, please file a bug report. + +Finally, the profile_handler_unittest provided in the perftools +testsuite (new in this release) is failing on FreeBSD. The end-to-end +test that uses the profile-handler is passing, so I suspect the +problem may be with the test, not the perftools code itself. However, +I do not know enough about how itimers work on FreeBSD to be able to +debug it. If you can figure it out, please let me know! + +=== 11 March 2009 === + +I've just released perftools 1.1! + +It has many changes since perftools 1.0 including + + * Faster performance due to dynamically sized thread caches + * Better heap-sampling for more realistic profiles + * Improved support on Windows (MSVC 7.1 and cygwin) + * Better stacktraces in linux (using VDSO) + * Many bug fixes and feature requests + +Note: if you use the CPU-profiler with applications that fork without +doing an exec right afterwards, please see the README. Recent testing +has shown that profiles are unreliable in that case. The problem has +existed since the first release of perftools. We expect to have a fix +for perftools 1.2. 
For more details, see
+[http://code.google.com/p/google-perftools/issues/detail?id=105 issue 105].
+
+Everyone who uses perftools 1.0 is encouraged to upgrade to perftools
+1.1. If you see any problems with the new release, please file a bug
+report at http://code.google.com/p/google-perftools/issues/list.
+
+Enjoy! diff --git a/src/third_party/gperftools-2.5/README b/src/third_party/gperftools-2.5/README new file mode 100644 index 00000000000..6b99ea8aa79 --- /dev/null +++ b/src/third_party/gperftools-2.5/README @@ -0,0 +1,283 @@ +gperftools
+----------
+(originally Google Performance Tools)
+
+The fastest malloc we've seen; works particularly well with threads
+and STL. Also: thread-friendly heap-checker, heap-profiler, and
+cpu-profiler.
+
+
+OVERVIEW
+---------
+
+gperftools is a collection of a high-performance multi-threaded
+malloc() implementation, plus some pretty nifty performance analysis
+tools.
+
+gperftools is distributed under the terms of the BSD License. Join our
+mailing list at gperftools@googlegroups.com for updates.
+
+gperftools was the original home of the pprof program. But do note that
+the original pprof (which is still included with gperftools) is now
+deprecated in favor of the golang version at https://github.com/google/pprof
+
+
+TCMALLOC
+--------
+Just link in -ltcmalloc or -ltcmalloc_minimal to get the advantages of
+tcmalloc -- a replacement for malloc and new. See below for some
+environment variables you can use with tcmalloc, as well.
+
+tcmalloc functionality is available on all systems we've tested; see
+INSTALL for more details. See README_windows.txt for instructions on
+using tcmalloc on Windows.
+
+NOTE: When compiling programs with gcc that you plan to link
+with libtcmalloc, it's safest to pass in the flags
+
+ -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+
+when compiling. gcc makes some optimizations assuming it is using its
+own, built-in malloc; that assumption obviously isn't true with
+tcmalloc. In practice, we haven't seen any problems with this, but
+the expected risk is highest for users who register their own malloc
+hooks with tcmalloc (using gperftools/malloc_hook.h). The risk is
+lowest for folks who use tcmalloc_minimal (or, of course, who pass in
+the above flags :-) ).
+
+
+HEAP PROFILER
+-------------
+See doc/heap-profiler.html for information about how to use tcmalloc's
+heap profiler and analyze its output.
+
+As a quick-start, do the following after installing this package:
+
+1) Link your executable with -ltcmalloc
+2) Run your executable with the HEAPPROFILE environment var set:
+ $ HEAPPROFILE=/tmp/heapprof <path/to/binary> [binary args]
+3) Run pprof to analyze the heap usage
+ $ pprof <path/to/binary> /tmp/heapprof.0045.heap # run 'ls' to see options
+ $ pprof --gv <path/to/binary> /tmp/heapprof.0045.heap
+
+You can also use LD_PRELOAD to heap-profile an executable that you
+didn't compile.
+
+There are other environment variables, besides HEAPPROFILE, you can
+set to adjust the heap-profiler behavior; cf. "ENVIRONMENT VARIABLES"
+below.
+
+The heap profiler is available on all unix-based systems we've tested;
+see INSTALL for more details. It is not currently available on Windows.
+
+
+HEAP CHECKER
+------------
+See doc/heap-checker.html for information about how to use tcmalloc's
+heap checker.
+
+In order to catch all heap leaks, tcmalloc must be linked *last* into
+your executable. The heap checker may mischaracterize some memory
+accesses in libraries listed after it on the link line.
For instance,
+it may report these libraries as leaking memory when they're not.
+(See the source code for more details.)
+
+As a quick-start, do the following after installing this package:
+
+1) Link your executable with -ltcmalloc
+2) Run your executable with the HEAPCHECK environment var set:
+ $ HEAPCHECK=1 <path/to/binary> [binary args]
+
+Other values for HEAPCHECK: normal (equivalent to "1"), strict, draconian
+
+You can also use LD_PRELOAD to heap-check an executable that you
+didn't compile.
+
+The heap checker is only available on Linux at this time; see INSTALL
+for more details.
+
+
+CPU PROFILER
+------------
+See doc/cpu-profiler.html for information about how to use the CPU
+profiler and analyze its output.
+
+As a quick-start, do the following after installing this package:
+
+1) Link your executable with -lprofiler
+2) Run your executable with the CPUPROFILE environment var set:
+ $ CPUPROFILE=/tmp/prof.out <path/to/binary> [binary args]
+3) Run pprof to analyze the CPU usage
+ $ pprof <path/to/binary> /tmp/prof.out # -pg-like text output
+ $ pprof --gv <path/to/binary> /tmp/prof.out # really cool graphical output
+
+There are other environment variables, besides CPUPROFILE, you can set
+to adjust the cpu-profiler behavior; cf. "ENVIRONMENT VARIABLES" below.
+
+The CPU profiler is available on all unix-based systems we've tested;
+see INSTALL for more details. It is not currently available on Windows.
+
+NOTE: CPU profiling doesn't work after fork (unless you immediately
+ do an exec()-like call afterwards). Furthermore, if you do
+ fork, and the child calls exit(), it may corrupt the profile
+ data. You can use _exit() to work around this. We hope to have
+ a fix for both problems in the next release of perftools
+ (hopefully perftools 1.2).
+
+
+EVERYTHING IN ONE
+-----------------
+If you want the CPU profiler, heap profiler, and heap leak-checker to
+all be available for your application, you can do:
+ gcc -o myapp ... -lprofiler -ltcmalloc
+
+However, if you have a reason to use the static versions of the
+library, this two-library linking won't work:
+ gcc -o myapp ... /usr/lib/libprofiler.a /usr/lib/libtcmalloc.a # errors!
+
+Instead, use the special libtcmalloc_and_profiler library, which we
+make for just this purpose:
+ gcc -o myapp ... /usr/lib/libtcmalloc_and_profiler.a
+
+
+CONFIGURATION OPTIONS
+---------------------
+For advanced users, there are several flags you can pass to
+'./configure' that tweak tcmalloc performance. (These are in addition
+to the environment variables you can set at runtime to affect
+tcmalloc, described below.) See the INSTALL file for details.
+
+
+ENVIRONMENT VARIABLES
+---------------------
+The cpu profiler, heap checker, and heap profiler will lie dormant,
+using no memory or CPU, until you turn them on. (Thus, there's no
+harm in linking -lprofiler into every application, and also -ltcmalloc
+assuming you're ok using the non-libc malloc library.)
+
+The easiest way to turn them on is by setting the appropriate
+environment variables. We have several variables that let you
+enable/disable features as well as tweak parameters.
+
+Here are some of the most important variables:
+
+HEAPPROFILE=<pre> -- turns on heap profiling and dumps data using this prefix
+HEAPCHECK=<type> -- turns on heap checking with strictness 'type'
+CPUPROFILE=<file> -- turns on cpu profiling and dumps data to this file.
+
+PROFILESELECTED=1 -- if set, the cpu-profiler will only profile regions of code
+ surrounded with ProfilerEnable()/ProfilerDisable().
+CPUPROFILE_FREQUENCY=x -- how many interrupts/second the cpu-profiler samples.
+
+TCMALLOC_DEBUG=<level> -- the higher the level, the more messages malloc emits
+MALLOCSTATS=<level> -- prints memory-use stats at program-exit
+
+For a full list of variables, see the documentation pages:
+ doc/cpuprofile.html
+ doc/heapprofile.html
+ doc/heap_checker.html
+
+
+COMPILING ON NON-LINUX SYSTEMS
+------------------------------
+
+Perftools was developed and tested on x86 Linux systems, and it works
+in its full generality only on those systems. However, we've
+successfully ported much of the tcmalloc library to FreeBSD, Solaris
+x86, and Darwin (Mac OS X) x86 and ppc; and we've ported the basic
+functionality in tcmalloc_minimal to Windows. See INSTALL for details.
+See README_windows.txt for details on the Windows port.
+
+
+PERFORMANCE
+-----------
+
+If you're interested in some third-party comparisons of tcmalloc to
+other malloc libraries, here are a few web pages that have been
+brought to our attention. The first discusses the effect of using
+various malloc libraries on OpenLDAP. The second compares tcmalloc to
+win32's malloc.
+ http://www.highlandsun.com/hyc/malloc/
+ http://gaiacrtn.free.fr/articles/win32perftools.html
+
+It's possible to build tcmalloc in a way that trades off faster
+performance (particularly for deletes) at the cost of more memory
+fragmentation (that is, more unusable memory on your system). See the
+INSTALL file for details.
+
+
+OLD SYSTEM ISSUES
+-----------------
+
+When compiling perftools on some old systems, like RedHat 8, you may
+get an error like this:
+ ___tls_get_addr: symbol not found
+
+This means that you have a system where some parts are updated enough
+to support Thread Local Storage, but others are not. The perftools
+configure script can't always detect this kind of case, leading to
+that error. To fix it, just comment out (or delete) the line
+ #define HAVE_TLS 1
+in your config.h file before building.
+
+
+64-BIT ISSUES
+-------------
+
+There are two issues that can cause program hangs or crashes on x86_64
+64-bit systems, which use the libunwind library to get stack-traces.
+Neither issue should affect the core tcmalloc library; they both
+affect the perftools tools such as cpu-profiler, heap-checker, and
+heap-profiler.
+
+1) Some libc's -- at least glibc 2.4 on x86_64 -- have a bug where the
+libc function dl_iterate_phdr() acquires its locks in the wrong
+order. This bug should not affect tcmalloc, but may cause occasional
+deadlock with the cpu-profiler, heap-profiler, and heap-checker.
+Its likelihood increases with the number of dlopen() calls an executable
+makes. Most executables don't have any, though several library routines
+like getgrgid() call dlopen() behind the scenes.
+
+2) On x86-64 64-bit systems, while tcmalloc itself works fine, the
+cpu-profiler tool is unreliable: it will sometimes work, but sometimes
+cause a segfault. I'll explain the problem first, and then some
+workarounds.
+
+Note that this only affects the cpu-profiler, which is a
+gperftools feature you must turn on manually by setting the
+CPUPROFILE environment variable. If you do not turn on cpu-profiling,
+you shouldn't see any crashes due to perftools.
+
+The gory details: The underlying problem is in the backtrace()
+function, which is a built-in function in libc.
+
+Backtracing is fairly straightforward in the normal case, but can run
+into problems when having to backtrace across a signal frame.
+Unfortunately, the cpu-profiler uses signals in order to register a
+profiling event, so every backtrace that the profiler does crosses a
+signal frame.
+
+In our experience, the only time there is trouble is when the signal
+fires in the middle of pthread_mutex_lock. pthread_mutex_lock is
+called quite a bit from system libraries, particularly at program
+startup and when creating a new thread.
+
+The solution: The DWARF debugging format has support for 'cfi
+annotations', which make it easy to recognize a signal frame. Some OS
+distributions, such as Fedora and gentoo 2007.0, have already added
+cfi annotations to their libc. A future version of libunwind should
+recognize these annotations; these systems should not see any
+crashes.
+
+Workarounds: If you see problems with crashes when running the
+cpu-profiler, consider inserting ProfilerStart()/ProfilerStop() into
+your code, rather than setting CPUPROFILE. This will profile only
+those sections of the codebase. Though we haven't done much testing,
+in theory this should reduce the chance of crashes by limiting the
+signal generation to only a small part of the codebase. Ideally, you
+would not use ProfilerStart()/ProfilerStop() around code that spawns
+new threads, or is otherwise likely to cause a call to
+pthread_mutex_lock!
+
+---
+17 May 2011 diff --git a/src/third_party/gperftools-2.5/README_windows.txt b/src/third_party/gperftools-2.5/README_windows.txt new file mode 100644 index 00000000000..7bba12201e0 --- /dev/null +++ b/src/third_party/gperftools-2.5/README_windows.txt @@ -0,0 +1,120 @@ +--- COMPILING
+
+This project has begun being ported to Windows; only tcmalloc_minimal
+is supported at this time. A working solution file exists in this
+directory:
+ gperftools.sln
+
+You can load this solution file into VC++ 7.1 (Visual Studio 2003) or
+later -- in the latter case, it will automatically convert the files
+to the latest format for you.
+
+When you build the solution, it will create a number of unittests,
+which you can run by hand (or, more easily, under the Visual Studio
+debugger) to make sure everything is working properly on your system.
+The binaries will end up in a directory called "debug" or "release" in
+the top-level directory (next to the .sln file). It will also create
+two binaries, nm-pdb and addr2line-pdb, which you should install in
+the same directory you install the 'pprof' perl script.
+
+I don't know very much about how to install DLLs on Windows, so you'll
+have to figure out that part for yourself. If you choose to just
+re-use the existing .sln, make sure you set the IncludeDir's
+appropriately! Look at the properties for libtcmalloc_minimal.dll.
+
+Note that these projects are set to build in Debug mode by default.
+You may want to change them to Release mode.
+
+To use tcmalloc_minimal in your own projects, you should only need to
+build the dll and install it someplace, so you can link it into
+further binaries. To use the dll, you need to add the following to
+the linker line of your executable:
+ "libtcmalloc_minimal.lib" /INCLUDE:"__tcmalloc"
+
+Here is how to accomplish this in Visual Studio 2005 (VC8):
+
+1) Have your executable depend on the tcmalloc library by selecting
+ "Project Dependencies..." from the "Project" menu. Your executable
+ should depend on "libtcmalloc_minimal".
+
+2) Have your executable depend on a tcmalloc symbol -- this is
+ necessary so the linker doesn't "optimize out" the libtcmalloc
+ dependency -- by right-clicking on your executable's project (in
+ the solution explorer), selecting Properties from the pull-down
+ menu, then selecting "Configuration Properties" -> "Linker" ->
+ "Input". Then, in the "Force Symbol References" field, enter the
+ text "__tcmalloc" (without the quotes). Be sure to do this for both
+ debug and release modes!
+
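+If you prefer to keep these two settings in source code rather than in
+the project properties, MSVC's #pragma comment directives can express
+the same thing. A minimal sketch (assuming libtcmalloc_minimal.lib is
+on the linker's library search path; the forced symbol is the same
+"__tcmalloc" named in step 2 above):
+
+   // In any one source file of your executable:
+   #pragma comment(lib, "libtcmalloc_minimal.lib")  // step 1: depend on tcmalloc
+   #pragma comment(linker, "/include:__tcmalloc")   // step 2: force the reference
+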
+You can also link tcmalloc code in statically -- see the example
+project tcmalloc_minimal_unittest-static, which does this. For this
+to work, you'll need to add "/D PERFTOOLS_DLL_DECL=" to the compile
+line of every perftools .cc file. You do not need to depend on the
+tcmalloc symbol in this case (that is, you don't need to do either
+step 1 or step 2 from above).
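+
+The reason this define works: the perftools headers wrap each public
+symbol in the PERFTOOLS_DLL_DECL macro, which defaults to a __declspec
+annotation appropriate for DLL clients. A simplified sketch of the
+pattern (illustrative, not the verbatim perftools header):
+
+   // Defining the macro to nothing (/D PERFTOOLS_DLL_DECL=) leaves the
+   // declarations unadorned, which is what a static build needs.
+   #ifndef PERFTOOLS_DLL_DECL
+   # define PERFTOOLS_DLL_DECL __declspec(dllimport)
+   #endif
+   PERFTOOLS_DLL_DECL void* tc_malloc(size_t size);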
+
+An alternative to all the above is to statically link your application
+with libc, and then replace its malloc with tcmalloc. This allows you
+to just build and link your program normally; the tcmalloc support
+comes in a post-processing step. This is more reliable than the above
+technique (which depends on run-time patching, which is inherently
+fragile), though more work to set up. For details, see
+ https://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b
+
+
+--- THE HEAP-PROFILER
+
+The heap-profiler has had a preliminary port to Windows but does not
+build on Windows by default. It has not been well tested, and
+probably does not work at all when Frame Pointer Optimization (FPO) is
+enabled -- that is, in release mode. The other features of perftools,
+such as the cpu-profiler and leak-checker, have not yet been ported to
+Windows at all.
+
+
+--- WIN64
+
+The function-patcher has to disassemble code, and is very
+x86-specific. However, the rest of perftools should work fine for
+both x86 and x64. In particular, if you use the 'statically link with
+libc, and replace its malloc with tcmalloc' approach, mentioned above,
+it should be possible to use tcmalloc with 64-bit windows.
+
+As of perftools 1.10, there is some support for disassembling x86_64
+instructions, for work with win64. This work is preliminary, but the
+test file preamble_patcher_test.cc is provided to play around with
+that a bit. preamble_patcher_test will not compile on win32.
+
+
+--- ISSUES
+
+NOTE FOR WIN2K USERS: According to reports
+(http://code.google.com/p/gperftools/issues/detail?id=127)
+the stack-tracing necessary for the heap-profiler does not work on
+Win2K. The best workaround, if you are building on a Win2K system,
+is to add "/D NO_TCMALLOC_SAMPLES=" to your build to turn off the
+stack-tracing. You will not be able to use the heap-profiler if you
+do this.
+
+NOTE ON _MSIZE and _RECALLOC: The tcmalloc version of _msize returns
+the size of the region tcmalloc allocated for you -- which is at least
+as many bytes as you asked for, but may be more. (btw, these *are* bytes
+you own, even if you didn't ask for all of them, so it's correct code
+to access all of them if you want.) Unfortunately, the Windows CRT
+_recalloc() routine assumes that _msize returns exactly as many bytes
+as were requested. As a result, _recalloc() may not zero out new
+bytes correctly. IT'S SAFEST NOT TO USE _RECALLOC WITH TCMALLOC.
+_recalloc() is a tricky routine to use in any case (it's not safe to
+use with realloc, for instance).
+
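+To make the _msize() behavior concrete, here is a small illustrative
+sketch (the request of 10 bytes is arbitrary; under tcmalloc _msize()
+may report more than 10, and every reported byte is usable):
+
+   #include <malloc.h>   // _msize (Windows CRT)
+   #include <stdlib.h>
+   #include <string.h>
+
+   int main(void) {
+     char* p = (char*)malloc(10);
+     size_t usable = _msize(p);  // >= 10: the full region tcmalloc gave you
+     memset(p, 0, usable);       // fine: you own all 'usable' bytes
+     free(p);
+     return 0;
+   }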
+
+I have little experience with Windows programming, so there may be
+better ways to set this up than I've done! If you run across any
+problems, please post to the google-perftools Google Group, or report
+them on the gperftools Google Code site:
+ http://groups.google.com/group/google-perftools
+ http://code.google.com/p/gperftools/issues/list
+
+-- craig
+
+Last modified: 2 February 2012
diff --git a/src/third_party/gperftools-2.5/TODO b/src/third_party/gperftools-2.5/TODO new file mode 100644 index 00000000000..550f7e09b9d --- /dev/null +++ b/src/third_party/gperftools-2.5/TODO @@ -0,0 +1,47 @@ +HEAP PROFILER
+
+1) Fix heap profiling under all STLs
+ * Find out how to force non-glibc STL libraries to call new() and
+ delete() for every allocation / deallocation.
+ * Make heap profiler ignore STL-internal allocations for those
+ libraries under which we cannot profile accurately, so we only
+ see object-level leaks.
+2) Remove dependency on tcmalloc?
+3) Port to non-linux O/Ses (right now code uses /proc for library info)
+4) Port to non-x86 architectures (locking code in spinlock is x86-specific)
+5) Port to C?
+6) Figure out how to get setenv() to work properly before main() in
+ shared libraries, and get rid of the profile-naming hack once we
+ do. (See HeapProfiler::Init().)
+
+
+HEAP CHECKER
+
+1) Remove requirement that the heap-checker must be linked last into
+ an application (hard! -- it needs its global constructor to run
+ first)
+
+TCMALLOC
+
+1) Implement mallinfo/mallopt
+2) Have tcmalloc work correctly when libpthread is not linked in
+ (currently working for glibc, could use other libc's too)
+3) Return memory to the system when requirements drop
+4) Explore coloring allocated objects to avoid cache conflicts
+5) Explore biasing reclamation to larger addresses
+6) Add contention stats to a synchronization.cc (can do spinlocks,
+ but threads? -- may have to provide our own thread implementation)
+
+CPU PROFILER
+
+1) Figure out how to get setenv() to work properly before main() in
+ shared libraries, and get rid of the profile-naming hack once we
+ do. (See Profiler::GetUniquePathFromEnv().)
+2) Resolve crashing problems on x86_64 (see README)
+
+STACKTRACE
+
+1) Remove dependency on linux/x86
+
+---
+11 March 2008 diff --git a/src/third_party/gperftools-2.5/src/addressmap-inl.h b/src/third_party/gperftools-2.5/src/addressmap-inl.h new file mode 100644 index 00000000000..fd1dc5b6ffe --- /dev/null +++ b/src/third_party/gperftools-2.5/src/addressmap-inl.h @@ -0,0 +1,422 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//
+// A fast map from addresses to values. Assumes that addresses are
+// clustered. The main use is intended to be for heap-profiling.
+// May be too memory-hungry for other uses.
+//
+// We use a user-defined allocator/de-allocator so that we can use
+// this data structure during heap-profiling.
+//
+// IMPLEMENTATION DETAIL:
+//
+// Some default definitions/parameters:
+// * Block -- aligned 128-byte region of the address space
+// * Cluster -- aligned 1-MB region of the address space
+// * Block-ID -- block-number within a cluster
+// * Cluster-ID -- Starting address of cluster divided by cluster size
+//
+// We use a three-level map to represent the state:
+// 1. A hash-table maps from a cluster-ID to the data for that cluster.
+// 2. For each non-empty cluster we keep an array indexed by
+// block-ID that points to the first entry in the linked-list
+// for the block.
+// 3. At the bottom, we keep a singly-linked list of all
+// entries in a block (for non-empty blocks).
+//
+// hash table
+// +-------------+
+// | id->cluster |---> ...
+// | ... |
+// | id->cluster |---> Cluster
+// +-------------+ +-------+ Data for one block
+// | nil | +------------------------------------+
+// | ----+---|->[addr/value]-->[addr/value]-->... |
+// | nil | +------------------------------------+
+// | ----+--> ...
+// | nil |
+// | ... |
+// +-------+
+//
+// Note that we require zero-bytes of overhead for completely empty
+// clusters. The minimum space requirement for a cluster is the size
+// of the hash-table entry plus a pointer value for each block in
+// the cluster. Empty blocks impose no extra space requirement.
+//
+// The cost of a lookup is:
+// a. A hash-table lookup to find the cluster
+// b. An array access in the cluster structure
+// c. A traversal over the linked-list for a block
+
+#ifndef BASE_ADDRESSMAP_INL_H_
+#define BASE_ADDRESSMAP_INL_H_
+
+#include "config.h"
+#include <stddef.h>
+#include <string.h>
+#if defined HAVE_STDINT_H
+#include <stdint.h> // to get uint16_t (ISO naming madness)
+#elif defined HAVE_INTTYPES_H
+#include <inttypes.h> // another place uint16_t might be defined
+#else
+#include <sys/types.h> // our last best hope
+#endif
+
+// This class is thread-unsafe -- that is, instances of this class can
+// not be accessed concurrently by multiple threads -- because the
+// callback function for Iterate() may mutate contained values. If the
+// callback functions you pass do not mutate their Value* argument,
+// AddressMap can be treated as thread-compatible -- that is, it's
+// safe for multiple threads to call "const" methods on this class,
+// but not safe for one thread to call const methods on this class
+// while another thread is calling non-const methods on the class.
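+//
+// Example usage (an illustrative sketch; any malloc/free-compatible
+// pair can serve as the allocator/deallocator):
+//
+//   AddressMap<int> map(&malloc, &free);
+//   map.Insert(ptr, 42);           // associate 42 with address 'ptr'
+//   const int* v = map.Find(ptr);  // points at the stored 42, or NULL
+//   int removed;
+//   map.FindAndRemove(ptr, &removed);  // sets removed to 42, unlinks entry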
+template <class Value>
+class AddressMap {
+ public:
+  typedef void* (*Allocator)(size_t size);
+  typedef void (*DeAllocator)(void* ptr);
+  typedef const void* Key;
+
+  // Create an AddressMap that uses the specified allocator/deallocator.
+  // The allocator/deallocator should behave like malloc/free.
+  // For instance, the allocator does not need to return initialized memory.
+  AddressMap(Allocator alloc, DeAllocator dealloc);
+  ~AddressMap();
+
+  // If the map contains an entry for "key", return it. Else return NULL.
+  inline const Value* Find(Key key) const;
+  inline Value* FindMutable(Key key);
+
+  // Insert <key,value> into the map. Any old value associated
+  // with key is forgotten.
+  void Insert(Key key, Value value);
+
+  // Remove any entry for key in the map. If an entry was found
+  // and removed, stores the associated value in "*removed_value"
+  // and returns true. Else returns false.
+  bool FindAndRemove(Key key, Value* removed_value);
+
+  // Similar to Find but we assume that keys are addresses of non-overlapping
+  // memory ranges whose sizes are given by size_func.
+  // If the map contains a range into which "key" points
+  // (at its start or inside of it, but not at the end),
+  // return the address of the associated value
+  // and store its key in "*res_key".
+  // Else return NULL.
+  // max_size specifies the largest range size possibly in existence now.
+  typedef size_t (*ValueSizeFunc)(const Value& v);
+  const Value* FindInside(ValueSizeFunc size_func, size_t max_size,
+                          Key key, Key* res_key);
+
+  // Iterate over the address map calling 'callback'
+  // for all stored key-value pairs and passing 'arg' to it.
+  // We don't use the full Closure/Callback machinery, to avoid adding
+  // unnecessary dependencies to this class for low-level uses.
+  template<class Type>
+  inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const;
+
+ private:
+  typedef uintptr_t Number;
+
+  // The implementation assumes that addresses inserted into the map
+  // will be clustered. We take advantage of this fact by splitting
+  // up the address-space into blocks and using a linked-list entry
+  // for each block.
+
+  // Size of each block. There is one linked-list for each block, so
+  // do not make the block-size too big. Otherwise, a lot of time
+  // will be spent traversing linked lists.
+  static const int kBlockBits = 7;
+  static const int kBlockSize = 1 << kBlockBits;
+
+  // Entry kept in per-block linked-list
+  struct Entry {
+    Entry* next;
+    Key key;
+    Value value;
+  };
+
+  // We further group a sequence of consecutive blocks into a cluster.
+  // The data for a cluster is represented as a dense array of
+  // linked-lists, one list per contained block.
+  static const int kClusterBits = 13;
+  static const Number kClusterSize = 1 << (kBlockBits + kClusterBits);
+  static const int kClusterBlocks = 1 << kClusterBits;
+
+  // We use a simple chaining hash-table to represent the clusters.
+  struct Cluster {
+    Cluster* next;                  // Next cluster in hash table chain
+    Number id;                      // Cluster ID
+    Entry* blocks[kClusterBlocks];  // Per-block linked-lists
+  };
+
+  // Number of hash-table entries. With the block-size/cluster-size
+  // defined above, each cluster covers 1 MB, so a 4K-entry
+  // hash-table will give an average hash-chain length of 1 for 4GB of
+  // in-use memory.
+  static const int kHashBits = 12;
+  static const int kHashSize = 1 << kHashBits;
+
+  // Number of entry objects allocated at a time
+  static const int ALLOC_COUNT = 64;
+
+  Cluster** hashtable_;            // The hash-table
+  Entry* free_;                    // Free list of unused Entry objects
+
+  // Multiplicative hash function:
+  // The value "kHashMultiplier" is the bottom 32 bits of
+  //    int((sqrt(5)-1)/2 * 2^32)
+  // This is a good multiplier as suggested in CLR, Knuth.  The hash
+  // value is taken to be the top "k" bits of the bottom 32 bits
+  // of the multiplied value.
+  static const uint32_t kHashMultiplier = 2654435769u;
+  static int HashInt(Number x) {
+    // Multiply by a constant and take the top bits of the result.
+    const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier;
+    return static_cast<int>(m >> (32 - kHashBits));
+  }
+
+  // Find cluster object for specified address.  If not found
+  // and "create" is true, create the object.  If not found
+  // and "create" is false, return NULL.
+  //
+  // This method is bitwise-const if create is false.
+  Cluster* FindCluster(Number address, bool create) {
+    // Look in hashtable
+    const Number cluster_id = address >> (kBlockBits + kClusterBits);
+    const int h = HashInt(cluster_id);
+    for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
+      if (c->id == cluster_id) {
+        return c;
+      }
+    }
+
+    // Create cluster if necessary
+    if (create) {
+      Cluster* c = New<Cluster>(1);
+      c->id = cluster_id;
+      c->next = hashtable_[h];
+      hashtable_[h] = c;
+      return c;
+    }
+    return NULL;
+  }
+
+  // Return the block ID for an address within its cluster
+  static int BlockID(Number address) {
+    return (address >> kBlockBits) & (kClusterBlocks - 1);
+  }
+
+  //--------------------------------------------------------------
+  // Memory management -- we keep all objects we allocate linked
+  // together in a singly linked list so we can get rid of them
+  // when we are all done.  Furthermore, we allow the client to
+  // pass in custom memory allocator/deallocator routines.
+  //--------------------------------------------------------------
+  struct Object {
+    Object* next;
+    // The real data starts here
+  };
+
+  Allocator alloc_;                // The allocator
+  DeAllocator dealloc_;            // The deallocator
+  Object* allocated_;              // List of allocated objects
+
+  // Allocates a zeroed array of T with length "num".  Also inserts
+  // the allocated block into a linked list so it can be deallocated
+  // when we are all done.
+ template <class T> T* New(int num) { + void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T)); + memset(ptr, 0, sizeof(Object) + num*sizeof(T)); + Object* obj = reinterpret_cast<Object*>(ptr); + obj->next = allocated_; + allocated_ = obj; + return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1); + } +}; + +// More implementation details follow: + +template <class Value> +AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc) + : free_(NULL), + alloc_(alloc), + dealloc_(dealloc), + allocated_(NULL) { + hashtable_ = New<Cluster*>(kHashSize); +} + +template <class Value> +AddressMap<Value>::~AddressMap() { + // De-allocate all of the objects we allocated + for (Object* obj = allocated_; obj != NULL; /**/) { + Object* next = obj->next; + (*dealloc_)(obj); + obj = next; + } +} + +template <class Value> +inline const Value* AddressMap<Value>::Find(Key key) const { + return const_cast<AddressMap*>(this)->FindMutable(key); +} + +template <class Value> +inline Value* AddressMap<Value>::FindMutable(Key key) { + const Number num = reinterpret_cast<Number>(key); + const Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { + if (e->key == key) { + return &e->value; + } + } + } + return NULL; +} + +template <class Value> +void AddressMap<Value>::Insert(Key key, Value value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* const c = FindCluster(num, true/*create*/); + + // Look in linked-list for this block + const int block = BlockID(num); + for (Entry* e = c->blocks[block]; e != NULL; e = e->next) { + if (e->key == key) { + e->value = value; + return; + } + } + + // Create entry + if (free_ == NULL) { + // Allocate a new batch of entries and add to free-list + Entry* array = New<Entry>(ALLOC_COUNT); + for (int i = 0; i < ALLOC_COUNT-1; i++) { + array[i].next = &array[i+1]; + } + array[ALLOC_COUNT-1].next = free_; + free_ = &array[0]; + } + Entry* e = free_; + free_ = e->next; + e->key = key; + e->value = value; + e->next = c->blocks[block]; + c->blocks[block] = e; +} + +template <class Value> +bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) { + Entry* e = *p; + if (e->key == key) { + *removed_value = e->value; + *p = e->next; // Remove e from linked-list + e->next = free_; // Add e to free-list + free_ = e; + return true; + } + } + } + return false; +} + +template <class Value> +const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func, + size_t max_size, + Key key, + Key* res_key) { + const Number key_num = reinterpret_cast<Number>(key); + Number num = key_num; // we'll move this to move back through the clusters + while (1) { + const Cluster* c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + while (1) { + const int block = BlockID(num); + bool had_smaller_key = false; + for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) { + const Number e_num = reinterpret_cast<Number>(e->key); + if (e_num <= key_num) { + if (e_num == key_num || // to handle 0-sized ranges + key_num < e_num + (*size_func)(e->value)) { + *res_key = e->key; + return &e->value; + } + had_smaller_key = true; + } + } + if (had_smaller_key) return NULL; // got a range before 'key' + // and it did not contain 'key' + if (block == 0) break; + 
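// (Note: the backward walk below is why max_size is essential -- a
+        // matching range can start at most max_size bytes before key_num,
+        // so the scan over earlier blocks and clusters is cut off quickly.)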
+        // try address-wise previous block
+        num |= kBlockSize - 1;  // start at the last addr of prev block
+        num -= kBlockSize;
+        if (key_num - num > max_size) return NULL;
+      }
+    }
+    if (num < kClusterSize) return NULL;  // first cluster
+    // go to address-wise previous cluster to try
+    num |= kClusterSize - 1;  // start at the last block of previous cluster
+    num -= kClusterSize;
+    if (key_num - num > max_size) return NULL;
+    // Having max_size to limit the search is crucial: else
+    // we have to traverse a lot of empty clusters (or blocks).
+    // We can avoid needing max_size if we put clusters into
+    // a search tree, but performance suffers considerably
+    // if we do this with std::set.
+  }
+}
+
+template <class Value>
+template <class Type>
+inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type),
+                                       Type arg) const {
+  // We could optimize this by traversing only non-empty clusters and/or
+  // blocks, but it does not speed up the heap-checker noticeably.
+  for (int h = 0; h < kHashSize; ++h) {
+    for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
+      for (int b = 0; b < kClusterBlocks; ++b) {
+        for (Entry* e = c->blocks[b]; e != NULL; e = e->next) {
+          callback(e->key, &e->value, arg);
+        }
+      }
+    }
+  }
+}
+
+#endif  // BASE_ADDRESSMAP_INL_H_
diff --git a/src/third_party/gperftools-2.5/src/base/arm_instruction_set_select.h b/src/third_party/gperftools-2.5/src/base/arm_instruction_set_select.h
new file mode 100644
index 00000000000..6fde685272c
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/arm_instruction_set_select.h
@@ -0,0 +1,84 @@
+// Copyright (c) 2011, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// +// Author: Alexander Levitskiy +// +// Generalizes the plethora of ARM flavors available to an easier to manage set +// Defs reference is at https://wiki.edubuntu.org/ARM/Thumb2PortingHowto + +#ifndef ARM_INSTRUCTION_SET_SELECT_H_ +#define ARM_INSTRUCTION_SET_SELECT_H_ + +#if defined(__ARM_ARCH_8A__) +# define ARMV8 1 +#endif + +#if defined(ARMV8) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7A__) +# define ARMV7 1 +#endif + +#if defined(ARMV7) || \ + defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +# define ARMV6 1 +#endif + +#if defined(ARMV6) || \ + defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || \ + defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define ARMV5 1 +#endif + +#if defined(ARMV5) || \ + defined(__ARM_ARCH_4__) || \ + defined(__ARM_ARCH_4T__) +# define ARMV4 1 +#endif + +#if defined(ARMV4) || \ + defined(__ARM_ARCH_3__) || \ + defined(__ARM_ARCH_3M__) +# define ARMV3 1 +#endif + +#if defined(ARMV3) || \ + defined(__ARM_ARCH_2__) +# define ARMV2 1 +#endif + +#endif // ARM_INSTRUCTION_SET_SELECT_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-arm-generic.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-arm-generic.h new file mode 100644 index 00000000000..d0f941309bb --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-arm-generic.h @@ -0,0 +1,228 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2003, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --- +// +// Author: Lei Zhang, Sasha Levitskiy +// +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// LinuxKernelCmpxchg is from Google Gears. 
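+//
+// For orientation, a sketch (not part of the original file) of how further
+// operations can be derived from the kernel cmpxchg helper declared below.
+// The helper returns zero when the swap succeeds, so a relaxed
+// fetch-and-add, say, is just a retry loop (FetchAndAdd is a hypothetical
+// name, not an API of this file):
+//
+//   Atomic32 FetchAndAdd(volatile Atomic32* ptr, Atomic32 delta) {
+//     Atomic32 old_value;
+//     do {
+//       old_value = *ptr;
+//     } while (pLinuxKernelCmpxchg(old_value, old_value + delta,
+//                                  const_cast<Atomic32*>(ptr)));
+//     return old_value;
+//   }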
+
+#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
+#define BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "base/basictypes.h"
+
+typedef int32_t Atomic32;
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 0xffff0fc0 is the hard coded address of a function provided by
+// the kernel which implements an atomic compare-exchange. On older
+// ARM architecture revisions (pre-v6) this may be implemented using
+// a syscall. This address is stable, and in active use (hard coded)
+// by at least glibc-2.7 and the Android C library.
+// pLinuxKernelCmpxchg has both acquire and release barrier semantics.
+typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
+                                           Atomic32 new_value,
+                                           volatile Atomic32* ptr);
+LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg ATTRIBUTE_WEAK =
+    (LinuxKernelCmpxchgFunc) 0xffff0fc0;
+
+typedef void (*LinuxKernelMemoryBarrierFunc)(void);
+LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK =
+    (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
+
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev_value = *ptr;
+  do {
+    if (!pLinuxKernelCmpxchg(old_value, new_value,
+                             const_cast<Atomic32*>(ptr))) {
+      return old_value;
+    }
+    prev_value = *ptr;
+  } while (prev_value == old_value);
+  return prev_value;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  Atomic32 old_value;
+  do {
+    old_value = *ptr;
+  } while (pLinuxKernelCmpxchg(old_value, new_value,
+                               const_cast<Atomic32*>(ptr)));
+  return old_value;
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void MemoryBarrier() {
+  pLinuxKernelMemoryBarrier();
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  MemoryBarrier();
+  *ptr = value;
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;
+  MemoryBarrier();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+
+// 64-bit versions are not implemented yet.
+ +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("NoBarrier_Store"); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Acquire_Store64"); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Release_Store"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("NoBarrier_Load"); + return 0; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Acquire_Load"); + return 0; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Release_Load"); + return 0; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap"); + return 0; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Release_CompareAndSwap"); + return 0; +} + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-arm-v6plus.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-arm-v6plus.h new file mode 100644 index 00000000000..35f10481b04 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-arm-v6plus.h @@ -0,0 +1,330 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Author: Sasha Levitskiy
+// based on atomicops-internals by Sanjay Ghemawat
+//
+// This file is an internal atomic implementation, use base/atomicops.h instead.
+//
+// This code implements ARM atomics for architectures V6 and newer.
+
+#ifndef BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
+#define BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "base/basictypes.h"  // For COMPILE_ASSERT
+
+// The LDREXD and STREXD instructions are available in all ARM v7 variants
+// and above.  In v6, only some variants support them.  For simplicity, we
+// only use exclusive 64-bit load/store in V7 or above.
+#if defined(ARMV7)
+# define BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+#endif
+
+typedef int32_t Atomic32;
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 32-bit low-level ops
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 oldval, res;
+  do {
+    __asm__ __volatile__(
+    "ldrex   %1, [%3]\n"
+    "mov     %0, #0\n"
+    "teq     %1, %4\n"
+    // The following IT (if-then) instruction is needed for the subsequent
+    // conditional instruction STREXEQ when compiling in THUMB mode.
+    // In ARM mode, the compiler/assembler will not generate any code for it.
+ "it eq\n" + "strexeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr) + : "r" (ptr), "Ir" (old_value), "r" (new_value) + : "cc"); + } while (res); + return oldval; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 tmp, old; + __asm__ __volatile__( + "1:\n" + "ldrex %1, [%2]\n" + "strex %0, %3, [%2]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (tmp), "=&r" (old) + : "r" (ptr), "r" (new_value) + : "cc", "memory"); + return old; +} + +inline void MemoryBarrier() { +#if !defined(ARMV7) + uint32_t dest = 0; + __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory"); +#else + __asm__ __volatile__("dmb" : : : "memory"); +#endif +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + MemoryBarrier(); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit versions are only available if LDREXD and STREXD instructions +// are available. +#ifdef BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD + +#define BASE_HAS_ATOMIC64 1 + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 oldval, res; + do { + __asm__ __volatile__( + "ldrexd %1, [%3]\n" + "mov %0, #0\n" + "teq %Q1, %Q4\n" + // The following IT (if-then) instructions are needed for the subsequent + // conditional instructions when compiling in THUMB mode. + // In ARM mode, the compiler/assembler will not generate any code for it. 
+ "it eq\n" + "teqeq %R1, %R4\n" + "it eq\n" + "strexdeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Q" (*ptr) + : "r" (ptr), "Ir" (old_value), "r" (new_value) + : "cc"); + } while (res); + return oldval; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + int store_failed; + Atomic64 old; + __asm__ __volatile__( + "1:\n" + "ldrexd %1, [%2]\n" + "strexd %0, %3, [%2]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (store_failed), "=&r" (old) + : "r" (ptr), "r" (new_value) + : "cc", "memory"); + return old; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + int store_failed; + Atomic64 dummy; + __asm__ __volatile__( + "1:\n" + // Dummy load to lock cache line. + "ldrexd %1, [%3]\n" + "strexd %0, %2, [%3]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (store_failed), "=&r"(dummy) + : "r"(value), "r" (ptr) + : "cc", "memory"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 res; + __asm__ __volatile__( + "ldrexd %0, [%1]\n" + "clrex\n" + : "=r" (res) + : "r"(ptr), "Q"(*ptr)); + return res; +} + +#else // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD + +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("Acquire_AtomicExchange"); + return 0; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("Release_AtomicExchange"); + return 0; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("NoBarrier_Store"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("NoBarrier_Load"); + return 0; +} + +#endif // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + MemoryBarrier(); + NoBarrier_Store(ptr, value); +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + MemoryBarrier(); + return NoBarrier_CompareAndSwap(ptr, old_value, 
new_value); +} + +} // namespace subtle ends +} // namespace base ends + +#endif // BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-gcc.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-gcc.h new file mode 100644 index 00000000000..f8d27863cb7 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-gcc.h @@ -0,0 +1,203 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, Linaro +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// --- +// +// Author: Riku Voipio, riku.voipio@linaro.org +// +// atomic primitives implemented with gcc atomic intrinsics: +// http://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html +// + +#ifndef BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ +#define BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/basictypes.h" + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +inline void MemoryBarrier() { + __sync_synchronize(); +} + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELAXED); +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_ACQUIRE); +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELEASE); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return prev_value; +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit versions + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELAXED); +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_ACQUIRE); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELEASE); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, 
new_value, + 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return prev_value; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return *ptr; +} + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-linuxppc.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-linuxppc.h new file mode 100644 index 00000000000..b52fdf0d1ec --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-linuxppc.h @@ -0,0 +1,437 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + */ + +// Implementation of atomic operations for ppc-linux. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". 
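+//
+// (Illustrative note, not from the original file: the acquire/release
+// operations defined here support the usual message-passing idiom, where
+// "payload" and "flag" are hypothetical shared variables:
+//
+//   // producer                          // consumer
+//   payload = 42;                        if (Acquire_Load(&flag) == 1) {
+//   Release_Store(&flag, 1);               assert(payload == 42);
+//                                        }
+// )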
+ +#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ +#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ + +typedef int32_t Atomic32; + +#ifdef __PPC64__ +#define BASE_HAS_ATOMIC64 1 +#endif + +namespace base { +namespace subtle { + +static inline void _sync(void) { + __asm__ __volatile__("sync": : : "memory"); +} + +static inline void _lwsync(void) { + // gcc defines __NO_LWSYNC__ when appropriate; see + // http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01238.html +#ifdef __NO_LWSYNC__ + __asm__ __volatile__("msync": : : "memory"); +#else + __asm__ __volatile__("lwsync": : : "memory"); +#endif +} + +static inline void _isync(void) { + __asm__ __volatile__("isync": : : "memory"); +} + +static inline Atomic32 OSAtomicAdd32(Atomic32 amount, Atomic32 *value) { + Atomic32 t; + __asm__ __volatile__( +"1: lwarx %0,0,%3\n\ + add %0,%2,%0\n\ + stwcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc"); + return t; +} + +static inline Atomic32 OSAtomicAdd32Barrier(Atomic32 amount, Atomic32 *value) { + Atomic32 t; + _lwsync(); + t = OSAtomicAdd32(amount, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in OSAtomicAdd32) has a + // conditional branch with a data dependency on the update. + // Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline bool OSAtomicCompareAndSwap32(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + Atomic32 prev; + __asm__ __volatile__( +"1: lwarx %0,0,%2\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n\ + stwcx. %4,0,%2\n\ + bne- 1b\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc"); + return prev == old_value; +} + +static inline Atomic32 OSAtomicCompareAndSwap32Acquire(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + Atomic32 t; + t = OSAtomicCompareAndSwap32(old_value, new_value, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in + // OSAtomicCompareAndSwap32) has a conditional branch with a data + // dependency on the update. Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline Atomic32 OSAtomicCompareAndSwap32Release(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + _lwsync(); + return OSAtomicCompareAndSwap32(old_value, new_value, value); +} + +typedef int64_t Atomic64; + +inline void MemoryBarrier() { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); +} + +// 32-bit Versions. 
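+
+// (A note for orientation: the wrappers that follow compose acquire/release
+// variants in the standard PowerPC way -- release operations issue _lwsync()
+// before the primitive, and acquire operations issue _isync() after it,
+// relying on the conditional branch in the lwarx/stwcx. loop to keep later
+// accesses from completing before the update, as the comments in
+// OSAtomicAdd32Barrier explain.)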
+ +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Release(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Acquire(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Release(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +#ifdef __PPC64__ + +// 64-bit Versions. + +static inline Atomic64 OSAtomicAdd64(Atomic64 amount, Atomic64 *value) { + Atomic64 t; + __asm__ __volatile__( +"1: ldarx %0,0,%3\n\ + add %0,%2,%0\n\ + stdcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc"); + return t; +} + +static inline Atomic64 OSAtomicAdd64Barrier(Atomic64 amount, Atomic64 *value) { + Atomic64 t; + _lwsync(); + t = OSAtomicAdd64(amount, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in OSAtomicAdd64) has a + // conditional branch with a data dependency on the update. + // Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline bool OSAtomicCompareAndSwap64(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + Atomic64 prev; + __asm__ __volatile__( +"1: ldarx %0,0,%2\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc"); + return prev == old_value; +} + +static inline Atomic64 OSAtomicCompareAndSwap64Acquire(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + Atomic64 t; + t = OSAtomicCompareAndSwap64(old_value, new_value, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in + // OSAtomicCompareAndSwap64) has a conditional branch with a data + // dependency on the update. Otherwise, we'd have to use sync. 
+ _isync(); + return t; +} + +static inline Atomic64 OSAtomicCompareAndSwap64Release(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + _lwsync(); + return OSAtomicCompareAndSwap64(old_value, new_value, value); +} + + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Release(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Acquire(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Release(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +#endif + +inline void NoBarrier_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); +} + +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { + _lwsync(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32 *ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; + _lwsync(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); + return *ptr; +} + +#ifdef __PPC64__ + +// 64-bit Versions. + +inline void NoBarrier_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. 
+ _sync(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + _lwsync(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64 *ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = *ptr; + _lwsync(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); + return *ptr; +} + +#endif + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-macosx.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-macosx.h new file mode 100644 index 00000000000..b5130d4f4d7 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-macosx.h @@ -0,0 +1,370 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Implementation of atomic operations for Mac OS X. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". + +#ifndef BASE_ATOMICOPS_INTERNALS_MACOSX_H_ +#define BASE_ATOMICOPS_INTERNALS_MACOSX_H_ + +typedef int32_t Atomic32; + +// MacOS uses long for intptr_t, AtomicWord and Atomic32 are always different +// on the Mac, even when they are the same size. Similarly, on __ppc64__, +// AtomicWord and Atomic64 are always different. Thus, we need explicit +// casting. 
+#ifdef __LP64__
+#define AtomicWordCastType base::subtle::Atomic64
+#else
+#define AtomicWordCastType Atomic32
+#endif
+
+#if defined(__LP64__) || defined(__i386__)
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
+#endif
+
+#include <libkern/OSAtomic.h>
+
+namespace base {
+namespace subtle {
+
+#if !defined(__LP64__) && defined(__ppc__)
+
+// The Mac 64-bit OSAtomic implementations are not available for 32-bit
+// PowerPC, while the underlying assembly instructions are available on only
+// some implementations of PowerPC.
+
+// The following inline functions will fail with the error message at compile
+// time ONLY IF they are called.  So it is safe to use this header if user
+// code only calls AtomicWord and Atomic32 operations.
+//
+// NOTE(vchen): Implementation notes to implement the atomic ops below may
+// be found in "PowerPC Virtual Environment Architecture, Book II,
+// Version 2.02", January 28, 2005, Appendix B, page 46.  Unfortunately,
+// extra care must be taken to ensure data are properly 8-byte aligned, and
+// that data are returned correctly according to Mac OS X ABI specs.
+
+inline int64_t OSAtomicCompareAndSwap64(
+    int64_t oldValue, int64_t newValue, int64_t *theValue) {
+  __asm__ __volatile__(
+      "_OSAtomicCompareAndSwap64_not_supported_for_32_bit_ppc\n\t");
+  return 0;
+}
+
+inline int64_t OSAtomicAdd64(int64_t theAmount, int64_t *theValue) {
+  __asm__ __volatile__(
+      "_OSAtomicAdd64_not_supported_for_32_bit_ppc\n\t");
+  return 0;
+}
+
+inline int64_t OSAtomicCompareAndSwap64Barrier(
+    int64_t oldValue, int64_t newValue, int64_t *theValue) {
+  int64_t prev = OSAtomicCompareAndSwap64(oldValue, newValue, theValue);
+  OSMemoryBarrier();
+  return prev;
+}
+
+inline int64_t OSAtomicAdd64Barrier(
+    int64_t theAmount, int64_t *theValue) {
+  int64_t new_val = OSAtomicAdd64(theAmount, theValue);
+  OSMemoryBarrier();
+  return new_val;
+}
+#endif
+
+typedef int64_t Atomic64;
+
+inline void MemoryBarrier() {
+  OSMemoryBarrier();
+}
+
+// 32-bit Versions.
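+
+// (Illustrative note: OSAtomicCompareAndSwap32 returns a bool saying whether
+// the swap happened, so the wrappers below loop to recover the previous
+// value that this interface must return.  The Barrier variants also suffice
+// for a toy spinlock -- a hypothetical sketch, not part of this file:
+//
+//   static volatile Atomic32 lock_word = 0;
+//   void SpinLock()   { while (!OSAtomicCompareAndSwap32Barrier(
+//                           0, 1, const_cast<Atomic32*>(&lock_word))) {} }
+//   void SpinUnlock() { Release_Store(&lock_word, 0); }
+// )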
+ +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + return Acquire_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + return Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit version + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + return Acquire_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Barrier(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, + 
Atomic64 old_value, + Atomic64 new_value) { + // The lib kern interface does not distinguish between + // Acquire and Release memory barriers; they are equivalent. + return Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +#ifdef __LP64__ + +// 64-bit implementation on 64-bit platform + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + MemoryBarrier(); + return *ptr; +} + +#else + +// 64-bit implementation on 32-bit platform + +#if defined(__ppc__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__( + "_NoBarrier_Store_not_supported_for_32_bit_ppc\n\t"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + __asm__ __volatile__( + "_NoBarrier_Load_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +#elif defined(__i386__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (*ptr) + : "m" (value) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 value; + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (value) + : "m" (*ptr) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + + return value; +} +#endif + + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + MemoryBarrier(); + NoBarrier_Store(ptr, value); +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = NoBarrier_Load(ptr); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} +#endif // __LP64__ + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_MACOSX_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-mips.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-mips.h new file mode 100644 index 00000000000..4bfd7f6c70d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-mips.h @@ -0,0 +1,323 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2013, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Author: Jovan Zelincevic <jovan.zelincevic@imgtec.com> +// based on atomicops-internals by Sanjay Ghemawat + +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// This code implements MIPS atomics. + +#ifndef BASE_ATOMICOPS_INTERNALS_MIPS_H_ +#define BASE_ATOMICOPS_INTERNALS_MIPS_H_ + +#if (_MIPS_ISA == _MIPS_ISA_MIPS64) +#define BASE_HAS_ATOMIC64 1 +#endif + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +// Atomically execute: +// result = *ptr; +// if (*ptr == old_value) +// *ptr = new_value; +// return result; +// +// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value". +// Always return the old value of "*ptr" +// +// This routine implies no memory barriers. +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) +{ + Atomic32 prev, tmp; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "ll %0, %5 \n" // prev = *ptr + "bne %0, %3, 2f \n" // if (prev != old_value) goto 2 + " move %2, %4 \n" // tmp = new_value + "sc %2, %1 \n" // *ptr = tmp (with atomic check) + "beqz %2, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + "2: \n" + + ".set pop \n" + : "=&r" (prev), "=m" (*ptr), + "=&r" (tmp) + : "Ir" (old_value), "r" (new_value), + "m" (*ptr) + : "memory" + ); + return prev; +} + +// Atomically store new_value into *ptr, returning the previous value held in +// *ptr. This routine implies no memory barriers. 
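For orientation, a hedged editorial sketch (not from the gperftools sources; the helper name is hypothetical): the exchange contract described above can also be met by retrying the CAS primitive defined earlier until the observed value is stable. The MIPS implementation that follows fuses that retry loop into a single ll/sc sequence instead.

inline Atomic32 ExchangeViaCas_Sketch(volatile Atomic32* ptr,
                                      Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;  // snapshot the current value
    // Retry if another thread changed *ptr between the read and the CAS.
  } while (NoBarrier_CompareAndSwap(ptr, old_value, new_value) != old_value);
  return old_value;
}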
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) +{ + Atomic32 temp, old; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "ll %1, %2 \n" // old = *ptr + "move %0, %3 \n" // temp = new_value + "sc %0, %2 \n" // *ptr = temp (with atomic check) + "beqz %0, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + + ".set pop \n" + : "=&r" (temp), "=&r" (old), + "=m" (*ptr) + : "r" (new_value), "m" (*ptr) + : "memory" + ); + return old; +} + +inline void MemoryBarrier() +{ + __asm__ volatile("sync" : : : "memory"); +} + +// "Acquire" operations +// ensure that no later memory access can be reordered ahead of the operation. +// "Release" operations ensure that no previous memory access can be reordered +// after the operation. "Barrier" operations have both "Acquire" and "Release" +// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory +// access. +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) +{ + Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return res; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) +{ + MemoryBarrier(); + Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return res; +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) +{ + *ptr = value; +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) +{ + Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) +{ + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) +{ + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) +{ + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) +{ + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) +{ + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) +{ + MemoryBarrier(); + return *ptr; +} + +#if (_MIPS_ISA == _MIPS_ISA_MIPS64) || (_MIPS_SIM == _MIPS_SIM_ABI64) + +typedef int64_t Atomic64; + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) +{ + Atomic64 prev, tmp; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "lld %0, %5 \n" // prev = *ptr + "bne %0, %3, 2f \n" // if (prev != old_value) goto 2 + " move %2, %4 \n" // tmp = new_value + "scd %2, %1 \n" // *ptr = tmp (with atomic check) + "beqz %2, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + "2: \n" + + ".set pop \n" + : "=&r" (prev), "=m" (*ptr), + "=&r" (tmp) + : "Ir" (old_value), "r" (new_value), + "m" (*ptr) + : "memory" + ); + return prev; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) +{ + Atomic64 temp, old; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "lld %1, %2 \n" // old = *ptr + "move %0, %3 \n" // temp = new_value + "scd %0, %2 \n" // *ptr = temp (with atomic check) + "beqz %0, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + + ".set pop \n" + : "=&r" (temp), "=&r" (old), + "=m" 
(*ptr) + : "r" (new_value), "m" (*ptr) + : "memory" + ); + return old; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) +{ + Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) +{ + Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return res; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) +{ + MemoryBarrier(); + Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return res; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) +{ + *ptr = value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) +{ + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) +{ + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) +{ + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) +{ + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) +{ + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) +{ + MemoryBarrier(); + return *ptr; +} + +#endif + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_MIPS_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-windows.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-windows.h new file mode 100644 index 00000000000..93ced8770d4 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-windows.h @@ -0,0 +1,457 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// Implementation of atomic operations using Windows API
+// functions. This file should not be included directly. Clients
+// should instead include "base/atomicops.h".
+
+#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
+#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "base/basictypes.h"  // For COMPILE_ASSERT
+
+typedef int32 Atomic32;
+
+#if defined(_WIN64)
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
+#endif
+
+namespace base {
+namespace subtle {
+
+typedef int64 Atomic64;
+
+// 32-bit low-level operations on any platform
+
+extern "C" {
+// We use windows intrinsics when we can (they seem to be supported
+// well on MSVC 8.0 and above). Unfortunately, in some
+// environments, <windows.h> and <intrin.h> have conflicting
+// declarations of some other intrinsics, breaking compilation:
+// http://connect.microsoft.com/VisualStudio/feedback/details/262047
+// Therefore, we simply declare the relevant intrinsics ourselves.
+
+// MinGW has a bug in the header files where it doesn't indicate the
+// first argument is volatile -- they're not up to date. See
+// http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html
+// We have to const_cast away the volatile to avoid compiler warnings.
+// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h
+#if defined(__MINGW32__)
+inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
+                                           LONG newval, LONG oldval) {
+  return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval);
+}
+inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
+  return ::InterlockedExchange(const_cast<LONG*>(ptr), newval);
+}
+inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
+  return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment);
+}
+
+#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
+// Unfortunately, in some environments, <windows.h> and <intrin.h>
+// have conflicting declarations of some intrinsics, breaking
+// compilation. So we declare the intrinsics we need ourselves.
See +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval); +#pragma intrinsic(_InterlockedCompareExchange) +inline LONG FastInterlockedCompareExchange(volatile LONG* ptr, + LONG newval, LONG oldval) { + return _InterlockedCompareExchange(ptr, newval, oldval); +} + +LONG _InterlockedExchange(volatile LONG* ptr, LONG newval); +#pragma intrinsic(_InterlockedExchange) +inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) { + return _InterlockedExchange(ptr, newval); +} + +LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment); +#pragma intrinsic(_InterlockedExchangeAdd) +inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) { + return _InterlockedExchangeAdd(ptr, increment); +} + +#else +inline LONG FastInterlockedCompareExchange(volatile LONG* ptr, + LONG newval, LONG oldval) { + return ::InterlockedCompareExchange(ptr, newval, oldval); +} +inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) { + return ::InterlockedExchange(ptr, newval); +} +inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) { + return ::InterlockedExchangeAdd(ptr, increment); +} + +#endif // ifdef __MINGW32__ +} // extern "C" + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + LONG result = FastInterlockedCompareExchange( + reinterpret_cast<volatile LONG*>(ptr), + static_cast<LONG>(new_value), + static_cast<LONG>(old_value)); + return static_cast<Atomic32>(result); +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + LONG result = FastInterlockedExchange( + reinterpret_cast<volatile LONG*>(ptr), + static_cast<LONG>(new_value)); + return static_cast<Atomic32>(result); +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +} // namespace base::subtle +} // namespace base + + +// In msvc8/vs2005, winnt.h already contains a definition for +// MemoryBarrier in the global namespace. Add it there for earlier +// versions and forward to it from within the namespace. 
+#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
+inline void MemoryBarrier() {
+  Atomic32 value = 0;
+  base::subtle::NoBarrier_AtomicExchange(&value, 0);
+  // actually acts as a barrier in this implementation
+}
+#endif
+
+namespace base {
+namespace subtle {
+
+inline void MemoryBarrier() {
+  ::MemoryBarrier();
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  Acquire_AtomicExchange(ptr, value);
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
+  // See comments in Atomic64 version of Release_Store() below.
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+// 64-bit operations
+
+#if defined(_WIN64) || defined(__MINGW64__)
+
+// 64-bit low-level operations on 64-bit platform.
+
+COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);
+
+// These are the intrinsics needed for 64-bit operations. Similar to the
+// 32-bit case above.
+
+extern "C" {
+#if defined(__MINGW64__)
+inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                                   PVOID newval, PVOID oldval) {
+  return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
+                                             newval, oldval);
+}
+inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
+  return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
+}
+inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
+                                             LONGLONG increment) {
+  return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
+}
+
+#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
+// Like above, we need to declare the intrinsics ourselves.
+PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                         PVOID newval, PVOID oldval);
+#pragma intrinsic(_InterlockedCompareExchangePointer)
+inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                                   PVOID newval, PVOID oldval) {
+  return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
+                                            newval, oldval);
+}
+
+PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval);
+#pragma intrinsic(_InterlockedExchangePointer)
+inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
+  return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
+}
+
+LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment);
+#pragma intrinsic(_InterlockedExchangeAdd64)
+inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
+                                             LONGLONG increment) {
+  return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
+}
+
+#else
+inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
+                                                   PVOID newval, PVOID oldval) {
+  return ::InterlockedCompareExchangePointer(ptr, newval, oldval);
+}
+inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
+  return ::InterlockedExchangePointer(ptr, newval);
+}
+inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
+                                             LONGLONG increment) {
+  return ::InterlockedExchangeAdd64(ptr, increment);
+}
+
+#endif // ifdef __MINGW64__
+} // extern "C"
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  PVOID result = FastInterlockedCompareExchangePointer(
+    reinterpret_cast<volatile PVOID*>(ptr),
+    reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
+  return reinterpret_cast<Atomic64>(result);
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  PVOID result = FastInterlockedExchangePointer(
+    reinterpret_cast<volatile PVOID*>(ptr),
+    reinterpret_cast<PVOID>(new_value));
+  return reinterpret_cast<Atomic64>(result);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_AtomicExchange(ptr, value);
+  // acts as a barrier in this implementation
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
+
+  // When new chips come out, check:
+  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+  //  System Programming Guide, Chapter 7: Multiple-processor management,
+  //  Section 7.2, Memory Ordering.
+  // Last seen at:
+  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr;
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#else  // defined(_WIN64) || defined(__MINGW64__)
+
+// 64-bit low-level operations on 32-bit platform
+
+// TODO(vchen): The GNU assembly below must be converted to MSVC inline
+// assembly. Then the file should be renamed to ...-x86-msvc.h, probably.
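The TODO above could plausibly be resolved without hand-written assembly at all. An editorial sketch, untested and assuming an MSVC toolchain recent enough to expose the cmpxchg8b-backed 64-bit compare-exchange intrinsic on 32-bit x86 (the helper name is hypothetical):

#include <intrin.h>

inline Atomic64 CompareAndSwap64_Sketch(volatile Atomic64* ptr,
                                        Atomic64 old_value,
                                        Atomic64 new_value) {
  // Argument order is (destination, exchange, comparand); the intrinsic
  // returns the previous value, matching NoBarrier_CompareAndSwap's contract.
  return _InterlockedCompareExchange64(
      reinterpret_cast<volatile __int64*>(ptr), new_value, old_value);
}

Absent such an intrinsic, the code below simply aborts at runtime for the 64-bit operations it cannot implement.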
+ +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { +#if 0 // Not implemented + Atomic64 prev; + __asm__ __volatile__("movl (%3), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%3), %%ecx\n\t" // ecx:ebx + "lock; cmpxchg8b %1\n\t" // If edx:eax (old_value) same + : "=A" (prev) // as contents of ptr: + : "m" (*ptr), // ecx:ebx => ptr + "0" (old_value), // else: + "r" (&new_value) // old *ptr => edx:eax + : "memory", "%ebx", "%ecx"); + return prev; +#else + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +#endif +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { +#if 0 // Not implemented + __asm__ __volatile__( + "movl (%2), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%2), %%ecx\n\t" // ecx:ebx + "0:\n\t" + "movl %1, %%eax\n\t" // Read contents of ptr into + "movl 4%1, %%edx\n\t" // edx:eax + "lock; cmpxchg8b %1\n\t" // Attempt cmpxchg; if *ptr + "jnz 0b\n\t" // is no longer edx:eax, loop + : "=A" (new_value) + : "m" (*ptr), + "r" (&new_value) + : "memory", "%ebx", "%ecx"); + return new_value; // Now it's the previous value. +#else + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +#endif +} + +inline void NoBarrier_Store(volatile Atomic64* ptrValue, Atomic64 value) +{ + __asm { + movq mm0, value; // Use mmx reg for 64-bit atomic moves + mov eax, ptrValue; + movq [eax], mm0; + emms; // Empty mmx state to enable FP registers + } +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_AtomicExchange(ptr, value); + // acts as a barrier in this implementation +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptrValue) +{ + Atomic64 value; + __asm { + mov eax, ptrValue; + movq mm0, [eax]; // Use mmx reg for 64-bit atomic moves + movq value, mm0; + emms; // Empty mmx state to enable FP registers + } + return value; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +#endif // defined(_WIN64) || defined(__MINGW64__) + + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. 
+ return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-x86.cc b/src/third_party/gperftools-2.5/src/base/atomicops-internals-x86.cc new file mode 100644 index 00000000000..c3391e78234 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-x86.cc @@ -0,0 +1,112 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This module gets enough CPU information to optimize the + * atomicops module on x86. + */ + +#include "base/atomicops.h" +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include <string.h> + +// This file only makes sense with atomicops-internals-x86.h -- it +// depends on structs that are defined in that file. If atomicops.h +// doesn't sub-include that file, then we aren't needed, and shouldn't +// try to do anything. +#ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ + +// Inline cpuid instruction. In PIC compilations, %ebx contains the address +// of the global offset table. To avoid breaking such executables, this code +// must preserve that register's value across cpuid instructions. 
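As an aside (an editorial sketch; gperftools does not use it), current GCC and Clang ship a <cpuid.h> helper that performs the same %ebx bookkeeping internally, and it can read the very feature bits this file extracts:

#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {   // leaf 1: feature flags
    std::printf("sse2:       %u\n", (edx >> 26) & 1u);  // edx bit 26
    std::printf("cmpxchg16b: %u\n", (ecx >> 13) & 1u);  // ecx bit 13
  }
  return 0;
}

The hand-rolled macro below achieves the same effect while still building on older toolchains.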
+#if defined(__i386__)
+#define cpuid(a, b, c, d, inp) \
+  asm ("mov %%ebx, %%edi\n"    \
+       "cpuid\n"               \
+       "xchg %%edi, %%ebx\n"   \
+       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
+#elif defined (__x86_64__)
+#define cpuid(a, b, c, d, inp) \
+  asm ("mov %%rbx, %%rdi\n"    \
+       "cpuid\n"               \
+       "xchg %%rdi, %%rbx\n"   \
+       : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
+#endif
+
+#if defined(cpuid)  // initialize the struct only on x86
+
+// Set the flags so that code will run correctly and conservatively
+// until InitGoogle() is called.
+struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = {
+  false,  // no SSE2
+  false   // no cmpxchg16b
+};
+
+// Initialize the AtomicOps_Internalx86CPUFeatures struct.
+static void AtomicOps_Internalx86CPUFeaturesInit() {
+  uint32 eax;
+  uint32 ebx;
+  uint32 ecx;
+  uint32 edx;
+
+  // Get vendor string (issue CPUID with eax = 0)
+  cpuid(eax, ebx, ecx, edx, 0);
+  char vendor[13];
+  memcpy(vendor, &ebx, 4);
+  memcpy(vendor + 4, &edx, 4);
+  memcpy(vendor + 8, &ecx, 4);
+  vendor[12] = 0;
+
+  // get feature flags in ecx/edx, and family/model in eax
+  cpuid(eax, ebx, ecx, edx, 1);
+
+  int family = (eax >> 8) & 0xf;  // family and model fields
+  int model = (eax >> 4) & 0xf;
+  if (family == 0xf) {  // use extended family and model fields
+    family += (eax >> 20) & 0xff;
+    model += ((eax >> 16) & 0xf) << 4;
+  }
+
+  // edx bit 26 is SSE2 which we use to tell us whether we can use mfence
+  AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
+
+  // ecx bit 13 indicates whether the cmpxchg16b instruction is supported
+  AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1);
+}
+
+REGISTER_MODULE_INITIALIZER(atomicops_x86, {
+  AtomicOps_Internalx86CPUFeaturesInit();
+});
+
+#endif
+
+#endif /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ */
diff --git a/src/third_party/gperftools-2.5/src/base/atomicops-internals-x86.h b/src/third_party/gperftools-2.5/src/base/atomicops-internals-x86.h
new file mode 100644
index 00000000000..e441ac7e673
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/atomicops-internals-x86.h
@@ -0,0 +1,391 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// Implementation of atomic operations for x86. This file should not
+// be included directly. Clients should instead include
+// "base/atomicops.h".
+
+#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
+#define BASE_ATOMICOPS_INTERNALS_X86_H_
+#include "base/basictypes.h"
+
+typedef int32_t Atomic32;
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
+
+
+// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
+// already matches Atomic32 or Atomic64, depending on the platform.
+
+
+// This struct is not part of the public API of this module; clients may not
+// use it.
+// Features of this x86. Values may not be correct before main() is run,
+// but are set conservatively.
+struct AtomicOps_x86CPUFeatureStruct {
+  bool has_sse2;        // Processor has SSE2.
+  bool has_cmpxchg16b;  // Processor supports cmpxchg16b instruction.
+};
+
+ATTRIBUTE_VISIBILITY_HIDDEN
+extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
+
+
+#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
+
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 32-bit low-level operations on any platform.
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev;
+  __asm__ __volatile__("lock; cmpxchgl %1,%2"
+                       : "=a" (prev)
+                       : "q" (new_value), "m" (*ptr), "0" (old_value)
+                       : "memory");
+  return prev;
+}
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
+                       : "=r" (new_value)
+                       : "m" (*ptr), "0" (new_value)
+                       : "memory");
+  return new_value;  // Now it's the previous value.
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+  return old_val;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  // xchgl already has release memory barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  return x;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+#if defined(__x86_64__)
+
+// 64-bit implementations of memory barrier can be simpler, because the
+// "mfence" instruction is guaranteed to exist.
+inline void MemoryBarrier() {
+  __asm__ __volatile__("mfence" : : : "memory");
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+#else
+
+inline void MemoryBarrier() {
+  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
+    __asm__ __volatile__("mfence" : : : "memory");
+  } else {  // mfence is faster but not present on PIII
+    Atomic32 x = 0;
+    Acquire_AtomicExchange(&x, 0);
+  }
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
+    *ptr = value;
+    __asm__ __volatile__("mfence" : : : "memory");
+  } else {
+    Acquire_AtomicExchange(ptr, value);
+  }
+}
+#endif
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+  *ptr = value;  // An x86 store acts as a release barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#if defined(__x86_64__)
+
+// 64-bit low-level operations on 64-bit platform.
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev;
+  __asm__ __volatile__("lock; cmpxchgq %1,%2"
+                       : "=a" (prev)
+                       : "q" (new_value), "m" (*ptr), "0" (old_value)
+                       : "memory");
+  return prev;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
+                       : "=r" (new_value)
+                       : "m" (*ptr), "0" (new_value)
+                       : "memory");
+  return new_value;  // Now it's the previous value.
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+  return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  // xchgq already has release memory barrier semantics.
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+
+  *ptr = value;  // An x86 store acts as a release barrier
+                 // for current AMD/Intel chips as of Jan 2008.
+                 // See also Acquire_Load(), below.
+
+  // When new chips come out, check:
+  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+  //  System Programming Guide, Chapter 7: Multiple-processor management,
+  //  Section 7.2, Memory Ordering.
+  // Last seen at:
+  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
+  //
+  // x86 stores/loads fail to act as barriers for a few instructions (clflush
+  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
+  // not generated by the compiler, and are rare. Users of these instructions
+  // need to know about cache behaviour in any case since all of these involve
+  // either flushing cache lines or non-temporal cache hints.
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
+                          // for current AMD/Intel chips as of Jan 2008.
+                          // See also Release_Store(), above.
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#else  // defined(__x86_64__)
+
+// 64-bit low-level operations on 32-bit platform.
+
+#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+// For compilers older than gcc 4.1, we use inline asm.
+//
+// Potential pitfalls:
+//
+// 1. %ebx points to Global offset table (GOT) with -fPIC.
+//    We need to preserve this register.
+// 2. When explicit registers are used in inline asm, the
+//    compiler may not be aware of it and might try to reuse
+//    the same register for another argument which has constraints
+//    that allow it ("r" for example).
+
+inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
+                                            Atomic64 old_value,
+                                            Atomic64 new_value) {
+  Atomic64 prev;
+  __asm__ __volatile__("push %%ebx\n\t"
+                       "movl (%3), %%ebx\n\t"     // Move 64-bit new_value into
+                       "movl 4(%3), %%ecx\n\t"    // ecx:ebx
+                       "lock; cmpxchg8b (%1)\n\t" // If edx:eax (old_value) same
+                       "pop %%ebx\n\t"
+                       : "=A" (prev)              // as contents of ptr:
+                       : "D" (ptr),               //   ecx:ebx => ptr
+                         "0" (old_value),         // else:
+                         "S" (&new_value)         //   old *ptr => edx:eax
+                       : "memory", "%ecx");
+  return prev;
+}
+#endif  // Compiler < gcc-4.1
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_val,
+                                         Atomic64 new_val) {
+  return __sync_val_compare_and_swap(ptr, old_val, new_val);
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_val) {
+  Atomic64 old_val;
+
+  do {
+    old_val = *ptr;
+  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
+
+  return old_val;
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_val) {
+  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
+  return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_val) {
+  return NoBarrier_AtomicExchange(ptr, new_val);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"            // Empty mmx state/Reset FP regs
+                       : "=m" (*ptr)
+                       : "m" (value)
+                       : // mark the FP stack and mmx registers as clobbered
+                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
+                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
+                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+  NoBarrier_Store(ptr, value);
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  Atomic64 value;
+  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"            // Empty mmx state/Reset FP regs
+                       : "=m" (value)
+                       : "m" (*ptr)
+                       : // mark the FP stack and mmx registers as clobbered
+                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
+                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
+                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
+  return value;
+}
+
+inline Atomic64
Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + ATOMICOPS_COMPILER_BARRIER(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +#endif // defined(__x86_64__) + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return x; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +} // namespace base::subtle +} // namespace base + +#undef ATOMICOPS_COMPILER_BARRIER + +#endif // BASE_ATOMICOPS_INTERNALS_X86_H_ diff --git a/src/third_party/gperftools-2.5/src/base/atomicops.h b/src/third_party/gperftools-2.5/src/base/atomicops.h new file mode 100644 index 00000000000..be038f34416 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/atomicops.h @@ -0,0 +1,391 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// For atomic operations on statistics counters, see atomic_stats_counter.h. +// For atomic operations on sequence numbers, see atomic_sequence_num.h. +// For atomic operations on reference counts, see atomic_refcount.h. + +// Some fast atomic operations -- typically with machine-dependent +// implementations. This file may need editing as Google code is +// ported to different architectures. + +// The routines exported by this module are subtle. If you use them, even if +// you get the code right, it will depend on careful reasoning about atomicity +// and memory ordering; it will be less readable, and harder to maintain. If +// you plan to use these routines, you should have a good reason, such as solid +// evidence that performance would otherwise suffer, or there being no +// alternative. 
You should assume only properties explicitly guaranteed by the
+// specifications in this file. You are almost certainly _not_ writing code
+// just for the x86; if you assume x86 semantics, x86 hardware bugs and
+// implementations on other architectures will cause your code to break. If you
+// do not know what you are doing, avoid these routines, and use a Mutex.
+//
+// The following lower-level operations are typically useful only to people
+// implementing higher-level synchronization operations like spinlocks,
+// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
+// a store with appropriate memory-ordering instructions. "Acquire" operations
+// ensure that no later memory access can be reordered ahead of the operation.
+// "Release" operations ensure that no previous memory access can be reordered
+// after the operation. "Barrier" operations have both "Acquire" and "Release"
+// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
+// access.
+//
+// It is incorrect to make direct assignments to/from an atomic variable.
+// You should use one of the Load or Store routines. The NoBarrier
+// versions are provided when no barriers are needed:
+//   NoBarrier_Store()
+//   NoBarrier_Load()
+// Although there is currently no compiler enforcement, you are encouraged
+// to use these. Moreover, if you choose to use the base::subtle::Atomic64
+// type, you MUST use one of the Load or Store routines to get correct
+// behavior on 32-bit platforms.
+//
+// The intent is eventually to put all of these routines in namespace
+// base::subtle.
+
+#ifndef THREAD_ATOMICOPS_H_
+#define THREAD_ATOMICOPS_H_
+
+#include <config.h>
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+// ------------------------------------------------------------------------
+// Include the platform specific implementations of the types
+// and operations listed below. Implementations are to provide Atomic32
+// and Atomic64 operations. If there is a mismatch between intptr_t and
+// the Atomic32 or Atomic64 types for a platform, the platform-specific header
+// should define the macro, AtomicWordCastType in a clause similar to the
+// following:
+// #if ...pointers are 64 bits...
+// # define AtomicWordCastType base::subtle::Atomic64
+// #else
+// # define AtomicWordCastType Atomic32
+// #endif
+// TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?)
+// ------------------------------------------------------------------------ + +#include "base/arm_instruction_set_select.h" +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) + +#if defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__GNUC__) && GCC_VERSION >= 40700 +#include "base/atomicops-internals-gcc.h" +#elif defined(__MACH__) && defined(__APPLE__) +#include "base/atomicops-internals-macosx.h" +#elif defined(__GNUC__) && defined(ARMV6) +#include "base/atomicops-internals-arm-v6plus.h" +#elif defined(ARMV3) +#include "base/atomicops-internals-arm-generic.h" +#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__)) +#include "base/atomicops-internals-x86.h" +#elif defined(_WIN32) +#include "base/atomicops-internals-windows.h" +#elif defined(__linux__) && defined(__PPC__) +#include "base/atomicops-internals-linuxppc.h" +#elif defined(__GNUC__) && defined(__mips__) +#include "base/atomicops-internals-mips.h" +#elif defined(__GNUC__) && GCC_VERSION >= 40700 +#include "base/atomicops-internals-gcc.h" +#else +#error You need to implement atomic operations for this architecture +#endif + +// Signed type that can hold a pointer and supports the atomic ops below, as +// well as atomic loads and stores. Instances must be naturally-aligned. +typedef intptr_t AtomicWord; + +#ifdef AtomicWordCastType +// ------------------------------------------------------------------------ +// This section is needed only when explicit type casting is required to +// cast AtomicWord to one of the basic atomic types (Atomic64 or Atomic32). +// It also serves to document the AtomicWord interface. +// ------------------------------------------------------------------------ + +namespace base { +namespace subtle { + +// Atomically execute: +// result = *ptr; +// if (*ptr == old_value) +// *ptr = new_value; +// return result; +// +// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value". +// Always return the old value of "*ptr" +// +// This routine implies no memory barriers. +inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return NoBarrier_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +// Atomically store new_value into *ptr, returning the previous value held in +// *ptr. This routine implies no memory barriers. 
+inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return NoBarrier_AtomicExchange( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value); +} + +inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return Acquire_AtomicExchange( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value); +} + +inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return Release_AtomicExchange( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value); +} + +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Acquire_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Release_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) { + NoBarrier_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Acquire_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Release_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) { + return NoBarrier_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { + return base::subtle::Acquire_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { + return base::subtle::Release_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +} // namespace base::subtle +} // namespace base +#endif // AtomicWordCastType + +// ------------------------------------------------------------------------ +// Commented out type definitions and method declarations for documentation +// of the interface provided by this module. +// ------------------------------------------------------------------------ + +#if 0 + +// Signed 32-bit type that supports the atomic ops below, as well as atomic +// loads and stores. Instances must be naturally aligned. This type differs +// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits. +typedef int32_t Atomic32; + +// Corresponding operations on Atomic32 +namespace base { +namespace subtle { + +// Signed 64-bit type that supports the atomic ops below, as well as atomic +// loads and stores. Instances must be naturally aligned. This type differs +// from AtomicWord in 32-bit binaries where AtomicWord is 32-bits. 
+typedef int64_t Atomic64; + +Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value); +void Acquire_Store(volatile Atomic32* ptr, Atomic32 value); +void Release_Store(volatile Atomic32* ptr, Atomic32 value); +Atomic32 NoBarrier_Load(volatile const Atomic32* ptr); +Atomic32 Acquire_Load(volatile const Atomic32* ptr); +Atomic32 Release_Load(volatile const Atomic32* ptr); + +// Corresponding operations on Atomic64 +Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); +Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); +Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); + +Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value); +void Acquire_Store(volatile Atomic64* ptr, Atomic64 value); +void Release_Store(volatile Atomic64* ptr, Atomic64 value); +Atomic64 NoBarrier_Load(volatile const Atomic64* ptr); +Atomic64 Acquire_Load(volatile const Atomic64* ptr); +Atomic64 Release_Load(volatile const Atomic64* ptr); +} // namespace base::subtle +} // namespace base + +void MemoryBarrier(); + +#endif // 0 + + +// ------------------------------------------------------------------------ +// The following are to be deprecated when all uses have been changed to +// use the base::subtle namespace. +// ------------------------------------------------------------------------ + +#ifdef AtomicWordCastType +// AtomicWord versions to be deprecated +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} + +inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Acquire_Store(ptr, value); +} + +inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Release_Store(ptr, value); +} + +inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { + return base::subtle::Acquire_Load(ptr); +} + +inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { + return base::subtle::Release_Load(ptr); +} +#endif // AtomicWordCastType + +// 32-bit Acquire/Release operations to be deprecated. 
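To make the acquire/release contract documented above concrete, here is a minimal spinlock sketch (editorial only; the class name is hypothetical) built from routines declared in this header:

class ExampleSpinLock {
 public:
  ExampleSpinLock() : state_(0) {}
  void Lock() {
    // Acquire: nothing in the critical section may be reordered above the CAS.
    while (base::subtle::Acquire_CompareAndSwap(&state_, 0, 1) != 0) {
      // spin until the current holder releases the lock
    }
  }
  void Unlock() {
    // Release: all critical-section writes are visible before this store.
    base::subtle::Release_Store(&state_, 0);
  }
 private:
  volatile Atomic32 state_;  // 0 = unlocked, 1 = locked
};

The deprecated global-namespace wrappers that follow merely forward to the same namespaced routines.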
+ +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + return base::subtle::Release_Store(ptr, value); +} +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + return base::subtle::Release_Load(ptr); +} + +#ifdef BASE_HAS_ATOMIC64 + +// 64-bit Acquire/Release operations to be deprecated. + +inline base::subtle::Atomic64 Acquire_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline base::subtle::Atomic64 Release_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + return base::subtle::Release_Store(ptr, value); +} +inline base::subtle::Atomic64 Acquire_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline base::subtle::Atomic64 Release_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Release_Load(ptr); +} + +#endif // BASE_HAS_ATOMIC64 + +#endif // THREAD_ATOMICOPS_H_ diff --git a/src/third_party/gperftools-2.5/src/base/basictypes.h b/src/third_party/gperftools-2.5/src/base/basictypes.h new file mode 100644 index 00000000000..f0e25daf8f1 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/basictypes.h @@ -0,0 +1,392 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef _BASICTYPES_H_ +#define _BASICTYPES_H_ + +#include <config.h> +#include <string.h> // for memcpy() +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // gets us PRId64, etc +#endif + +// To use this in an autoconf setting, make sure you run the following +// autoconf macros: +// AC_HEADER_STDC /* for stdint_h and inttypes_h */ +// AC_CHECK_TYPES([__int64]) /* defined in some windows platforms */ + +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // uint16_t might be here; PRId64 too. +#endif +#ifdef HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#endif +#include <sys/types.h> // our last best hope for uint16_t + +// Standard typedefs +// All Google code is compiled with -funsigned-char to make "char" +// unsigned. Google code therefore doesn't need a "uchar" type. +// TODO(csilvers): how do we make sure unsigned-char works on non-gcc systems? +typedef signed char schar; +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; + +// NOTE: unsigned types are DANGEROUS in loops and other arithmetical +// places. Use the signed types unless your variable represents a bit +// pattern (eg a hash value) or you really need the extra bit. Do NOT +// use 'unsigned' to express "this value should always be positive"; +// use assertions for this. + +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; + +const uint16 kuint16max = ( (uint16) 0xFFFF); +const uint32 kuint32max = ( (uint32) 0xFFFFFFFF); +const uint64 kuint64max = ( (((uint64) kuint32max) << 32) | kuint32max ); + +const int8 kint8max = ( ( int8) 0x7F); +const int16 kint16max = ( ( int16) 0x7FFF); +const int32 kint32max = ( ( int32) 0x7FFFFFFF); +const int64 kint64max = ( ((( int64) kint32max) << 32) | kuint32max ); + +const int8 kint8min = ( ( int8) 0x80); +const int16 kint16min = ( ( int16) 0x8000); +const int32 kint32min = ( ( int32) 0x80000000); +const int64 kint64min = ( (((uint64) kint32min) << 32) | 0 ); + +// Define the "portable" printf and scanf macros, if they're not +// already there (via the inttypes.h we #included above, hopefully). +// Mostly it's old systems that don't support inttypes.h, so we assume +// they're 32 bit. +#ifndef PRIx64 +#define PRIx64 "llx" +#endif +#ifndef SCNx64 +#define SCNx64 "llx" +#endif +#ifndef PRId64 +#define PRId64 "lld" +#endif +#ifndef SCNd64 +#define SCNd64 "lld" +#endif +#ifndef PRIu64 +#define PRIu64 "llu" +#endif +#ifndef PRIxPTR +#define PRIxPTR "lx" +#endif + +// Also allow for printing of a pthread_t. 
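(Illustrative sketch, not part of this commit: the sized typedefs, limit constants, and portable printf macros defined above in use; the pthread_t printing helpers follow below. The function name is hypothetical.)

    #include <stdio.h>
    #include "base/basictypes.h"

    void PrintLimits() {
      // PRId64/PRIu64 expand to the right length modifier for the platform
      // ("lld" on 32-bit systems without <inttypes.h>, per the fallbacks above).
      printf("int64 max  = %" PRId64 "\n", kint64max);
      printf("uint64 max = %" PRIu64 "\n", kuint64max);
    }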
+#define GPRIuPTHREAD "lu"
+#define GPRIxPTHREAD "lx"
+#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) || defined(__FreeBSD__)
+#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt)
+#else
+#define PRINTABLE_PTHREAD(pthreadt) pthreadt
+#endif
+
+// A macro to disallow the evil copy constructor and operator= functions.
+// This should be used in the private: declarations for a class.
+#define DISALLOW_EVIL_CONSTRUCTORS(TypeName)    \
+  TypeName(const TypeName&);                    \
+  void operator=(const TypeName&)
+
+// An alternate name that leaves out the moral judgment... :-)
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) DISALLOW_EVIL_CONSTRUCTORS(TypeName)
+
+// The COMPILE_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+//   COMPILE_ASSERT(sizeof(num_content_type_names) == sizeof(int),
+//                  content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+//
+// Implementation details of COMPILE_ASSERT:
+//
+// - COMPILE_ASSERT works by defining an array type that has -1
+//   elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+//     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+//   does not work, as gcc supports variable-length arrays whose sizes
+//   are determined at run-time (this is gcc's extension and not part
+//   of the C++ standard). As a result, gcc fails to reject the
+//   following code with the simple definition:
+//
+//     int foo;
+//     COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
+//                               // not a compile-time constant.
+//
+// - By using the type CompileAssert<(bool(expr))>, we ensure that
+//   expr is a compile-time constant. (Template arguments must be
+//   determined at compile-time.)
+//
+// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
+//   to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
+//
+//     CompileAssert<bool(expr)>
+//
+//   instead, these compilers would refuse to compile
+//
+//     COMPILE_ASSERT(5 > 0, some_message);
+//
+//   (They seem to think the ">" in "5 > 0" marks the end of the
+//   template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+//     ((expr) ? 1 : -1).
+//
+//   This is to avoid running into a bug in MS VC 7.1, which
+//   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+
+template <bool>
+struct CompileAssert {
+};
+
+#ifdef HAVE___ATTRIBUTE__
+# define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+# define ATTRIBUTE_UNUSED
+#endif
+
+#define COMPILE_ASSERT(expr, msg)               \
+  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED
+
+#define arraysize(a)  (sizeof(a) / sizeof(*(a)))
+
+#define OFFSETOF_MEMBER(strct, field)                                   \
+  (reinterpret_cast<char*>(&reinterpret_cast<strct*>(16)->field) -      \
+   reinterpret_cast<char*>(16))
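(Illustrative sketch, not part of this commit: COMPILE_ASSERT and arraysize in use. The table and message names are made up.)

    static const char* const kNames[]  = { "tcmalloc", "heap-checker", "profiler" };
    static const int         kNameIds[] = { 1, 2, 3 };

    // Fails to compile (array of size -1) if the two tables ever drift apart.
    COMPILE_ASSERT(arraysize(kNames) == arraysize(kNameIds),
                   kNames_and_kNameIds_must_stay_in_sync);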
+// bit_cast<Dest,Source> implements the equivalent of
+// "*reinterpret_cast<Dest*>(&source)".
+//
+// The reinterpret_cast method would produce undefined behavior
+// according to ISO C++ specification section 3.10, paragraph 15.
+// bit_cast<> calls memcpy() which is blessed by the standard,
+// especially by the example in section 3.9.
+//
+// Fortunately memcpy() is very fast. In optimized mode, with a
+// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
+// code with the minimal amount of data movement. On a 32-bit system,
+// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
+// compiles to two loads and two stores.
+
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+  COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes);
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
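(Illustrative sketch, not part of this commit: bit_cast in action, inspecting the bit pattern of a float without undefined behavior.)

    float pi = 3.14159f;
    uint32 bits = bit_cast<uint32>(pi);  // same bytes, well-defined
    // Equivalent in intent to *reinterpret_cast<uint32*>(&pi), but that
    // form violates strict aliasing; bit_cast goes through memcpy instead.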
+#ifdef HAVE___ATTRIBUTE__
+# define ATTRIBUTE_WEAK __attribute__((weak))
+# define ATTRIBUTE_NOINLINE __attribute__((noinline))
+#else
+# define ATTRIBUTE_WEAK
+# define ATTRIBUTE_NOINLINE
+#endif
+
+#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
+# define ATTRIBUTE_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
+#else
+# define ATTRIBUTE_VISIBILITY_HIDDEN
+#endif
+
+// Section attributes are supported for both ELF and Mach-O, but in
+// very different ways. Here's the API we provide:
+// 1) ATTRIBUTE_SECTION: put this with the declaration of all functions
+//    you want to be in the same linker section
+// 2) DEFINE_ATTRIBUTE_SECTION_VARS: must be called once per unique
+//    name. You want to make sure this is executed before any
+//    DECLARE_ATTRIBUTE_SECTION_VARS; the easiest way is to put them
+//    in the same .cc file. Put this call at the global level.
+// 3) INIT_ATTRIBUTE_SECTION_VARS: you can scatter calls to this in
+//    multiple places to help ensure execution before any
+//    DECLARE_ATTRIBUTE_SECTION_VARS. You must have at least one
+//    DEFINE, but you can have many INITs. Put each in its own scope.
+// 4) DECLARE_ATTRIBUTE_SECTION_VARS: must be called before using
+//    ATTRIBUTE_SECTION_START or ATTRIBUTE_SECTION_STOP on a name.
+//    Put this call at the global level.
+// 5) ATTRIBUTE_SECTION_START/ATTRIBUTE_SECTION_STOP: call this to say
+//    where in memory a given section is. All functions declared with
+//    ATTRIBUTE_SECTION are guaranteed to be between START and STOP.
+
+#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__)
+# define ATTRIBUTE_SECTION(name) __attribute__ ((section (#name)))
+
+  // Weak section declaration to be used as a global declaration
+  // for ATTRIBUTE_SECTION_START|STOP(name) to compile and link
+  // even without functions with ATTRIBUTE_SECTION(name).
+# define DECLARE_ATTRIBUTE_SECTION_VARS(name) \
+    extern char __start_##name[] ATTRIBUTE_WEAK; \
+    extern char __stop_##name[] ATTRIBUTE_WEAK
+# define INIT_ATTRIBUTE_SECTION_VARS(name)     // no-op for ELF
+# define DEFINE_ATTRIBUTE_SECTION_VARS(name)   // no-op for ELF
+
+  // Return void* pointers to start/end of a section of code with functions
+  // having ATTRIBUTE_SECTION(name), or 0 if no such function exists.
+  // One must DECLARE_ATTRIBUTE_SECTION_VARS(name) for this to compile and link.
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
+# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
+# define HAVE_ATTRIBUTE_SECTION_START 1
+
+#elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__)
+# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name)))
+
+#include <mach-o/getsect.h>
+#include <mach-o/dyld.h>
+class AssignAttributeStartEnd {
+ public:
+  AssignAttributeStartEnd(const char* name, char** pstart, char** pend) {
+    // Find out which dynamic library, if any, the section "name" is defined in.
+    if (_dyld_present()) {
+      for (int i = _dyld_image_count() - 1; i >= 0; --i) {
+        const mach_header* hdr = _dyld_get_image_header(i);
+#ifdef MH_MAGIC_64
+        if (hdr->magic == MH_MAGIC_64) {
+          uint64_t len;
+          *pstart = getsectdatafromheader_64((mach_header_64*)hdr,
+                                             "__TEXT", name, &len);
+          if (*pstart) {   // NULL if not defined in this dynamic library
+            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
+            *pend = *pstart + len;
+            return;
+          }
+        }
+#endif
+        if (hdr->magic == MH_MAGIC) {
+          uint32_t len;
+          *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len);
+          if (*pstart) {   // NULL if not defined in this dynamic library
+            *pstart += _dyld_get_image_vmaddr_slide(i);   // correct for reloc
+            *pend = *pstart + len;
+            return;
+          }
+        }
+      }
+    }
+    // If we get here, not defined in a dll at all. See if defined statically.
+    unsigned long len;    // don't ask me why this type isn't uint32_t too...
+    *pstart = getsectdata("__TEXT", name, &len);
+    *pend = *pstart + len;
+  }
+};
+
+#define DECLARE_ATTRIBUTE_SECTION_VARS(name) \
+  extern char* __start_##name; \
+  extern char* __stop_##name
+
+#define INIT_ATTRIBUTE_SECTION_VARS(name) \
+  DECLARE_ATTRIBUTE_SECTION_VARS(name); \
+  static const AssignAttributeStartEnd __assign_##name( \
+    #name, &__start_##name, &__stop_##name)
+
+#define DEFINE_ATTRIBUTE_SECTION_VARS(name) \
+  char* __start_##name, *__stop_##name; \
+  INIT_ATTRIBUTE_SECTION_VARS(name)
+
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name))
+# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name))
+# define HAVE_ATTRIBUTE_SECTION_START 1
+
+#else  // not HAVE___ATTRIBUTE__ && __ELF__, nor HAVE___ATTRIBUTE__ && __MACH__
+# define ATTRIBUTE_SECTION(name)
+# define DECLARE_ATTRIBUTE_SECTION_VARS(name)
+# define INIT_ATTRIBUTE_SECTION_VARS(name)
+# define DEFINE_ATTRIBUTE_SECTION_VARS(name)
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(0))
+# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(0))
+
+#endif  // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
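(Illustrative sketch, not part of this commit: the five-step section API above, as it plays out under the ELF branch. The section name stacktrace_impl and the function names are hypothetical.)

    // In exactly one .cc file, at global level:
    DEFINE_ATTRIBUTE_SECTION_VARS(stacktrace_impl);

    // Tag every function that should land in the section:
    void ATTRIBUTE_SECTION(stacktrace_impl) GetStackFramesSketch() { /* ... */ }

    // Anywhere that wants the section bounds:
    DECLARE_ATTRIBUTE_SECTION_VARS(stacktrace_impl);
    bool InStackTraceSection(const void* pc) {
      return ATTRIBUTE_SECTION_START(stacktrace_impl) <= pc &&
             pc < ATTRIBUTE_SECTION_STOP(stacktrace_impl);
    }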
+#if defined(HAVE___ATTRIBUTE__)
+# if (defined(__i386__) || defined(__x86_64__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
+# elif (defined(__PPC__) || defined(__PPC64__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(16)))
+# elif (defined(__arm__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
+    // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
+# elif (defined(__mips__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(128)))
+# elif (defined(__aarch64__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(64)))
+    // implementation specific, Cortex-A53 and 57 should have 64 bytes
+# elif (defined(__s390x__))
+#   define CACHELINE_ALIGNED __attribute__((aligned(256)))
+# else
+#   error Could not determine cache line length - unknown architecture
+# endif
+#else
+# define CACHELINE_ALIGNED
+#endif  // defined(HAVE___ATTRIBUTE__)
+
+// Structure for discovering alignment
+union MemoryAligner {
+  void*  p;
+  double d;
+  size_t s;
+} CACHELINE_ALIGNED;
+
+// The following enum should be used only as a constructor argument to indicate
+// that the variable has static storage class, and that the constructor should
+// do nothing to its state. It indicates to the reader that it is legal to
+// declare a static instance of the class, provided the constructor is given
+// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
+// static variable that has a constructor or a destructor because invocation
+// order is undefined. However, IF the type can be initialized by filling with
+// zeroes (which the loader does for static variables), AND the destructor also
+// does nothing to the storage, then a constructor declared as
+//       explicit MyClass(base::LinkerInitialized x) {}
+// and invoked as
+//       static MyClass my_variable_name(base::LINKER_INITIALIZED);
+// is safe.
+namespace base {
+enum LinkerInitialized { LINKER_INITIALIZED };
+}
+
+#endif  // _BASICTYPES_H_
diff --git a/src/third_party/gperftools-2.5/src/base/commandlineflags.h b/src/third_party/gperftools-2.5/src/base/commandlineflags.h
new file mode 100644
index 00000000000..f54776aba5b
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/commandlineflags.h
@@ -0,0 +1,166 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// This file is a compatibility layer that defines Google's version of
+// command line flags that are used for configuration.
+//
+// We put flags into their own namespace. It is purposefully
+// named in an opaque way that people should have trouble typing
+// directly. The idea is that DEFINE puts the flag in the weird
+// namespace, and DECLARE imports the flag from there into the
+// current namespace. 
The net result is to force people to use +// DECLARE to get access to a flag, rather than saying +// extern bool FLAGS_logtostderr; +// or some such instead. We want this so we can put extra +// functionality (like sanity-checking) in DECLARE if we want, +// and make sure it is picked up everywhere. +// +// We also put the type of the variable in the namespace, so that +// people can't DECLARE_int32 something that they DEFINE_bool'd +// elsewhere. +#ifndef BASE_COMMANDLINEFLAGS_H_ +#define BASE_COMMANDLINEFLAGS_H_ + +#include <config.h> +#include <string> +#include <string.h> // for memchr +#include <stdlib.h> // for getenv +#include "base/basictypes.h" + +#define DECLARE_VARIABLE(type, name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + extern PERFTOOLS_DLL_DECL type FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +#define DEFINE_VARIABLE(type, name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + PERFTOOLS_DLL_DECL type FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +// bool specialization +#define DECLARE_bool(name) \ + DECLARE_VARIABLE(bool, name) +#define DEFINE_bool(name, value, meaning) \ + DEFINE_VARIABLE(bool, name, value, meaning) + +// int32 specialization +#define DECLARE_int32(name) \ + DECLARE_VARIABLE(int32, name) +#define DEFINE_int32(name, value, meaning) \ + DEFINE_VARIABLE(int32, name, value, meaning) + +// int64 specialization +#define DECLARE_int64(name) \ + DECLARE_VARIABLE(int64, name) +#define DEFINE_int64(name, value, meaning) \ + DEFINE_VARIABLE(int64, name, value, meaning) + +#define DECLARE_uint64(name) \ + DECLARE_VARIABLE(uint64, name) +#define DEFINE_uint64(name, value, meaning) \ + DEFINE_VARIABLE(uint64, name, value, meaning) + +// double specialization +#define DECLARE_double(name) \ + DECLARE_VARIABLE(double, name) +#define DEFINE_double(name, value, meaning) \ + DEFINE_VARIABLE(double, name, value, meaning) + +// Special case for string, because we have to specify the namespace +// std::string, which doesn't play nicely with our FLAG__namespace hackery. 
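(Illustrative sketch, not part of this commit: defining and importing a flag with the macros above; the std::string specialization follows next. The flag name and default are hypothetical, loosely modeled on tcmalloc's sampling flag.)

    // In one .cc file: define the flag with a default and a help string.
    DEFINE_int64(tcmalloc_sample_parameter, 0,
                 "Approximate gap in bytes between sampled allocations.");

    // In any other file: import the flag, then read or write FLAGS_<name>.
    DECLARE_int64(tcmalloc_sample_parameter);
    int64 SampleGap() { return FLAGS_tcmalloc_sample_parameter; }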
+#define DECLARE_string(name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + extern std::string FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name +#define DEFINE_string(name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + std::string FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name + +// implemented in sysinfo.cc +namespace tcmalloc { + namespace commandlineflags { + + inline bool StringToBool(const char *value, bool def) { + if (!value) { + return def; + } + return memchr("tTyY1\0", value[0], 6) != NULL; + } + + inline int StringToInt(const char *value, int def) { + if (!value) { + return def; + } + return strtol(value, NULL, 10); + } + + inline long long StringToLongLong(const char *value, long long def) { + if (!value) { + return def; + } + return strtoll(value, NULL, 10); + } + + inline double StringToDouble(const char *value, double def) { + if (!value) { + return def; + } + return strtod(value, NULL); + } + } +} + +// These macros (could be functions, but I don't want to bother with a .cc +// file), make it easier to initialize flags from the environment. + +#define EnvToString(envname, dflt) \ + (!getenv(envname) ? (dflt) : getenv(envname)) + +#define EnvToBool(envname, dflt) \ + tcmalloc::commandlineflags::StringToBool(getenv(envname), dflt) + +#define EnvToInt(envname, dflt) \ + tcmalloc::commandlineflags::StringToInt(getenv(envname), dflt) + +#define EnvToInt64(envname, dflt) \ + tcmalloc::commandlineflags::StringToLongLong(getenv(envname), dflt) + +#define EnvToDouble(envname, dflt) \ + tcmalloc::commandlineflags::StringToDouble(getenv(envname), dflt) + +#endif // BASE_COMMANDLINEFLAGS_H_ diff --git a/src/third_party/gperftools-2.5/src/base/dynamic_annotations.c b/src/third_party/gperftools-2.5/src/base/dynamic_annotations.c new file mode 100644 index 00000000000..87bd2ecde97 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/dynamic_annotations.c @@ -0,0 +1,179 @@ +/* Copyright (c) 2008-2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + */ + +#ifdef __cplusplus +# error "This file should be built as pure C to avoid name mangling" +#endif + +#include "config.h" +#include <stdlib.h> +#include <string.h> + +#include "base/dynamic_annotations.h" +#include "getenv_safe.h" // for TCMallocGetenvSafe + +#ifdef __GNUC__ +/* valgrind.h uses gcc extensions so it won't build with other compilers */ +# ifdef HAVE_VALGRIND_H /* prefer the user's copy if they have it */ +# include <valgrind.h> +# else /* otherwise just use the copy that we have */ +# include "third_party/valgrind.h" +# endif +#endif + +/* Compiler-based ThreadSanitizer defines + DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL = 1 + and provides its own definitions of the functions. */ + +#ifndef DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL +# define DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL 0 +#endif + +/* Each function is empty and called (via a macro) only in debug mode. + The arguments are captured by dynamic tools at runtime. */ + +#if DYNAMIC_ANNOTATIONS_ENABLED == 1 \ + && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 + +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed) {} +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier) {} + +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock){} +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv){} +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv){} +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq){} +void AnnotateNewMemory(const char *file, int line, + const volatile void *mem, + long size){} +void AnnotateExpectRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void AnnotateBenignRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void 
AnnotateBenignRaceSized(const char *file, int line, + const volatile void *mem, + long size, + const char *description) {} +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu){} +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg){} +void AnnotateThreadName(const char *file, int line, + const char *name){} +void AnnotateIgnoreReadsBegin(const char *file, int line){} +void AnnotateIgnoreReadsEnd(const char *file, int line){} +void AnnotateIgnoreWritesBegin(const char *file, int line){} +void AnnotateIgnoreWritesEnd(const char *file, int line){} +void AnnotateEnableRaceDetection(const char *file, int line, int enable){} +void AnnotateNoOp(const char *file, int line, + const volatile void *arg){} +void AnnotateFlushState(const char *file, int line){} + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 + && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */ + +#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 + +static int GetRunningOnValgrind(void) { +#ifdef RUNNING_ON_VALGRIND + if (RUNNING_ON_VALGRIND) return 1; +#endif + const char *running_on_valgrind_str = TCMallocGetenvSafe("RUNNING_ON_VALGRIND"); + if (running_on_valgrind_str) { + return strcmp(running_on_valgrind_str, "0") != 0; + } + return 0; +} + +/* See the comments in dynamic_annotations.h */ +int RunningOnValgrind(void) { + static volatile int running_on_valgrind = -1; + int local_running_on_valgrind = running_on_valgrind; + /* C doesn't have thread-safe initialization of statics, and we + don't want to depend on pthread_once here, so hack it. */ + ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack"); + if (local_running_on_valgrind == -1) + running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind(); + return local_running_on_valgrind; +} + +#endif /* DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */ + +/* See the comments in dynamic_annotations.h */ +double ValgrindSlowdown(void) { + /* Same initialization hack as in RunningOnValgrind(). */ + static volatile double slowdown = 0.0; + double local_slowdown = slowdown; + ANNOTATE_BENIGN_RACE(&slowdown, "safe hack"); + if (RunningOnValgrind() == 0) { + return 1.0; + } + if (local_slowdown == 0.0) { + char *env = getenv("VALGRIND_SLOWDOWN"); + slowdown = local_slowdown = env ? atof(env) : 50.0; + } + return local_slowdown; +} diff --git a/src/third_party/gperftools-2.5/src/base/dynamic_annotations.h b/src/third_party/gperftools-2.5/src/base/dynamic_annotations.h new file mode 100644 index 00000000000..4669315ced3 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/dynamic_annotations.h @@ -0,0 +1,627 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + */ + +/* This file defines dynamic annotations for use with dynamic analysis + tool such as valgrind, PIN, etc. + + Dynamic annotation is a source code annotation that affects + the generated code (that is, the annotation is not a comment). + Each such annotation is attached to a particular + instruction and/or to a particular object (address) in the program. + + The annotations that should be used by users are macros in all upper-case + (e.g., ANNOTATE_NEW_MEMORY). + + Actual implementation of these macros may differ depending on the + dynamic analysis tool being used. + + See http://code.google.com/p/data-race-test/ for more information. + + This file supports the following dynamic analysis tools: + - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). + Macros are defined empty. + - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). + Macros are defined as calls to non-inlinable empty functions + that are intercepted by Valgrind. */ + +#ifndef BASE_DYNAMIC_ANNOTATIONS_H_ +#define BASE_DYNAMIC_ANNOTATIONS_H_ + +#ifndef DYNAMIC_ANNOTATIONS_ENABLED +# define DYNAMIC_ANNOTATIONS_ENABLED 0 +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 + + /* ------------------------------------------------------------- + Annotations useful when implementing condition variables such as CondVar, + using conditional critical sections (Await/LockWhen) and when constructing + user-defined synchronization mechanisms. + + The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can + be used to define happens-before arcs in user-defined synchronization + mechanisms: the race detector will infer an arc from the former to the + latter when they share the same argument pointer. + + Example 1 (reference counting): + + void Unref() { + ANNOTATE_HAPPENS_BEFORE(&refcount_); + if (AtomicDecrementByOne(&refcount_) == 0) { + ANNOTATE_HAPPENS_AFTER(&refcount_); + delete this; + } + } + + Example 2 (message queue): + + void MyQueue::Put(Type *e) { + MutexLock lock(&mu_); + ANNOTATE_HAPPENS_BEFORE(e); + PutElementIntoMyQueue(e); + } + + Type *MyQueue::Get() { + MutexLock lock(&mu_); + Type *e = GetElementFromMyQueue(); + ANNOTATE_HAPPENS_AFTER(e); + return e; + } + + Note: when possible, please use the existing reference counting and message + queue implementations instead of inventing new ones. */ + + /* Report that wait on the condition variable at address "cv" has succeeded + and the lock at address "lock" is held. */ + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) + + /* Report that wait on the condition variable at "cv" has succeeded. Variant + w/o lock. 
*/ + #define ANNOTATE_CONDVAR_WAIT(cv) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) + + /* Report that we are about to signal on the condition variable at address + "cv". */ + #define ANNOTATE_CONDVAR_SIGNAL(cv) \ + AnnotateCondVarSignal(__FILE__, __LINE__, cv) + + /* Report that we are about to signal_all on the condition variable at "cv". */ + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ + AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) + + /* Annotations for user-defined synchronization mechanisms. */ + #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) + #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) + + /* Report that the bytes in the range [pointer, pointer+size) are about + to be published safely. The race checker will create a happens-before + arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + subsequent accesses to this memory. + Note: this annotation may not work properly if the race detector uses + sampling, i.e. does not observe all memory accesses. + */ + #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) + + /* DEPRECATED. Don't use it. */ + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) + + /* DEPRECATED. Don't use it. */ + #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ + do { \ + ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ + ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ + } while (0) + + /* Instruct the tool to create a happens-before arc between mu->Unlock() and + mu->Lock(). This annotation may slow down the race detector and hide real + races. Normally it is used only when it would be difficult to annotate each + of the mutex's critical sections individually using the annotations above. + This annotation makes sense only for hybrid race detectors. For pure + happens-before detectors this is a no-op. For more details see + http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + /* ------------------------------------------------------------- + Annotations useful when defining memory allocators, or when memory that + was protected in one way starts to be protected in another. */ + + /* Report that a new memory at "address" of size "size" has been allocated. + This might be used when the memory has been retrieved from a free list and + is about to be reused, or when a the locking discipline for a variable + changes. */ + #define ANNOTATE_NEW_MEMORY(address, size) \ + AnnotateNewMemory(__FILE__, __LINE__, address, size) + + /* ------------------------------------------------------------- + Annotations useful when defining FIFO queues that transfer data between + threads. */ + + /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at + address "pcq" has been created. The ANNOTATE_PCQ_* annotations + should be used only for FIFO queues. For non-FIFO queues use + ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ + #define ANNOTATE_PCQ_CREATE(pcq) \ + AnnotatePCQCreate(__FILE__, __LINE__, pcq) + + /* Report that the queue at address "pcq" is about to be destroyed. 
*/ + #define ANNOTATE_PCQ_DESTROY(pcq) \ + AnnotatePCQDestroy(__FILE__, __LINE__, pcq) + + /* Report that we are about to put an element into a FIFO queue at address + "pcq". */ + #define ANNOTATE_PCQ_PUT(pcq) \ + AnnotatePCQPut(__FILE__, __LINE__, pcq) + + /* Report that we've just got an element from a FIFO queue at address "pcq". */ + #define ANNOTATE_PCQ_GET(pcq) \ + AnnotatePCQGet(__FILE__, __LINE__, pcq) + + /* ------------------------------------------------------------- + Annotations that suppress errors. It is usually better to express the + program's synchronization using the other annotations, but these can + be used when all else fails. */ + + /* Report that we may have a benign race at "pointer", with size + "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the + point where "pointer" has been allocated, preferably close to the point + where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ + #define ANNOTATE_BENIGN_RACE(pointer, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ + sizeof(*(pointer)), description) + + /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to + the memory range [address, address+size). */ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) + + /* Request the analysis tool to ignore all reads in the current thread + until ANNOTATE_IGNORE_READS_END is called. + Useful to ignore intentional racey reads, while still checking + other reads and all writes. + See also ANNOTATE_UNPROTECTED_READ. */ + #define ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + + /* Stop ignoring reads. */ + #define ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + + /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ + #define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + /* Stop ignoring writes. */ + #define ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + /* Start ignoring all memory accesses (reads and writes). */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do {\ + ANNOTATE_IGNORE_READS_BEGIN();\ + ANNOTATE_IGNORE_WRITES_BEGIN();\ + }while(0)\ + + /* Stop ignoring all memory accesses. */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do {\ + ANNOTATE_IGNORE_WRITES_END();\ + ANNOTATE_IGNORE_READS_END();\ + }while(0)\ + + /* Enable (enable!=0) or disable (enable==0) race detection for all threads. + This annotation could be useful if you want to skip expensive race analysis + during some period of program execution, e.g. during initialization. */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ + AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) + + /* ------------------------------------------------------------- + Annotations useful for debugging. */ + + /* Request to trace every access to "address". */ + #define ANNOTATE_TRACE_MEMORY(address) \ + AnnotateTraceMemory(__FILE__, __LINE__, address) + + /* Report the current thread name to a race detector. */ + #define ANNOTATE_THREAD_NAME(name) \ + AnnotateThreadName(__FILE__, __LINE__, name) + + /* ------------------------------------------------------------- + Annotations useful when implementing locks. They are not + normally needed by modules that merely use locks. + The "lock" argument is a pointer to the lock object. */ + + /* Report that a lock has been created at address "lock". 
*/ + #define ANNOTATE_RWLOCK_CREATE(lock) \ + AnnotateRWLockCreate(__FILE__, __LINE__, lock) + + /* Report that the lock at address "lock" is about to be destroyed. */ + #define ANNOTATE_RWLOCK_DESTROY(lock) \ + AnnotateRWLockDestroy(__FILE__, __LINE__, lock) + + /* Report that the lock at address "lock" has been acquired. + is_w=1 for writer lock, is_w=0 for reader lock. */ + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ + AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) + + /* Report that the lock at address "lock" is about to be released. */ + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ + AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) + + /* ------------------------------------------------------------- + Annotations useful when implementing barriers. They are not + normally needed by modules that merely use barriers. + The "barrier" argument is a pointer to the barrier object. */ + + /* Report that the "barrier" has been initialized with initial "count". + If 'reinitialization_allowed' is true, initialization is allowed to happen + multiple times w/o calling barrier_destroy() */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ + AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ + reinitialization_allowed) + + /* Report that we are about to enter barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ + AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) + + /* Report that we just exited barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ + AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) + + /* Report that the "barrier" has been destroyed. */ + #define ANNOTATE_BARRIER_DESTROY(barrier) \ + AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) + + /* ------------------------------------------------------------- + Annotations useful for testing race detectors. */ + + /* Report that we expect a race on the variable at "address". + Use only in unit tests for a race detector. */ + #define ANNOTATE_EXPECT_RACE(address, description) \ + AnnotateExpectRace(__FILE__, __LINE__, address, description) + + /* A no-op. Insert where you like to test the interceptors. */ + #define ANNOTATE_NO_OP(arg) \ + AnnotateNoOp(__FILE__, __LINE__, arg) + + /* Force the race detector to flush its state. The actual effect depends on + * the implementation of the detector. 
*/ + #define ANNOTATE_FLUSH_STATE() \ + AnnotateFlushState(__FILE__, __LINE__) + + +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + + #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ + #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ + #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ + #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ + #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ + #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ + #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ + #define ANNOTATE_PCQ_PUT(pcq) /* empty */ + #define ANNOTATE_PCQ_GET(pcq) /* empty */ + #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ + #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ + #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ + #define ANNOTATE_THREAD_NAME(name) /* empty */ + #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_READS_END() /* empty */ + #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_WRITES_END() /* empty */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ + #define ANNOTATE_NO_OP(arg) /* empty */ + #define ANNOTATE_FLUSH_STATE() /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ + +/* Macro definitions for GCC attributes that allow static thread safety + analysis to recognize and use some of the dynamic annotations as + escape hatches. + TODO(lcwu): remove the check for __SUPPORT_DYN_ANNOTATION__ once the + default crosstool/GCC supports these GCC attributes. */ + +#define ANNOTALYSIS_STATIC_INLINE +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ; +#define ANNOTALYSIS_IGNORE_READS_BEGIN +#define ANNOTALYSIS_IGNORE_READS_END +#define ANNOTALYSIS_IGNORE_WRITES_BEGIN +#define ANNOTALYSIS_IGNORE_WRITES_END +#define ANNOTALYSIS_UNPROTECTED_READ + +#if defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) && \ + defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__) + +#if DYNAMIC_ANNOTATIONS_ENABLED == 0 +#define ANNOTALYSIS_ONLY 1 +#undef ANNOTALYSIS_STATIC_INLINE +#define ANNOTALYSIS_STATIC_INLINE static inline +#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; } +#endif + +/* Only emit attributes when annotalysis is enabled. 
*/ +#if defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__) +#undef ANNOTALYSIS_IGNORE_READS_BEGIN +#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__ ((ignore_reads_begin)) +#undef ANNOTALYSIS_IGNORE_READS_END +#define ANNOTALYSIS_IGNORE_READS_END __attribute__ ((ignore_reads_end)) +#undef ANNOTALYSIS_IGNORE_WRITES_BEGIN +#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin)) +#undef ANNOTALYSIS_IGNORE_WRITES_END +#define ANNOTALYSIS_IGNORE_WRITES_END __attribute__ ((ignore_writes_end)) +#undef ANNOTALYSIS_UNPROTECTED_READ +#define ANNOTALYSIS_UNPROTECTED_READ __attribute__ ((unprotected_read)) +#endif + +#endif // defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) + +/* Use the macros above rather than using these functions directly. */ +#ifdef __cplusplus +extern "C" { +#endif +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed); +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier); +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +void AnnotateNewMemory(const char *file, int line, + const volatile void *address, + long size); +void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *address, + long size, + const char *description); +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +void AnnotateThreadName(const char *file, int line, + const char *name); +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreReadsBegin(const char *file, int line) + ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreReadsEnd(const char *file, int line) + ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreWritesBegin(const char *file, int line) + 
ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreWritesEnd(const char *file, int line) + ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +void AnnotateEnableRaceDetection(const char *file, int line, int enable); +void AnnotateNoOp(const char *file, int line, + const volatile void *arg); +void AnnotateFlushState(const char *file, int line); + +/* Return non-zero value if running under valgrind. + + If "valgrind.h" is included into dynamic_annotations.c, + the regular valgrind mechanism will be used. + See http://valgrind.org/docs/manual/manual-core-adv.html about + RUNNING_ON_VALGRIND and other valgrind "client requests". + The file "valgrind.h" may be obtained by doing + svn co svn://svn.valgrind.org/valgrind/trunk/include + + If for some reason you can't use "valgrind.h" or want to fake valgrind, + there are two ways to make this function return non-zero: + - Use environment variable: export RUNNING_ON_VALGRIND=1 + - Make your tool intercept the function RunningOnValgrind() and + change its return value. + */ +int RunningOnValgrind(void); + +/* ValgrindSlowdown returns: + * 1.0, if (RunningOnValgrind() == 0) + * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL) + * atof(getenv("VALGRIND_SLOWDOWN")) otherwise + This function can be used to scale timeout values: + EXAMPLE: + for (;;) { + DoExpensiveBackgroundTask(); + SleepForSeconds(5 * ValgrindSlowdown()); + } + */ +double ValgrindSlowdown(void); + +#ifdef __cplusplus +} +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) + + /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + + Instead of doing + ANNOTATE_IGNORE_READS_BEGIN(); + ... = x; + ANNOTATE_IGNORE_READS_END(); + one can use + ... = ANNOTATE_UNPROTECTED_READ(x); */ + template <class T> + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + ANNOTALYSIS_UNPROTECTED_READ { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; + } + /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ + namespace { \ + class static_var ## _annotator { \ + public: \ + static_var ## _annotator() { \ + ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ + sizeof(static_var), \ + # static_var ": " description); \ + } \ + }; \ + static static_var ## _annotator the ## static_var ## _annotator;\ + } +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + + #define ANNOTATE_UNPROTECTED_READ(x) (x) + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ + +/* Annotalysis, a GCC based static analyzer, is able to understand and use + some of the dynamic annotations defined in this file. However, dynamic + annotations are usually disabled in the opt mode (to avoid additional + runtime overheads) while Annotalysis only works in the opt mode. + In order for Annotalysis to use these dynamic annotations when they + are disabled, we re-define these annotations here. Note that unlike the + original macro definitions above, these macros are expanded to calls to + static inline functions so that the compiler will be able to remove the + calls after the analysis. */ + +#ifdef ANNOTALYSIS_ONLY + + #undef ANNOTALYSIS_ONLY + + /* Undefine and re-define the macros that the static analyzer understands. 
*/ + #undef ANNOTATE_IGNORE_READS_BEGIN + #define ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_READS_END + #define ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_WRITES_BEGIN + #define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_WRITES_END + #define ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { \ + ANNOTATE_IGNORE_READS_BEGIN(); \ + ANNOTATE_IGNORE_WRITES_BEGIN(); \ + }while(0) \ + + #undef ANNOTATE_IGNORE_READS_AND_WRITES_END + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { \ + ANNOTATE_IGNORE_WRITES_END(); \ + ANNOTATE_IGNORE_READS_END(); \ + }while(0) \ + + #if defined(__cplusplus) + #undef ANNOTATE_UNPROTECTED_READ + template <class T> + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + ANNOTALYSIS_UNPROTECTED_READ { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; + } + #endif /* __cplusplus */ + +#endif /* ANNOTALYSIS_ONLY */ + +/* Undefine the macros intended only in this file. */ +#undef ANNOTALYSIS_STATIC_INLINE +#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY + +#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ diff --git a/src/third_party/gperftools-2.5/src/base/elf_mem_image.cc b/src/third_party/gperftools-2.5/src/base/elf_mem_image.cc new file mode 100644 index 00000000000..d2ca1a5e131 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/elf_mem_image.cc @@ -0,0 +1,434 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in an in-memory Elf image. 
+// + +#include "base/elf_mem_image.h" + +#ifdef HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h + +#include <stddef.h> // for size_t, ptrdiff_t +#include "base/logging.h" + +// From binutils/include/elf/common.h (this doesn't appear to be documented +// anywhere else). +// +// /* This flag appears in a Versym structure. It means that the symbol +// is hidden, and is only visible with an explicit version number. +// This is a GNU extension. */ +// #define VERSYM_HIDDEN 0x8000 +// +// /* This is the mask for the rest of the Versym information. */ +// #define VERSYM_VERSION 0x7fff + +#define VERSYM_VERSION 0x7fff + +namespace base { + +namespace { +template <int N> class ElfClass { + public: + static const int kElfClass = -1; + static int ElfBind(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } + static int ElfType(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } +}; + +template <> class ElfClass<32> { + public: + static const int kElfClass = ELFCLASS32; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF32_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF32_ST_TYPE(symbol->st_info); + } +}; + +template <> class ElfClass<64> { + public: + static const int kElfClass = ELFCLASS64; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF64_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF64_ST_TYPE(symbol->st_info); + } +}; + +typedef ElfClass<__WORDSIZE> CurrentElfClass; + +// Extract an element from one of the ELF tables, cast it to desired type. +// This is just a simple arithmetic and a glorified cast. +// Callers are responsible for bounds checking. +template <class T> +const T* GetTableElement(const ElfW(Ehdr) *ehdr, + ElfW(Off) table_offset, + ElfW(Word) element_size, + size_t index) { + return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) + + table_offset + + index * element_size); +} +} // namespace + +const void *const ElfMemImage::kInvalidBase = + reinterpret_cast<const void *>(~0L); + +ElfMemImage::ElfMemImage(const void *base) { + CHECK(base != kInvalidBase); + Init(base); +} + +int ElfMemImage::GetNumSymbols() const { + if (!hash_) { + return 0; + } + // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash + return hash_[1]; +} + +const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return dynsym_ + index; +} + +const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return versym_ + index; +} + +const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { + CHECK_LT(index, ehdr_->e_phnum); + return GetTableElement<ElfW(Phdr)>(ehdr_, + ehdr_->e_phoff, + ehdr_->e_phentsize, + index); +} + +const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { + if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { + // Symbol corresponds to "special" (e.g. SHN_ABS) section. 
+ return reinterpret_cast<const void *>(sym->st_value); + } + CHECK_LT(link_base_, sym->st_value); + return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_; +} + +const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { + CHECK_LE(index, verdefnum_); + const ElfW(Verdef) *version_definition = verdef_; + while (version_definition->vd_ndx < index && version_definition->vd_next) { + const char *const version_definition_as_char = + reinterpret_cast<const char *>(version_definition); + version_definition = + reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + + version_definition->vd_next); + } + return version_definition->vd_ndx == index ? version_definition : NULL; +} + +const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( + const ElfW(Verdef) *verdef) const { + return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); +} + +const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +void ElfMemImage::Init(const void *base) { + ehdr_ = NULL; + dynsym_ = NULL; + dynstr_ = NULL; + versym_ = NULL; + verdef_ = NULL; + hash_ = NULL; + strsize_ = 0; + verdefnum_ = 0; + link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. + if (!base) { + return; + } + const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base); + // Fake VDSO has low bit set. + const bool fake_vdso = ((base_as_uintptr_t & 1) != 0); + base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1); + const char *const base_as_char = reinterpret_cast<const char *>(base); + if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || + base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { + RAW_DCHECK(false, "no ELF magic"); // at %p", base); + return; + } + int elf_class = base_as_char[EI_CLASS]; + if (elf_class != CurrentElfClass::kElfClass) { + DCHECK_EQ(elf_class, CurrentElfClass::kElfClass); + return; + } + switch (base_as_char[EI_DATA]) { + case ELFDATA2LSB: { + if (__LITTLE_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + case ELFDATA2MSB: { + if (__BIG_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + default: { + RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA]; + return; + } + } + + ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); + const ElfW(Phdr) *dynamic_program_header = NULL; + for (int i = 0; i < ehdr_->e_phnum; ++i) { + const ElfW(Phdr) *const program_header = GetPhdr(i); + switch (program_header->p_type) { + case PT_LOAD: + if (link_base_ == ~0L) { + link_base_ = program_header->p_vaddr; + } + break; + case PT_DYNAMIC: + dynamic_program_header = program_header; + break; + } + } + if (link_base_ == ~0L || !dynamic_program_header) { + RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO"); + RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO"); + // Mark this image as not present. Can not recur infinitely. + Init(0); + return; + } + ptrdiff_t relocation = + base_as_char - reinterpret_cast<const char *>(link_base_); + ElfW(Dyn) *dynamic_entry = + reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + + relocation); + for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { + ElfW(Xword) value = dynamic_entry->d_un.d_val; + if (fake_vdso) { + // A complication: in the real VDSO, dynamic entries are not relocated + // (it wasn't loaded by a dynamic loader). 
But when testing with a + // "fake" dlopen()ed vdso library, the loader relocates some (but + // not all!) of them before we get here. + if (dynamic_entry->d_tag == DT_VERDEF) { + // The only dynamic entry (of the ones we care about) libc-2.3.6 + // loader doesn't relocate. + value += relocation; + } + } else { + // Real VDSO. Everything needs to be relocated. + value += relocation; + } + switch (dynamic_entry->d_tag) { + case DT_HASH: + hash_ = reinterpret_cast<ElfW(Word) *>(value); + break; + case DT_SYMTAB: + dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); + break; + case DT_STRTAB: + dynstr_ = reinterpret_cast<const char *>(value); + break; + case DT_VERSYM: + versym_ = reinterpret_cast<ElfW(Versym) *>(value); + break; + case DT_VERDEF: + verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); + break; + case DT_VERDEFNUM: + verdefnum_ = dynamic_entry->d_un.d_val; + break; + case DT_STRSZ: + strsize_ = dynamic_entry->d_un.d_val; + break; + default: + // Unrecognized entries explicitly ignored. + break; + } + } + if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || + !verdef_ || !verdefnum_ || !strsize_) { + RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)"); + RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)"); + RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)"); + RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)"); + RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)"); + RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)"); + RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)"); + // Mark this image as not present. Can not recur infinitely. + Init(0); + return; + } +} + +bool ElfMemImage::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 && + CurrentElfClass::ElfType(it->symbol) == type) { + if (info) { + *info = *it; + } + return true; + } + } + return false; +} + +bool ElfMemImage::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + const char *const symbol_start = + reinterpret_cast<const char *>(it->address); + const char *const symbol_end = symbol_start + it->symbol->st_size; + if (symbol_start <= address && address < symbol_end) { + if (info_out) { + // Client wants to know details for that symbol (the usual case). + if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) { + // Strong symbol; just return it. + *info_out = *it; + return true; + } else { + // Weak or local. Record it, but keep looking for a strong one. + *info_out = *it; + } + } else { + // Client only cares if there is an overlapping symbol. 
+ return true; + } + } + } + return false; +} + +ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) + : index_(index), image_(image) { +} + +const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { + return &info_; +} + +const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { + return info_; +} + +bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { + return this->image_ == rhs.image_ && this->index_ == rhs.index_; +} + +bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { + return !(*this == rhs); +} + +ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { + this->Update(1); + return *this; +} + +ElfMemImage::SymbolIterator ElfMemImage::begin() const { + SymbolIterator it(this, 0); + it.Update(0); + return it; +} + +ElfMemImage::SymbolIterator ElfMemImage::end() const { + return SymbolIterator(this, GetNumSymbols()); +} + +void ElfMemImage::SymbolIterator::Update(int increment) { + const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); + CHECK(image->IsPresent() || increment == 0); + if (!image->IsPresent()) { + return; + } + index_ += increment; + if (index_ >= image->GetNumSymbols()) { + index_ = image->GetNumSymbols(); + return; + } + const ElfW(Sym) *symbol = image->GetDynsym(index_); + const ElfW(Versym) *version_symbol = image->GetVersym(index_); + CHECK(symbol && version_symbol); + const char *const symbol_name = image->GetDynstr(symbol->st_name); + const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; + const ElfW(Verdef) *version_definition = NULL; + const char *version_name = ""; + if (symbol->st_shndx == SHN_UNDEF) { + // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and + // version_index could well be greater than verdefnum_, so calling + // GetVerdef(version_index) may trigger assertion. + } else { + version_definition = image->GetVerdef(version_index); + } + if (version_definition) { + // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, + // optional 2nd if the version has a parent. + CHECK_LE(1, version_definition->vd_cnt); + CHECK_LE(version_definition->vd_cnt, 2); + const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); + version_name = image->GetVerstr(version_aux->vda_name); + } + info_.name = symbol_name; + info_.version = version_name; + info_.address = image->GetSymAddr(symbol); + info_.symbol = symbol; +} + +} // namespace base + +#endif // HAVE_ELF_MEM_IMAGE diff --git a/src/third_party/gperftools-2.5/src/base/elf_mem_image.h b/src/third_party/gperftools-2.5/src/base/elf_mem_image.h new file mode 100644 index 00000000000..5fb00fffb5f --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/elf_mem_image.h @@ -0,0 +1,135 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup for in-memory Elf images. + +#ifndef BASE_ELF_MEM_IMAGE_H_ +#define BASE_ELF_MEM_IMAGE_H_ + +#include <config.h> +#ifdef HAVE_FEATURES_H +#include <features.h> // for __GLIBC__ +#endif + +// Maybe one day we can rewrite this file not to require the elf +// symbol extensions in glibc, but for right now we need them. +#if defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__) + +#define HAVE_ELF_MEM_IMAGE 1 + +#include <stdlib.h> +#include <link.h> // for ElfW + +namespace base { + +// An in-memory ELF image (may not exist on disk). +class ElfMemImage { + public: + // Sentinel: there could never be an elf image at this address. + static const void *const kInvalidBase; + + // Information about a single vdso symbol. + // All pointers are into .dynsym, .dynstr, or .text of the VDSO. + // Do not free() them or modify through them. + struct SymbolInfo { + const char *name; // E.g. "__vdso_getcpu" + const char *version; // E.g. "LINUX_2.6", could be "" + // for unversioned symbol. + const void *address; // Relocated symbol address. + const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table. + }; + + // Supports iteration over all dynamic symbols. + class SymbolIterator { + public: + friend class ElfMemImage; + const SymbolInfo *operator->() const; + const SymbolInfo &operator*() const; + SymbolIterator& operator++(); + bool operator!=(const SymbolIterator &rhs) const; + bool operator==(const SymbolIterator &rhs) const; + private: + SymbolIterator(const void *const image, int index); + void Update(int incr); + SymbolInfo info_; + int index_; + const void *const image_; + }; + + + explicit ElfMemImage(const void *base); + void Init(const void *base); + bool IsPresent() const { return ehdr_ != NULL; } + const ElfW(Phdr)* GetPhdr(int index) const; + const ElfW(Sym)* GetDynsym(int index) const; + const ElfW(Versym)* GetVersym(int index) const; + const ElfW(Verdef)* GetVerdef(int index) const; + const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; + const char* GetDynstr(ElfW(Word) offset) const; + const void* GetSymAddr(const ElfW(Sym) *sym) const; + const char* GetVerstr(ElfW(Word) offset) const; + int GetNumSymbols() const; + + SymbolIterator begin() const; + SymbolIterator end() const; + + // Look up versioned dynamic symbol in the image. + // Returns false if image is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out != NULL, additional details are filled in. 
+ bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if image isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != NULL, additional details are filled in. + bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + private: + const ElfW(Ehdr) *ehdr_; + const ElfW(Sym) *dynsym_; + const ElfW(Versym) *versym_; + const ElfW(Verdef) *verdef_; + const ElfW(Word) *hash_; + const char *dynstr_; + size_t strsize_; + size_t verdefnum_; + ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). +}; + +} // namespace base + +#endif // __ELF__ and __GLIBC__ and !__native_client__ + +#endif // BASE_ELF_MEM_IMAGE_H_ diff --git a/src/third_party/gperftools-2.5/src/base/elfcore.h b/src/third_party/gperftools-2.5/src/base/elfcore.h new file mode 100644 index 00000000000..d9599edac8d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/elfcore.h @@ -0,0 +1,401 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005-2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke, Carl Crous + */ + +#ifndef _ELFCORE_H +#define _ELFCORE_H +#ifdef __cplusplus +extern "C" { +#endif + +/* We currently only support x86-32, x86-64, ARM, MIPS, PPC on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__) || defined(__PPC__)) && defined(__linux) + +#include <stdarg.h> +#include <stdint.h> +#include <sys/types.h> +#include <config.h> + + +/* Define the DUMPER symbol to make sure that there is exactly one + * core dumper built into the library. + */ +#define DUMPER "ELF" + +/* By the time that we get a chance to read CPU registers in the + * calling thread, they are already in a not particularly useful + * state. 
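(Stepping back to the elf_mem_image.h interface just concluded above: a hedged sketch of the versioned-lookup path. The symbol "__vdso_getcpu" with version "LINUX_2.6" is only a plausible example, FindGetcpu is hypothetical, and STT_FUNC is the standard ELF function type from <elf.h>.)

    #include <elf.h>                    // STT_FUNC
    #include "base/elf_mem_image.h"

    // Resolve one versioned VDSO entry point; 'image' as built in the earlier sketch.
    static bool FindGetcpu(const base::ElfMemImage &image, const void **addr) {
      base::ElfMemImage::SymbolInfo info;
      if (!image.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info))
        return false;                   // image absent, or no such symbol/version/type
      *addr = info.address;             // already relocated, directly callable
      return true;
    }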
Besides, there will be multiple frames on the stack that are + * just making the core file confusing. To fix this problem, we take a + * snapshot of the frame pointer, stack pointer, and instruction + * pointer at an earlier time, and then insert these values into the + * core file. + */ + +#if defined(__i386__) || defined(__x86_64__) + typedef struct i386_regs { /* Normal (non-FPU) CPU registers */ + #ifdef __x86_64__ + #define BP rbp + #define SP rsp + #define IP rip + uint64_t r15,r14,r13,r12,rbp,rbx,r11,r10; + uint64_t r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax; + uint64_t rip,cs,eflags; + uint64_t rsp,ss; + uint64_t fs_base, gs_base; + uint64_t ds,es,fs,gs; + #else + #define BP ebp + #define SP esp + #define IP eip + uint32_t ebx, ecx, edx, esi, edi, ebp, eax; + uint16_t ds, __ds, es, __es; + uint16_t fs, __fs, gs, __gs; + uint32_t orig_eax, eip; + uint16_t cs, __cs; + uint32_t eflags, esp; + uint16_t ss, __ss; + #endif + } i386_regs; +#elif defined(__ARM_ARCH_3__) + typedef struct arm_regs { /* General purpose registers */ + #define BP uregs[11] /* Frame pointer */ + #define SP uregs[13] /* Stack pointer */ + #define IP uregs[15] /* Program counter */ + #define LR uregs[14] /* Link register */ + long uregs[18]; + } arm_regs; +#elif defined(__mips__) + typedef struct mips_regs { + unsigned long pad[6]; /* Unused padding to match kernel structures */ + unsigned long uregs[32]; /* General purpose registers. */ + unsigned long hi; /* Used for multiplication and division. */ + unsigned long lo; + unsigned long cp0_epc; /* Program counter. */ + unsigned long cp0_badvaddr; + unsigned long cp0_status; + unsigned long cp0_cause; + unsigned long unused; + } mips_regs; +#elif defined (__PPC__) + typedef struct ppc_regs { + #define SP uregs[1] /* Stack pointer */ + #define IP rip /* Program counter */ + #define LR lr /* Link register */ + unsigned long uregs[32]; /* General Purpose Registers - r0-r31. */ + double fpr[32]; /* Floating-Point Registers - f0-f31. */ + unsigned long rip; /* Program counter. */ + unsigned long msr; + unsigned long ccr; + unsigned long lr; + unsigned long ctr; + unsigned long xeq; + unsigned long mq; + } ppc_regs; +#endif + +#if defined(__i386__) && defined(__GNUC__) + /* On x86 we provide an optimized version of the FRAME() macro, if the + * compiler supports a GCC-style asm() directive. This results in somewhat + * more accurate values for CPU registers. 
+ */ + typedef struct Frame { + struct i386_regs uregs; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile ( \ + "push %%ebp\n" \ + "push %%ebx\n" \ + "mov %%ebx,0(%%eax)\n" \ + "mov %%ecx,4(%%eax)\n" \ + "mov %%edx,8(%%eax)\n" \ + "mov %%esi,12(%%eax)\n" \ + "mov %%edi,16(%%eax)\n" \ + "mov %%ebp,20(%%eax)\n" \ + "mov %%eax,24(%%eax)\n" \ + "mov %%ds,%%ebx\n" \ + "mov %%ebx,28(%%eax)\n" \ + "mov %%es,%%ebx\n" \ + "mov %%ebx,32(%%eax)\n" \ + "mov %%fs,%%ebx\n" \ + "mov %%ebx,36(%%eax)\n" \ + "mov %%gs,%%ebx\n" \ + "mov %%ebx, 40(%%eax)\n" \ + "call 0f\n" \ + "0:pop %%ebx\n" \ + "add $1f-0b,%%ebx\n" \ + "mov %%ebx,48(%%eax)\n" \ + "mov %%cs,%%ebx\n" \ + "mov %%ebx,52(%%eax)\n" \ + "pushf\n" \ + "pop %%ebx\n" \ + "mov %%ebx,56(%%eax)\n" \ + "mov %%esp,%%ebx\n" \ + "add $8,%%ebx\n" \ + "mov %%ebx,60(%%eax)\n" \ + "mov %%ss,%%ebx\n" \ + "mov %%ebx,64(%%eax)\n" \ + "pop %%ebx\n" \ + "pop %%ebp\n" \ + "1:" \ + : : "a" (&f) : "memory"); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + (r) = (f).uregs; \ + } while (0) +#elif defined(__x86_64__) && defined(__GNUC__) + /* The FRAME and SET_FRAME macros for x86_64. */ + typedef struct Frame { + struct i386_regs uregs; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile ( \ + "push %%rbp\n" \ + "push %%rbx\n" \ + "mov %%r15,0(%%rax)\n" \ + "mov %%r14,8(%%rax)\n" \ + "mov %%r13,16(%%rax)\n" \ + "mov %%r12,24(%%rax)\n" \ + "mov %%rbp,32(%%rax)\n" \ + "mov %%rbx,40(%%rax)\n" \ + "mov %%r11,48(%%rax)\n" \ + "mov %%r10,56(%%rax)\n" \ + "mov %%r9,64(%%rax)\n" \ + "mov %%r8,72(%%rax)\n" \ + "mov %%rax,80(%%rax)\n" \ + "mov %%rcx,88(%%rax)\n" \ + "mov %%rdx,96(%%rax)\n" \ + "mov %%rsi,104(%%rax)\n" \ + "mov %%rdi,112(%%rax)\n" \ + "mov %%ds,%%rbx\n" \ + "mov %%rbx,184(%%rax)\n" \ + "mov %%es,%%rbx\n" \ + "mov %%rbx,192(%%rax)\n" \ + "mov %%fs,%%rbx\n" \ + "mov %%rbx,200(%%rax)\n" \ + "mov %%gs,%%rbx\n" \ + "mov %%rbx,208(%%rax)\n" \ + "call 0f\n" \ + "0:pop %%rbx\n" \ + "add $1f-0b,%%rbx\n" \ + "mov %%rbx,128(%%rax)\n" \ + "mov %%cs,%%rbx\n" \ + "mov %%rbx,136(%%rax)\n" \ + "pushf\n" \ + "pop %%rbx\n" \ + "mov %%rbx,144(%%rax)\n" \ + "mov %%rsp,%%rbx\n" \ + "add $16,%%ebx\n" \ + "mov %%rbx,152(%%rax)\n" \ + "mov %%ss,%%rbx\n" \ + "mov %%rbx,160(%%rax)\n" \ + "pop %%rbx\n" \ + "pop %%rbp\n" \ + "1:" \ + : : "a" (&f) : "memory"); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + (f).uregs.fs_base = (r).fs_base; \ + (f).uregs.gs_base = (r).gs_base; \ + (r) = (f).uregs; \ + } while (0) +#elif defined(__ARM_ARCH_3__) && defined(__GNUC__) + /* ARM calling conventions are a little more tricky. A little assembly + * helps in obtaining an accurate snapshot of all registers. + */ + typedef struct Frame { + struct arm_regs arm; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + long cpsr; \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile( \ + "stmia %0, {r0-r15}\n" /* All integer regs */\ + : : "r"(&f.arm) : "memory"); \ + f.arm.uregs[16] = 0; \ + __asm__ volatile( \ + "mrs %0, cpsr\n" /* Condition code reg */\ + : "=r"(cpsr)); \ + f.arm.uregs[17] = cpsr; \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + /* Don't override the FPU status register. */\ + /* Use the value obtained from ptrace(). This*/\ + /* works, because our code does not perform */\ + /* any FPU operations, itself. 
*/\ + long fps = (f).arm.uregs[16]; \ + errno = (f).errno_; \ + (r) = (f).arm; \ + (r).uregs[16] = fps; \ + } while (0) +#elif defined(__mips__) && defined(__GNUC__) + typedef struct Frame { + struct mips_regs mips_regs; + int errno_; + pid_t tid; + } Frame; + #define MIPSREG(n) ({ register unsigned long r __asm__("$"#n); r; }) + #define FRAME(f) Frame f = { 0 }; \ + do { \ + unsigned long hi, lo; \ + register unsigned long pc __asm__("$31"); \ + f.mips_regs.uregs[ 0] = MIPSREG( 0); \ + f.mips_regs.uregs[ 1] = MIPSREG( 1); \ + f.mips_regs.uregs[ 2] = MIPSREG( 2); \ + f.mips_regs.uregs[ 3] = MIPSREG( 3); \ + f.mips_regs.uregs[ 4] = MIPSREG( 4); \ + f.mips_regs.uregs[ 5] = MIPSREG( 5); \ + f.mips_regs.uregs[ 6] = MIPSREG( 6); \ + f.mips_regs.uregs[ 7] = MIPSREG( 7); \ + f.mips_regs.uregs[ 8] = MIPSREG( 8); \ + f.mips_regs.uregs[ 9] = MIPSREG( 9); \ + f.mips_regs.uregs[10] = MIPSREG(10); \ + f.mips_regs.uregs[11] = MIPSREG(11); \ + f.mips_regs.uregs[12] = MIPSREG(12); \ + f.mips_regs.uregs[13] = MIPSREG(13); \ + f.mips_regs.uregs[14] = MIPSREG(14); \ + f.mips_regs.uregs[15] = MIPSREG(15); \ + f.mips_regs.uregs[16] = MIPSREG(16); \ + f.mips_regs.uregs[17] = MIPSREG(17); \ + f.mips_regs.uregs[18] = MIPSREG(18); \ + f.mips_regs.uregs[19] = MIPSREG(19); \ + f.mips_regs.uregs[20] = MIPSREG(20); \ + f.mips_regs.uregs[21] = MIPSREG(21); \ + f.mips_regs.uregs[22] = MIPSREG(22); \ + f.mips_regs.uregs[23] = MIPSREG(23); \ + f.mips_regs.uregs[24] = MIPSREG(24); \ + f.mips_regs.uregs[25] = MIPSREG(25); \ + f.mips_regs.uregs[26] = MIPSREG(26); \ + f.mips_regs.uregs[27] = MIPSREG(27); \ + f.mips_regs.uregs[28] = MIPSREG(28); \ + f.mips_regs.uregs[29] = MIPSREG(29); \ + f.mips_regs.uregs[30] = MIPSREG(30); \ + f.mips_regs.uregs[31] = MIPSREG(31); \ + __asm__ volatile ("mfhi %0" : "=r"(hi)); \ + __asm__ volatile ("mflo %0" : "=r"(lo)); \ + __asm__ volatile ("jal 1f; 1:nop" : "=r"(pc)); \ + f.mips_regs.hi = hi; \ + f.mips_regs.lo = lo; \ + f.mips_regs.cp0_epc = pc; \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + memcpy((r).uregs, (f).mips_regs.uregs, \ + 32*sizeof(unsigned long)); \ + (r).hi = (f).mips_regs.hi; \ + (r).lo = (f).mips_regs.lo; \ + (r).cp0_epc = (f).mips_regs.cp0_epc; \ + } while (0) +#else + /* If we do not have a hand-optimized assembly version of the FRAME() + * macro, we cannot reliably unroll the stack. So, we show a few additional + * stack frames for the coredumper. + */ + typedef struct Frame { + pid_t tid; + } Frame; + #define FRAME(f) Frame f; do { f.tid = sys_gettid(); } while (0) + #define SET_FRAME(f,r) do { } while (0) +#endif + + +/* Internal function for generating a core file. This API can change without + * notice and is only supposed to be used internally by the core dumper. + * + * This function works for both single- and multi-threaded core + * dumps. If called as + * + * FRAME(frame); + * InternalGetCoreDump(&frame, 0, NULL, ap); + * + * it creates a core file that only contains information about the + * calling thread. + * + * Optionally, the caller can provide information about other threads + * by passing their process ids in "thread_pids". The process id of + * the caller should not be included in this array. All of the threads + * must have been attached to with ptrace(), prior to calling this + * function. They will be detached when "InternalGetCoreDump()" returns. + * + * This function either returns a file handle that can be read for obtaining + * a core dump, or "-1" in case of an error. 
In the latter case, "errno" + * will be set appropriately. + * + * While "InternalGetCoreDump()" is not technically async signal safe, you + * might be tempted to invoke it from a signal handler. The code goes to + * great lengths to make a best effort that this will actually work. But in + * any case, you must make sure that you preserve the value of "errno" + * yourself. It is guaranteed to be clobbered otherwise. + * + * Also, "InternalGetCoreDump" is not strictly speaking re-entrant. Again, + * it makes a best effort to behave reasonably when called in a multi- + * threaded environment, but it is ultimately the caller's responsibility + * to provide locking. + */ +int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids, + va_list ap + /* const struct CoreDumpParameters *params, + const char *file_name, + const char *PATH + */); + +#endif + +#ifdef __cplusplus +} +#endif +#endif /* _ELFCORE_H */ diff --git a/src/third_party/gperftools-2.5/src/base/googleinit.h b/src/third_party/gperftools-2.5/src/base/googleinit.h new file mode 100644 index 00000000000..3ea411a325a --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/googleinit.h @@ -0,0 +1,74 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
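(Returning for a moment to the elfcore.h contract spelled out before this header: a sketch of the single-thread call sequence its comments document. FRAME and InternalGetCoreDump are the real names from elfcore.h; DumpSelf and the surrounding glue are hypothetical.)

    #include <stdarg.h>
    /* elfcore.h assumed included; threads other than the caller, if any, would
     * first have to be ptrace-attached and their pids passed instead of (0, NULL). */
    static int DumpSelf(va_list ap) {
      FRAME(frame);   /* snapshot ip/sp/bp here, before deeper frames clutter the dump */
      return InternalGetCoreDump(&frame, 0, NULL, ap);  /* fd of the dump, or -1 with errno set */
    }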
+ +// --- +// Author: Jacob Hoffman-Andrews + +#ifndef _GOOGLEINIT_H +#define _GOOGLEINIT_H + +#include "base/logging.h" + +class GoogleInitializer { + public: + typedef void (*VoidFunction)(void); + GoogleInitializer(const char* name, VoidFunction ctor, VoidFunction dtor) + : name_(name), destructor_(dtor) { + RAW_VLOG(10, "<GoogleModuleObject> constructing: %s\n", name_); + if (ctor) + ctor(); + } + ~GoogleInitializer() { + RAW_VLOG(10, "<GoogleModuleObject> destroying: %s\n", name_); + if (destructor_) + destructor_(); + } + + private: + const char* const name_; + const VoidFunction destructor_; +}; + +#define REGISTER_MODULE_INITIALIZER(name, body) \ + namespace { \ + static void google_init_module_##name () { body; } \ + GoogleInitializer google_initializer_module_##name(#name, \ + google_init_module_##name, NULL); \ + } + +#define REGISTER_MODULE_DESTRUCTOR(name, body) \ + namespace { \ + static void google_destruct_module_##name () { body; } \ + GoogleInitializer google_destructor_module_##name(#name, \ + NULL, google_destruct_module_##name); \ + } + + +#endif /* _GOOGLEINIT_H */ diff --git a/src/third_party/gperftools-2.5/src/base/linux_syscall_support.h b/src/third_party/gperftools-2.5/src/base/linux_syscall_support.h new file mode 100644 index 00000000000..5d578cd72dc --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/linux_syscall_support.h @@ -0,0 +1,2755 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005-2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +/* This file includes Linux-specific support functions common to the + * coredumper and the thread lister; primarily, this is a collection + * of direct system calls, and a couple of symbols missing from + * standard header files. 
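(An illustrative aside on what that collection of direct system calls means in practice; sys_gettid and sys_write are among the wrappers this header really defines, log_checkpoint is hypothetical, and the wrapper names are governed by the SYS_PREFIX option described just below.)

    #include "base/linux_syscall_support.h"

    /* Raw syscalls that never enter libc -- usable after fork() or while other
     * threads are suspended, when libc's internal locks cannot be trusted. */
    static void log_checkpoint(void) {
      pid_t tid = sys_gettid();          /* no cached-pid tricks, no TLS, no locks */
      (void) tid;
      sys_write(2, "checkpoint\n", 11);  /* write(2) issued directly via the kernel ABI */
    }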
+ * There are a few options that the including file can set to control + * the behavior of this file: + * + * SYS_CPLUSPLUS: + * The entire header file will normally be wrapped in 'extern "C" { }", + * making it suitable for compilation as both C and C++ source. If you + * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit + * the wrapping. N.B. doing so will suppress inclusion of all prerequisite + * system header files, too. It is the caller's responsibility to provide + * the necessary definitions. + * + * SYS_ERRNO: + * All system calls will update "errno" unless overriden by setting the + * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be + * an l-value. + * + * SYS_INLINE: + * New symbols will be defined "static inline", unless overridden by + * the SYS_INLINE macro. + * + * SYS_LINUX_SYSCALL_SUPPORT_H + * This macro is used to avoid multiple inclusions of this header file. + * If you need to include this file more than once, make sure to + * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. + * + * SYS_PREFIX: + * New system calls will have a prefix of "sys_" unless overridden by + * the SYS_PREFIX macro. Valid values for this macro are [0..9] which + * results in prefixes "sys[0..9]_". It is also possible to set this + * macro to -1, which avoids all prefixes. + * + * This file defines a few internal symbols that all start with "LSS_". + * Do not access these symbols from outside this file. They are not part + * of the supported API. + * + * NOTE: This is a stripped down version of the official opensource + * version of linux_syscall_support.h, which lives at + * http://code.google.com/p/linux-syscall-support/ + * It includes only the syscalls that are used in perftools, plus a + * few extra. Here's the breakdown: + * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u + * sys__exit( + * sys_clone( + * sys_close( + * sys_fcntl( + * sys_fstat( + * sys_futex( + * sys_getcpu( + * sys_getdents64( + * sys_getppid( + * sys_gettid( + * sys_lseek( + * sys_mmap( + * sys_mremap( + * sys_munmap( + * sys_open( + * sys_pipe( + * sys_prctl( + * sys_ptrace( + * sys_ptrace_detach( + * sys_read( + * sys_sched_yield( + * sys_sigaction( + * sys_sigaltstack( + * sys_sigdelset( + * sys_sigfillset( + * sys_sigprocmask( + * sys_socket( + * sys_stat( + * sys_waitpid( + * 2) These are used as subroutines of the above: + * sys_getpid -- gettid + * sys_kill -- ptrace_detach + * sys_restore -- sigaction + * sys_restore_rt -- sigaction + * sys_socketcall -- socket + * sys_wait4 -- waitpid + * 3) I left these in even though they're not used. They either + * complement the above (write vs read) or are variants (rt_sigaction): + * sys_fstat64 + * sys_llseek + * sys_mmap2 + * sys_openat + * sys_getdents + * sys_rt_sigaction + * sys_rt_sigprocmask + * sys_sigaddset + * sys_sigemptyset + * sys_stat64 + * sys_write + */ +#ifndef SYS_LINUX_SYSCALL_SUPPORT_H +#define SYS_LINUX_SYSCALL_SUPPORT_H + +/* We currently only support x86-32, x86-64, ARM, MIPS, PPC/PPC64, Aarch64 and s390x on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__mips__) || defined(__PPC__) || \ + defined(__aarch64__) || defined(__s390x__)) \ + && (defined(__linux)) + +#ifndef SYS_CPLUSPLUS +#ifdef __cplusplus +/* Some system header files in older versions of gcc neglect to properly + * handle being included from C++. 
As it appears to be harmless to have + * multiple nested 'extern "C"' blocks, just add another one here. + */ +extern "C" { +#endif + +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <sys/ptrace.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <endian.h> + +#ifdef __mips__ +/* Include definitions of the ABI currently in use. */ +#include <sgidefs.h> +#endif + +#endif + +/* As glibc often provides subtly incompatible data structures (and implicit + * wrapper functions that convert them), we provide our own kernel data + * structures for use by the system calls. + * These structures have been developed by using Linux 2.6.23 headers for + * reference. Note though, we do not care about exact API compatibility + * with the kernel, and in fact the kernel often does not have a single + * API that works across architectures. Instead, we try to mimic the glibc + * API where reasonable, and only guarantee ABI compatibility with the + * kernel headers. + * Most notably, here are a few changes that were made to the structures + * defined by kernel headers: + * + * - we only define structures, but not symbolic names for kernel data + * types. For the latter, we directly use the native C datatype + * (i.e. "unsigned" instead of "mode_t"). + * - in a few cases, it is possible to define identical structures for + * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by + * standardizing on the 64bit version of the data types. In particular, + * this means that we use "unsigned" where the 32bit headers say + * "unsigned long". + * - overall, we try to minimize the number of cases where we need to + * conditionally define different structures. + * - the "struct kernel_sigaction" class of structures have been + * modified to more closely mimic glibc's API by introducing an + * anonymous union for the function pointer. + * - a small number of field names had to have an underscore appended to + * them, because glibc defines a global macro by the same name. 
+ */ + +/* include/linux/dirent.h */ +struct kernel_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[256]; +}; + +/* include/linux/dirent.h */ +struct kernel_dirent { + long d_ino; + long d_off; + unsigned short d_reclen; + char d_name[256]; +}; + +/* include/linux/time.h */ +struct kernel_timespec { + long tv_sec; + long tv_nsec; +}; + +/* include/linux/time.h */ +struct kernel_timeval { + long tv_sec; + long tv_usec; +}; + +/* include/linux/resource.h */ +struct kernel_rusage { + struct kernel_timeval ru_utime; + struct kernel_timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +#if defined(__i386__) || defined(__arm__) || defined(__PPC__) + +/* include/asm-{arm,i386,mips,ppc}/signal.h */ +struct kernel_old_sigaction { + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + unsigned long sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +} __attribute__((packed,aligned(4))); +#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +#elif defined(__aarch64__) || defined(__s390x__) + // No kernel_old_sigaction defined for arm64 or s390x. +#endif + +/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the + * exactly match the size of the signal set, even though the API was + * intended to be extensible. We define our own KERNEL_NSIG to deal with + * this. + * Please note that glibc provides signals [1.._NSIG-1], whereas the + * kernel (and this header) provides the range [1..KERNEL_NSIG]. The + * actual number of signals is obviously the same, but the constants + * differ by one. 
+ */ +#ifdef __mips__ +#define KERNEL_NSIG 128 +#else +#define KERNEL_NSIG 64 +#endif + +/* include/asm-{arm,i386,mips,x86_64}/signal.h */ +struct kernel_sigset_t { + unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ + (8*sizeof(unsigned long))]; +}; + +/* include/asm-{arm,generic,i386,mips,x86_64,ppc}/signal.h */ +struct kernel_sigaction { +#ifdef __mips__ + unsigned long sa_flags; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + struct kernel_sigset_t sa_mask; +#else + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + unsigned long sa_flags; + void (*sa_restorer)(void); + struct kernel_sigset_t sa_mask; +#endif +}; + +/* include/asm-{arm,i386,mips,ppc}/stat.h */ +#ifdef __mips__ +#if _MIPS_SIM == _MIPS_SIM_ABI64 +struct kernel_stat { +#else +struct kernel_stat64 { +#endif + unsigned st_dev; + unsigned __pad0[3]; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + unsigned __pad1[3]; + long long st_size; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned st_blksize; + unsigned __pad2; + unsigned long long st_blocks; +}; +#elif defined __PPC__ +struct kernel_stat64 { + unsigned long long st_dev; + unsigned long long st_ino; + unsigned st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + int __pad2; + unsigned long long st_rdev; + long long st_size; + long long st_blksize; + long long st_blocks; + kernel_timespec st_atim; + kernel_timespec st_mtim; + kernel_timespec st_ctim; + unsigned long __unused4; + unsigned long __unused5; + unsigned long __unused6; +}; +#else +struct kernel_stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + unsigned __st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + long long st_size; + unsigned st_blksize; + unsigned long long st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned long long st_ino; +}; +#endif + +/* include/asm-{arm,generic,i386,mips,x86_64,ppc,s390}/stat.h */ +#if defined(__i386__) || defined(__arm__) +struct kernel_stat { + /* The kernel headers suggest that st_dev and st_rdev should be 32bit + * quantities encoding 12bit major and 20bit minor numbers in an interleaved + * format. In reality, we do not see useful data in the top bits. So, + * we'll leave the padding in here, until we find a better solution. 
+ */ + unsigned short st_dev; + short pad1; + unsigned st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + short pad2; + unsigned st_size; + unsigned st_blksize; + unsigned st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned __unused4; + unsigned __unused5; +}; +#elif defined(__x86_64__) +struct kernel_stat { + uint64_t st_dev; + uint64_t st_ino; + uint64_t st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + unsigned __pad0; + uint64_t st_rdev; + int64_t st_size; + int64_t st_blksize; + int64_t st_blocks; + uint64_t st_atime_; + uint64_t st_atime_nsec_; + uint64_t st_mtime_; + uint64_t st_mtime_nsec_; + uint64_t st_ctime_; + uint64_t st_ctime_nsec_; + int64_t __unused[3]; +}; +#elif defined(__PPC__) +struct kernel_stat { + unsigned long long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned long st_mode; + unsigned st_uid; + unsigned st_gid; + int __pad2; + unsigned long long st_rdev; + long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + kernel_timespec st_atim; + kernel_timespec st_mtim; + kernel_timespec st_ctim; + unsigned long __unused4; + unsigned long __unused5; + unsigned long __unused6; +}; +#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) +struct kernel_stat { + unsigned st_dev; + int st_pad1[3]; + unsigned st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + int st_pad2[2]; + long st_size; + int st_pad3; + long st_atime_; + long st_atime_nsec_; + long st_mtime_; + long st_mtime_nsec_; + long st_ctime_; + long st_ctime_nsec_; + int st_blksize; + int st_blocks; + int st_pad4[14]; +}; +#elif defined(__aarch64__) +struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + long st_blocks; + long st_atime_; + unsigned long st_atime_nsec_; + long st_mtime_; + unsigned long st_mtime_nsec_; + long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned int __unused4; + unsigned int __unused5; +}; +#elif defined(__s390x__) +struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad1; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long st_blksize; + long st_blocks; + unsigned long __unused[3]; +}; +#endif + + +/* Definitions missing from the standard header files */ +#ifndef O_DIRECTORY +#if defined(__arm__) +#define O_DIRECTORY 0040000 +#else +#define O_DIRECTORY 0200000 +#endif +#endif +#ifndef PR_GET_DUMPABLE +#define PR_GET_DUMPABLE 3 +#endif +#ifndef PR_SET_DUMPABLE +#define PR_SET_DUMPABLE 4 +#endif +#ifndef AT_FDCWD +#define AT_FDCWD (-100) +#endif +#ifndef AT_SYMLINK_NOFOLLOW +#define AT_SYMLINK_NOFOLLOW 0x100 +#endif +#ifndef AT_REMOVEDIR +#define AT_REMOVEDIR 0x200 +#endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif +#ifndef SA_RESTORER +#define SA_RESTORER 0x04000000 +#endif + +#if defined(__i386__) +#ifndef __NR_rt_sigaction +#define 
__NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 220 +#endif +#ifndef __NR_gettid +#define __NR_gettid 224 +#endif +#ifndef __NR_futex +#define __NR_futex 240 +#endif +#ifndef __NR_openat +#define __NR_openat 295 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 318 +#endif +/* End of i386 definitions */ +#elif defined(__arm__) +#ifndef __syscall +#if defined(__thumb__) || defined(__ARM_EABI__) +#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name; +#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs +#define __syscall(name) "swi\t0" +#define __syscall_safe(name) \ + "push {r7}\n" \ + "mov r7,%[sysreg]\n" \ + __syscall(name)"\n" \ + "pop {r7}" +#else +#define __SYS_REG(name) +#define __SYS_REG_LIST(regs...) regs +#define __syscall(name) "swi\t" __sys1(__NR_##name) "" +#define __syscall_safe(name) __syscall(name) +#endif +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#endif +/* End of ARM definitions */ +#elif defined(__x86_64__) +#ifndef __NR_gettid +#define __NR_gettid 186 +#endif +#ifndef __NR_futex +#define __NR_futex 202 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 217 +#endif +#ifndef __NR_openat +#define __NR_openat 257 +#endif +/* End of x86-64 definitions */ +#elif defined(__mips__) +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_Linux + 213) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_Linux + 215) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_Linux + 219) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 222) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 238) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 288) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 293) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 312) +#endif +/* End of MIPS (old 32bit API) definitions */ +#elif _MIPS_SIM == _MIPS_SIM_ABI64 +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 247) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 252) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 271) +#endif +/* End of MIPS (64bit API) definitions */ +#else +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 251) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 256) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 275) +#endif +/* End of MIPS (new 32bit API) definitions */ +#endif +/* End of MIPS definitions */ 
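(A hedged aside before the remaining architectures: each of these blocks only supplies fallback values for __NR_* constants that older toolchain headers may omit, and the values are fixed properties of each kernel ABI. A consumer worried about skew could cross-check them against modern headers; the check below is purely illustrative and not part of this file.)

    #include <sys/syscall.h>   /* modern headers define the authoritative __NR_* values */
    #if defined(__x86_64__) && defined(__NR_gettid) && (__NR_gettid != 186)
    # error "fallback __NR_gettid disagrees with the kernel headers"
    #endif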
+#elif defined(__PPC__)
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction 173
+#define __NR_rt_sigprocmask 174
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64 195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64 197
+#endif
+#ifndef __NR_socket
+#define __NR_socket 198
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 202
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid 207
+#endif
+#ifndef __NR_futex
+#define __NR_futex 221
+#endif
+#ifndef __NR_openat
+#define __NR_openat 286
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu 302
+#endif
+/* End of powerpc definitions */
+#elif defined(__aarch64__)
+#ifndef __NR_fstatat
+#define __NR_fstatat 79
+#endif
+/* End of aarch64 definitions */
+#elif defined(__s390x__)
+#ifndef __NR_quotactl
+#define __NR_quotactl 131
+#endif
+#ifndef __NR_rt_sigreturn
+#define __NR_rt_sigreturn 173
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction 174
+#endif
+#ifndef __NR_rt_sigprocmask
+#define __NR_rt_sigprocmask 175
+#endif
+#ifndef __NR_rt_sigpending
+#define __NR_rt_sigpending 176
+#endif
+#ifndef __NR_rt_sigsuspend
+#define __NR_rt_sigsuspend 179
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64 180
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64 181
+#endif
+#ifndef __NR_getrlimit
+#define __NR_getrlimit 191
+#endif
+#ifndef __NR_setresuid
+#define __NR_setresuid 208
+#endif
+#ifndef __NR_getresuid
+#define __NR_getresuid 209
+#endif
+#ifndef __NR_setresgid
+#define __NR_setresgid 210
+#endif
+#ifndef __NR_getresgid
+#define __NR_getresgid 211
+#endif
+#ifndef __NR_setfsuid
+#define __NR_setfsuid 215
+#endif
+#ifndef __NR_setfsgid
+#define __NR_setfsgid 216
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64 220
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead 222
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr 224
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr 225
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr 227
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr 228
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr 230
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr 231
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid 236
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill 237
+#endif
+#ifndef __NR_futex
+#define __NR_futex 238
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 239
+#endif
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 240
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address 252
+#endif
+#ifndef __NR_fadvise64
+#define __NR_fadvise64 253
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime 260
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres 261
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64 265
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64 266
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set 282
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get 283
+#endif
+#ifndef __NR_openat
+#define __NR_openat 288
+#endif
+#ifndef __NR_newfstatat
+#define __NR_newfstatat 293
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat 294
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages 310
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu 311
+#endif
+#ifndef __NR_fallocate
+#define __NR_fallocate 314
+#endif
+/* End of s390x definitions */
+#endif
+
+
+/* After forking, we must make sure to only call system calls.
*/ +#if __BOUNDED_POINTERS__ + #error "Need to port invocations of syscalls for bounded ptrs" +#else + /* The core dumper and the thread lister get executed after threads + * have been suspended. As a consequence, we cannot call any functions + * that acquire locks. Unfortunately, libc wraps most system calls + * (e.g. in order to implement pthread_atfork, and to make calls + * cancellable), which means we cannot call these functions. Instead, + * we have to call syscall() directly. + */ + #undef LSS_ERRNO + #ifdef SYS_ERRNO + /* Allow the including file to override the location of errno. This can + * be useful when using clone() with the CLONE_VM option. + */ + #define LSS_ERRNO SYS_ERRNO + #else + #define LSS_ERRNO errno + #endif + + #undef LSS_INLINE + #ifdef SYS_INLINE + #define LSS_INLINE SYS_INLINE + #else + #define LSS_INLINE static inline + #endif + + /* Allow the including file to override the prefix used for all new + * system calls. By default, it will be set to "sys_". + */ + #undef LSS_NAME + #ifndef SYS_PREFIX + #define LSS_NAME(name) sys_##name + #elif SYS_PREFIX < 0 + #define LSS_NAME(name) name + #elif SYS_PREFIX == 0 + #define LSS_NAME(name) sys0_##name + #elif SYS_PREFIX == 1 + #define LSS_NAME(name) sys1_##name + #elif SYS_PREFIX == 2 + #define LSS_NAME(name) sys2_##name + #elif SYS_PREFIX == 3 + #define LSS_NAME(name) sys3_##name + #elif SYS_PREFIX == 4 + #define LSS_NAME(name) sys4_##name + #elif SYS_PREFIX == 5 + #define LSS_NAME(name) sys5_##name + #elif SYS_PREFIX == 6 + #define LSS_NAME(name) sys6_##name + #elif SYS_PREFIX == 7 + #define LSS_NAME(name) sys7_##name + #elif SYS_PREFIX == 8 + #define LSS_NAME(name) sys8_##name + #elif SYS_PREFIX == 9 + #define LSS_NAME(name) sys9_##name + #endif + + #undef LSS_RETURN + #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__s390x__)) + /* Failing system calls return a negative result in the range of + * -1..-4095. These are "errno" values with the sign inverted. + */ + #define LSS_RETURN(type, res) \ + do { \ + if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__mips__) + /* On MIPS, failing system calls return -1, and set errno in a + * separate CPU register. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__PPC__) + /* On PPC, failing system calls return -1, and set errno in a + * separate CPU register. See linux/unistd.h. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err & 0x10000000 ) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #endif + #if defined(__i386__) + #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) + /* This only works for GCC-4.4 and above -- the first version to use + .cfi directives for dwarf unwind info. */ + #define CFI_ADJUST_CFA_OFFSET(adjust) \ + ".cfi_adjust_cfa_offset " #adjust "\n" + #else + #define CFI_ADJUST_CFA_OFFSET(adjust) /**/ + #endif + + /* In PIC mode (e.g. when building shared libraries), gcc for i386 + * reserves ebx. Unfortunately, most distribution ship with implementations + * of _syscallX() which clobber ebx. + * Also, most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. 
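(Before the macro definitions that follow, one worked illustration of the LSS_RETURN error convention set up above; probe_open is hypothetical, while sys_open is one of the wrappers this header defines later.)

    #include <fcntl.h>                          /* O_RDONLY */
    #include "base/linux_syscall_support.h"

    static int probe_open(void) {
      /* The kernel answers a missing path with -ENOENT, i.e. -2. Since
       * (unsigned long)(-2) falls in the reserved band >= (unsigned long)(-4095),
       * LSS_RETURN stores 2 (ENOENT) into LSS_ERRNO and hands back -1 -- the
       * familiar libc contract, without involving libc. The same encoding works
       * for pointer-returning calls such as mmap(), because the kernel never
       * returns addresses in that top 4095-byte range. */
      int fd = sys_open("/no/such/file", O_RDONLY, 0);
      return fd;                                /* -1 here, with errno == ENOENT */
    }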
+ * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_BODY + #define LSS_BODY(type,args...) \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(4) \ + "movl %2,%%ebx\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(-4) \ + args \ + : "esp", "memory"); \ + LSS_RETURN(type,__res) + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)(void) { \ + long __res; \ + __asm__ volatile("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1))); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1,type2 arg2) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3))); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4))); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + "movl %2,%%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx" \ + : "=a" (__res) \ + : "i" (__NR_##name), "ri" ((long)(arg1)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "esp", "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + long __res; \ + struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ + __asm__ __volatile__("push %%ebp\n" \ + "push %%ebx\n" \ + "movl 4(%2),%%ebp\n" \ + "movl 0(%2), %%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + "pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name), "0" ((long)(&__s)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "esp", "memory"); \ + LSS_RETURN(type,__res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "movl %3,%%ecx\n" + "jecxz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "movl %4,%%ecx\n" + "jecxz 1f\n" + + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 20; + */ + "andl $-16,%%ecx\n" + "subl $20,%%ecx\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. 
+ */ + "movl %6,%%eax\n" + "movl %%eax,4(%%ecx)\n" + "movl %3,%%eax\n" + "movl %%eax,(%%ecx)\n" + + /* %eax = syscall(%eax = __NR_clone, + * %ebx = flags, + * %ecx = child_stack, + * %edx = parent_tidptr, + * %esi = newtls, + * %edi = child_tidptr) + * Also, make sure that %ebx gets preserved as it is + * used in PIC mode. + */ + "movl %8,%%esi\n" + "movl %7,%%edx\n" + "movl %5,%%eax\n" + "movl %9,%%edi\n" + "pushl %%ebx\n" + "movl %%eax,%%ebx\n" + "movl %2,%%eax\n" + "int $0x80\n" + + /* In the parent: restore %ebx + * In the child: move "fn" into %ebx + */ + "popl %%ebx\n" + + /* if (%eax != 0) + * return %eax; + */ + "test %%eax,%%eax\n" + "jnz 1f\n" + + /* In the child, now. Terminate frame pointer chain. + */ + "movl $0,%%ebp\n" + + /* Call "fn". "arg" is already on the stack. + */ + "call *%%ebx\n" + + /* Call _exit(%ebx). Unfortunately older versions + * of gcc restrict the number of arguments that can + * be passed to asm(). So, we need to hard-code the + * system call number. + */ + "movl %%eax,%%ebx\n" + "movl $1,%%eax\n" + "int $0x80\n" + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), + "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), + "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) + : "esp", "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); + } + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } + LSS_INLINE void (*LSS_NAME(restore)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:pop %%eax\n" + "movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_sigreturn)); + return res; + } + #elif defined(__x86_64__) + /* There are no known problems with any of the _syscallX() macros + * currently shipping for x86_64, but we still need to be able to define + * our own version so that we can override the location of the errno + * location (e.g. when using the clone() system call with the CLONE_VM + * option). + */ + #undef LSS_ENTRYPOINT + #define LSS_ENTRYPOINT "syscall\n" + + /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. + * We need to explicitly cast to an unsigned 64 bit type to avoid implicit + * sign extension. We can't cast pointers directly because those are + * 32 bits, and gcc will dump ugly warnings about casting from a pointer + * to an integer of a different size. 
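+ * (Sketch of the pitfall this avoids, assuming an x32 target where
+ * pointers are 32 bits:
+ * (uint64_t)ptr -- warns about widening a pointer
+ * (long)ptr -- 32-bit value, sign-extended later
+ * (uint64_t)(uintptr_t)ptr -- zero-extends cleanly to 64 bits
+ * which is exactly the shape of LSS_SYSCALL_ARG() defined below.)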
+ */ + #undef LSS_SYSCALL_ARG + #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) + #undef _LSS_RETURN + #define _LSS_RETURN(type, res, cast) \ + do { \ + if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type)(cast)(res); \ + } while (0) + #undef LSS_RETURN + #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) + + #undef _LSS_BODY + #define _LSS_BODY(nr, type, name, cast, ...) \ + long long __res; \ + __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ + : "=a" (__res) \ + : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \ + : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ + _LSS_RETURN(type, __res, cast) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) \ + _LSS_BODY(nr, type, name, uintptr_t, ## args) + + #undef LSS_BODY_ASM0 + #undef LSS_BODY_ASM1 + #undef LSS_BODY_ASM2 + #undef LSS_BODY_ASM3 + #undef LSS_BODY_ASM4 + #undef LSS_BODY_ASM5 + #undef LSS_BODY_ASM6 + #define LSS_BODY_ASM0 + #define LSS_BODY_ASM1 LSS_BODY_ASM0 + #define LSS_BODY_ASM2 LSS_BODY_ASM1 + #define LSS_BODY_ASM3 LSS_BODY_ASM2 + #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" + #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" + #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" + + #undef LSS_BODY_CLOBBER0 + #undef LSS_BODY_CLOBBER1 + #undef LSS_BODY_CLOBBER2 + #undef LSS_BODY_CLOBBER3 + #undef LSS_BODY_CLOBBER4 + #undef LSS_BODY_CLOBBER5 + #undef LSS_BODY_CLOBBER6 + #define LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 + #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 + #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", + #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", + #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", + + #undef LSS_BODY_ARG0 + #undef LSS_BODY_ARG1 + #undef LSS_BODY_ARG2 + #undef LSS_BODY_ARG3 + #undef LSS_BODY_ARG4 + #undef LSS_BODY_ARG5 + #undef LSS_BODY_ARG6 + #define LSS_BODY_ARG0() + #define LSS_BODY_ARG1(arg1) \ + LSS_BODY_ARG0(), "D" (arg1) + #define LSS_BODY_ARG2(arg1, arg2) \ + LSS_BODY_ARG1(arg1), "S" (arg2) + #define LSS_BODY_ARG3(arg1, arg2, arg3) \ + LSS_BODY_ARG2(arg1, arg2), "d" (arg3) + #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \ + LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4) + #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \ + LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5) + #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6) + + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)() { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\ + } + #undef _syscall5 + 
#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+ LSS_SYSCALL_ARG(arg5)); \
+ }
+ #undef _syscall6
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+ LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
+ }
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+ long long __res;
+ {
+ __asm__ __volatile__(/* if (fn == NULL)
+ * return -EINVAL;
+ */
+ "testq %4,%4\n"
+ "jz 1f\n"
+
+ /* if (child_stack == NULL)
+ * return -EINVAL;
+ */
+ "testq %5,%5\n"
+ "jz 1f\n"
+
+ /* Set up alignment of the child stack:
+ * child_stack = (child_stack & ~0xF) - 16;
+ */
+ "andq $-16,%5\n"
+ "subq $16,%5\n"
+
+ /* Push "arg" and "fn" onto the stack that will be
+ * used by the child.
+ */
+ "movq %7,8(%5)\n"
+ "movq %4,0(%5)\n"
+
+ /* %rax = syscall(%rax = __NR_clone,
+ * %rdi = flags,
+ * %rsi = child_stack,
+ * %rdx = parent_tidptr,
+ * %r8 = new_tls,
+ * %r10 = child_tidptr)
+ */
+ "movq %2,%%rax\n"
+ "movq %9,%%r8\n"
+ "movq %10,%%r10\n"
+ "syscall\n"
+
+ /* if (%rax != 0)
+ * return;
+ */
+ "testq %%rax,%%rax\n"
+ "jnz 1f\n"
+
+ /* In the child. Terminate frame pointer chain.
+ */
+ "xorq %%rbp,%%rbp\n"
+
+ /* Call "fn(arg)".
+ */
+ "popq %%rax\n"
+ "popq %%rdi\n"
+ "call *%%rax\n"
+
+ /* Call _exit(%rax).
+ */
+ "movq %%rax,%%rdi\n"
+ "movq %3,%%rax\n"
+ "syscall\n"
+
+ /* Return to parent.
+ */
+ "1:\n"
+ : "=a" (__res)
+ : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+ "r"(LSS_SYSCALL_ARG(fn)),
+ "S"(LSS_SYSCALL_ARG(child_stack)),
+ "D"(LSS_SYSCALL_ARG(flags)),
+ "r"(LSS_SYSCALL_ARG(arg)),
+ "d"(LSS_SYSCALL_ARG(parent_tidptr)),
+ "r"(LSS_SYSCALL_ARG(newtls)),
+ "r"(LSS_SYSCALL_ARG(child_tidptr))
+ : "rsp", "memory", "r8", "r10", "r11", "rcx");
+ }
+ LSS_RETURN(int, __res);
+ }
+
+ LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+ /* On x86-64, the kernel does not know how to return from
+ * a signal handler. Instead, it relies on user space to provide a
+ * restorer function that calls the rt_sigreturn() system call.
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+ long long res;
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:movq %1,%%rax\n"
+ "syscall\n"
+ "2:popq %0\n"
+ "addq $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_rt_sigreturn));
+ return (void (*)(void))(uintptr_t)res;
+ }
+ #elif defined(__arm__)
+ /* Most definitions of _syscallX() neglect to mark "memory" as being
+ * clobbered. This causes problems with compilers that do a better job
+ * at optimizing across __asm__ calls.
+ * So, we just have to redefine all of the _syscallX() macros.
+ */
+ #undef LSS_REG
+ #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+
+ /* r0..r3 are scratch registers and not preserved across function
+ * calls. We need to first evaluate the first 4 syscall arguments
+ * and store them on stack.
They must be loaded into r0..r3 after + * all function calls to avoid r0..r3 being clobbered. + */ + #undef LSS_SAVE_ARG + #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a + #undef LSS_LOAD_ARG + #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r + + #undef LSS_BODY + #define LSS_BODY(type, name, args...) \ + register long __res_r0 __asm__("r0"); \ + long __res; \ + __SYS_REG(name) \ + __asm__ __volatile__ (__syscall_safe(name) \ + : "=r"(__res_r0) \ + : __SYS_REG_LIST(args) \ + : "lr", "memory"); \ + __res = __res_r0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + /* There is no need for using a volatile temp. */ \ + LSS_REG(0, arg1); \ + LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_REG(5, arg6); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + register long __res __asm__("r5"); + { + if (fn == NULL || child_stack == NULL) { + __res = -EINVAL; + goto clone_exit; + } + + /* stash first 4 arguments on stack first because we can only load + * them after all function calls. 
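+ * (This is because r0-r3 are caller-saved under the ARM calling
+ * convention: any helper routine invoked while evaluating these
+ * expressions, or the explicit stack stores just below, would be free
+ * to clobber register variables already bound to r0-r3.)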
+ */ + int tmp_flags = flags; + int * tmp_stack = (int*) child_stack; + void * tmp_ptid = parent_tidptr; + void * tmp_tls = newtls; + + register int *__ctid __asm__("r4") = child_tidptr; + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + *(--tmp_stack) = (int) arg; + *(--tmp_stack) = (int) fn; + + /* We must load r0..r3 last after all possible function calls. */ + register int __flags __asm__("r0") = tmp_flags; + register void *__stack __asm__("r1") = tmp_stack; + register void *__ptid __asm__("r2") = tmp_ptid; + register void *__tls __asm__("r3") = tmp_tls; + + /* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + __SYS_REG(clone) + __asm__ __volatile__(/* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + "push {r7}\n" + "mov r7,%1\n" + __syscall(clone)"\n" + + /* if (%r0 != 0) + * return %r0; + */ + "movs %0,r0\n" + "bne 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldr r0,[sp, #4]\n" + "mov lr,pc\n" + "ldr pc,[sp]\n" + + /* Call _exit(%r0), which never returns. We only + * need to set r7 for EABI syscall ABI but we do + * this always to simplify code sharing between + * old and new syscall ABIs. + */ + "mov r7,%2\n" + __syscall(exit)"\n" + + /* Pop r7 from the stack only in the parent. + */ + "1: pop {r7}\n" + : "=r" (__res) + : "r"(__sysreg), + "i"(__NR_exit), "r"(__stack), "r"(__flags), + "r"(__ptid), "r"(__tls), "r"(__ctid) + : "cc", "lr", "memory"); + } + clone_exit: + LSS_RETURN(int, __res); + } + #elif defined(__mips__) + #undef LSS_REG + #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ + (unsigned long)(a) + + #if _MIPS_SIM == _MIPS_SIM_ABI32 + // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html + // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html + #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\ + "$13", "$14", "$15", "$24", "$25", "memory" + #else + #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \ + "$14", "$15", "$24", "$25", "memory" + #endif + + #undef LSS_BODY + #define LSS_BODY(type,name,r7,...) 
\ + register unsigned long __v0 __asm__("$2") = __NR_##name; \ + __asm__ __volatile__ ("syscall\n" \ + : "=&r"(__v0), r7 (__r7) \ + : "0"(__v0), ##__VA_ARGS__ \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_BODY(type, name, "=r"); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall5 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". + */ + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "m" ((unsigned long)arg5) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8)); \ + } + #endif + #undef _syscall6 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". 
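+ * (Concretely, o32 expects argument 5 at 16($sp) and argument 6 at
+ * 20($sp), which is why the asm below reserves 32 bytes of stack,
+ * spills the extra arguments there, and pops the frame again right
+ * after the syscall.)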
+ */ + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "lw $8, %7\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "sw $8, 20($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "m" ((unsigned long)arg5), \ + "m" ((unsigned long)arg6) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5,type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8), "r"(__r9)); \ + } + #endif + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + register unsigned long __v0 __asm__("$2"); + register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; + { + register int __flags __asm__("$4") = flags; + register void *__stack __asm__("$5") = child_stack; + register void *__ptid __asm__("$6") = parent_tidptr; + register int *__ctid __asm__("$8") = child_tidptr; + __asm__ __volatile__( + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu $29,24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub $29,16\n" + #else + "dsubu $29,16\n" + #endif + + /* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "li %0,%2\n" + "beqz %5,1f\n" + "beqz %6,1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu %6,32\n" + "sw %5,0(%6)\n" + "sw %8,4(%6)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub %6,32\n" + "sw %5,0(%6)\n" + "sw %8,8(%6)\n" + #else + "dsubu %6,32\n" + "sd %5,0(%6)\n" + "sd %8,8(%6)\n" + #endif + + /* $7 = syscall($4 = flags, + * $5 = child_stack, + * $6 = parent_tidptr, + * $7 = newtls, + * $8 = child_tidptr) + */ + "li $2,%3\n" + "syscall\n" + + /* if ($7 != 0) + * return $2; + */ + "bnez $7,1f\n" + "bnez $2,1f\n" + + /* In the child, now. Call "fn(arg)". + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "lw $25,0($29)\n" + "lw $4,4($29)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "lw $25,0($29)\n" + "lw $4,8($29)\n" + #else + "ld $25,0($29)\n" + "ld $4,8($29)\n" + #endif + "jalr $25\n" + + /* Call _exit($2) + */ + "move $4,$2\n" + "li $2,%4\n" + "syscall\n" + + "1:\n" + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "addu $29, 24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "add $29, 16\n" + #else + "daddu $29,16\n" + #endif + : "=&r" (__v0), "=r" (__r7) + : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__r7), "r"(__ctid) + : "$9", "$10", "$11", "$12", "$13", "$14", "$15", + "$24", "memory"); + } + LSS_RETURN(int, __v0, __r7); + } + #elif defined (__PPC__) + #undef LSS_LOADARGS_0 + #define LSS_LOADARGS_0(name, dummy...) 
\ + __sc_0 = __NR_##name + #undef LSS_LOADARGS_1 + #define LSS_LOADARGS_1(name, arg1) \ + LSS_LOADARGS_0(name); \ + __sc_3 = (unsigned long) (arg1) + #undef LSS_LOADARGS_2 + #define LSS_LOADARGS_2(name, arg1, arg2) \ + LSS_LOADARGS_1(name, arg1); \ + __sc_4 = (unsigned long) (arg2) + #undef LSS_LOADARGS_3 + #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ + LSS_LOADARGS_2(name, arg1, arg2); \ + __sc_5 = (unsigned long) (arg3) + #undef LSS_LOADARGS_4 + #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ + LSS_LOADARGS_3(name, arg1, arg2, arg3); \ + __sc_6 = (unsigned long) (arg4) + #undef LSS_LOADARGS_5 + #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ + LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ + __sc_7 = (unsigned long) (arg5) + #undef LSS_LOADARGS_6 + #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ + __sc_8 = (unsigned long) (arg6) + #undef LSS_ASMINPUT_0 + #define LSS_ASMINPUT_0 "0" (__sc_0) + #undef LSS_ASMINPUT_1 + #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) + #undef LSS_ASMINPUT_2 + #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) + #undef LSS_ASMINPUT_3 + #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) + #undef LSS_ASMINPUT_4 + #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) + #undef LSS_ASMINPUT_5 + #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) + #undef LSS_ASMINPUT_6 + #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) \ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + \ + LSS_LOADARGS_##nr(name, args); \ + __asm__ __volatile__ \ + ("sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory", \ + "r9", "r10", "r11", "r12"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, 
type3, arg3, \
+ type4, arg4, type5, arg5, type6, arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \
+ }
+ /* clone function adapted from glibc 2.18 clone.S */
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+ long __ret, __err;
+ {
+#if defined(__PPC64__)
+
+/* Stack frame offsets. */
+#if _CALL_ELF != 2
+#define FRAME_MIN_SIZE 112
+#define FRAME_TOC_SAVE 40
+#else
+#define FRAME_MIN_SIZE 32
+#define FRAME_TOC_SAVE 24
+#endif
+
+
+ register int (*__fn)(void *) __asm__ ("r3") = fn;
+ register void *__cstack __asm__ ("r4") = child_stack;
+ register int __flags __asm__ ("r5") = flags;
+ register void * __arg __asm__ ("r6") = arg;
+ register int * __ptidptr __asm__ ("r7") = parent_tidptr;
+ register void * __newtls __asm__ ("r8") = newtls;
+ register int * __ctidptr __asm__ ("r9") = child_tidptr;
+ __asm__ __volatile__(
+ /* check for fn == NULL
+ * and child_stack == NULL
+ */
+ "cmpdi cr0, %6, 0\n\t"
+ "cmpdi cr1, %7, 0\n\t"
+ "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
+ "beq- cr0, 1f\n\t"
+
+ /* set up stack frame for child */
+ "clrrdi %7, %7, 4\n\t"
+ "li 0, 0\n\t"
+ "stdu 0, -%13(%7)\n\t"
+
+ /* fn, arg, child_stack are saved across the syscall */
+ "mr 28, %6\n\t"
+ "mr 29, %7\n\t"
+ "mr 27, %9\n\t"
+
+ /* syscall
+ r3 == flags
+ r4 == child_stack
+ r5 == parent_tidptr
+ r6 == newtls
+ r7 == child_tidptr */
+ "mr 3, %8\n\t"
+ "mr 5, %10\n\t"
+ "mr 6, %11\n\t"
+ "mr 7, %12\n\t"
+ "li 0, %4\n\t"
+ "sc\n\t"
+
+ /* Test if syscall was successful */
+ "cmpdi cr1, 3, 0\n\t"
+ "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+ "bne- cr1, 1f\n\t"
+
+ /* Do the function call */
+ "std 2, %14(1)\n\t"
+#if _CALL_ELF != 2
+ "ld 0, 0(28)\n\t"
+ "ld 2, 8(28)\n\t"
+ "mtctr 0\n\t"
+#else
+ "mr 12, 28\n\t"
+ "mtctr 12\n\t"
+#endif
+ "mr 3, 27\n\t"
+ "bctrl\n\t"
+ "ld 2, %14(1)\n\t"
+
+ /* Call _exit(r3) */
+ "li 0, %5\n\t"
+ "sc\n\t"
+
+ /* Return to parent */
+ "1:\n\t"
+ "mr %0, 3\n\t"
+ : "=r" (__ret), "=r" (__err)
+ : "0" (-1), "i" (EINVAL),
+ "i" (__NR_clone), "i" (__NR_exit),
+ "r" (__fn), "r" (__cstack), "r" (__flags),
+ "r" (__arg), "r" (__ptidptr), "r" (__newtls),
+ "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE)
+ : "cr0", "cr1", "memory", "ctr",
+ "r0", "r29", "r27", "r28");
+#else
+ register int (*__fn)(void *) __asm__ ("r8") = fn;
+ register void *__cstack __asm__ ("r4") = child_stack;
+ register int __flags __asm__ ("r3") = flags;
+ register void * __arg __asm__ ("r9") = arg;
+ register int * __ptidptr __asm__ ("r5") = parent_tidptr;
+ register void * __newtls __asm__ ("r6") = newtls;
+ register int * __ctidptr __asm__ ("r7") = child_tidptr;
+ __asm__ __volatile__(
+ /* check for fn == NULL
+ * and child_stack == NULL
+ */
+ "cmpwi cr0, %6, 0\n\t"
+ "cmpwi cr1, %7, 0\n\t"
+ "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
+ "beq- cr0, 1f\n\t"
+
+ /* set up stack frame for child */
+ "clrrwi %7, %7, 4\n\t"
+ "li 0, 0\n\t"
+ "stwu 0, -16(%7)\n\t"
+
+ /* fn, arg, child_stack are saved across the syscall: r28-30 */
+ "mr 28, %6\n\t"
+ "mr 29, %7\n\t"
+ "mr 27, %9\n\t"
+
+ /* syscall */
+ "li 0, %4\n\t"
+ /* flags already in r3
+ * child_stack already in r4
+ * ptidptr already in r5
+ * newtls already in r6
+ * ctidptr already in r7
+ */
+ "sc\n\t"
+
+ /* Test if syscall was successful */
+ "cmpwi cr1, 3, 0\n\t"
+ "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+ "bne- cr1, 1f\n\t"
+
+ /* Do the function call */
+ "mtctr 28\n\t"
+ "mr 3, 27\n\t"
+ "bctrl\n\t"
+
+ /* Call _exit(r3) */
+ "li 0, %5\n\t"
+ "sc\n\t"
+
+ /* Return to parent */
+ "1:\n"
+ "mfcr %1\n\t"
+ "mr %0, 3\n\t"
+ : "=r" (__ret), "=r" (__err)
+ : "0" (-1), "1" (EINVAL),
+ "i" (__NR_clone), "i" (__NR_exit),
+ "r" (__fn), "r" (__cstack), "r" (__flags),
+ "r" (__arg), "r" (__ptidptr), "r" (__newtls),
+ "r" (__ctidptr)
+ : "cr0", "cr1", "memory", "ctr",
+ "r0", "r29", "r27", "r28");
+
+#endif
+ }
+ LSS_RETURN(int, __ret, __err);
+ }
+ #elif defined(__aarch64__)
+ #undef LSS_REG
+ #define LSS_REG(r,a) register long __x##r __asm__("x"#r) = (long)a
+ #undef LSS_BODY
+ #define LSS_BODY(type,name,args...) \
+ register long __res_x0 __asm__("x0"); \
+ long __res; \
+ __asm__ __volatile__ ("mov x8, %1\n" \
+ "svc 0x0\n" \
+ : "=r"(__res_x0) \
+ : "i"(__NR_##name) , ## args \
+ : "memory"); \
+ __res = __res_x0; \
+ LSS_RETURN(type, __res)
+ #undef _syscall0
+ #define _syscall0(type, name) \
+ type LSS_NAME(name)(void) { \
+ LSS_BODY(type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type, name, type1, arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+ LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__x0)); \
+ }
+ #undef _syscall2
+ #define _syscall2(type, name, type1, arg1, type2, arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+ LSS_REG(0, arg1); LSS_REG(1, arg2); \
+ LSS_BODY(type, name, "r"(__x0), "r"(__x1)); \
+ }
+ #undef _syscall3
+ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
+ LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2)); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
+ LSS_REG(3, arg4); \
+ LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3)); \
+ }
+ #undef _syscall5
+ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
+ LSS_REG(3, arg4); LSS_REG(4, arg5); \
+ LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3), \
+ "r"(__x4)); \
+ }
+ #undef _syscall6
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
+ LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \
+ LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3), \
+ "r"(__x4), "r"(__x5)); \
+ }
+ /* clone function adapted from glibc 2.18 clone.S */
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+ long __res;
+ {
+ register int (*__fn)(void *) __asm__("x0") = fn;
+ register void *__stack __asm__("x1") = child_stack;
+ register int __flags __asm__("x2") = flags;
+ register void *__arg __asm__("x3") = arg;
+ register int *__ptid __asm__("x4") = parent_tidptr;
+ register void *__tls __asm__("x5") = newtls;
+ register int *__ctid __asm__("x6") = child_tidptr;
+ __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL)
+ * return -EINVAL;
+ */
+ "cbz x0,1f\n"
+ "cbz x1,1f\n"
+ + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "stp x0,x3, [x1, #-16]!\n" + + "mov x0,x2\n" /* flags */ + "mov x2,x4\n" /* ptid */ + "mov x3,x5\n" /* tls */ + "mov x4,x6\n" /* ctid */ + "mov x8,%9\n" /* clone */ + + "svc 0x0\n" + + /* if (%r0 != 0) + * return %r0; + */ + "cmp x0, #0\n" + "bne 2f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldp x1, x0, [sp], #16\n" + "blr x1\n" + + /* Call _exit(%r0). + */ + "mov x8, %10\n" + "svc 0x0\n" + "1:\n" + "mov x8, %1\n" + "2:\n" + : "=r" (__res) + : "i"(-EINVAL), + "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : "x30", "memory"); + } + LSS_RETURN(int, __res); + } + #elif defined(__s390x__) + #undef LSS_REG + #define LSS_REG(r, a) register unsigned long __r##r __asm__("r"#r) = (unsigned long) a + #undef LSS_BODY + #define LSS_BODY(type, name, args...) \ + register long __res_r2 __asm__("r2"); \ + long __res; \ + __asm__ __volatile__ \ + ("lgfi %%r1, %1\n\t" \ + "svc 0\n\t" \ + : "=&r"(__res_r2) \ + : "i"(__NR_##name), ## args \ + : "r1", "memory"); \ + __res = __res_r2; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(2, arg1); \ + LSS_BODY(type, name, "0"(__r2)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); \ + LSS_BODY(type, name, "0"(__r2), "r"(__r3)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_BODY(type, name, "0"(__r2), "r"(__r3), "r"(__r4)); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); \ + LSS_BODY(type, name, "0"(__r2), "r"(__r3), "r"(__r4), \ + "r"(__r5)); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4, type5 arg5) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); LSS_REG(6, arg5); \ + LSS_BODY(type, name, "0"(__r2), "r"(__r3), "r"(__r4), \ + "r"(__r5), "r"(__r6)); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4, type5 arg5, type6 arg6) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); LSS_REG(6, arg5); LSS_REG(7, arg6); \ + LSS_BODY(type, name, "0"(__r2), "r"(__r3), "r"(__r4), \ + "r"(__r5), "r"(__r6), "r"(__r7)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret; + { + register int (*__fn)(void *) __asm__ ("r1") = fn; + register void *__cstack __asm__ ("r2") = child_stack; + register int __flags __asm__ ("r3") = flags; + register void *__arg __asm__ ("r0") = arg; + register int *__ptidptr 
__asm__ ("r4") = parent_tidptr; + register void *__newtls __asm__ ("r6") = newtls; + register int *__ctidptr __asm__ ("r5") = child_tidptr; + __asm__ __volatile__ ( + /* arg already in r0 */ + "ltgr %4, %4\n\t" /* check fn, which is already in r1 */ + "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ + "ltgr %5, %5\n\t" /* check child_stack, which is already in r2 */ + "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ + /* flags already in r3 */ + /* parent_tidptr already in r4 */ + /* child_tidptr already in r5 */ + /* newtls already in r6 */ + "svc %2\n\t" /* invoke clone syscall */ + "ltgr %0, %%r2\n\t" /* load return code into __ret and test */ + "jnz 1f\n\t" /* return to parent if non-zero */ + /* start child thread */ + "lgr %%r2, %7\n\t" /* set first parameter to void *arg */ + "aghi %%r15, -160\n\t" /* make room on the stack for the save area */ + "xc 0(8,%%r15), 0(%%r15)\n\t" + "basr %%r14, %4\n\t" /* jump to fn */ + "svc %3\n" /* invoke exit syscall */ + + "1:\n" + : "=r" (__ret) + : "0" (-EINVAL), "i" (__NR_clone), "i" (__NR_exit), + "r" (__fn), "r" (__cstack), "r" (__flags), "r" (__arg), + "r" (__ptidptr), "r" (__newtls), "r" (__ctidptr) + : "cc", "r14", "memory" + ); + } + LSS_RETURN(int, __ret); + } + #endif + #define __NR__exit __NR_exit + #define __NR__gettid __NR_gettid + #define __NR__mremap __NR_mremap + LSS_INLINE _syscall1(int, close, int, f) + LSS_INLINE _syscall1(int, _exit, int, e) + LSS_INLINE _syscall3(int, fcntl, int, f, + int, c, long, a) + LSS_INLINE _syscall2(int, fstat, int, f, + struct kernel_stat*, b) + LSS_INLINE _syscall6(int, futex, int*, a, + int, o, int, v, + struct kernel_timespec*, t, + int*, a2, + int, v3) +#ifdef __NR_getdents64 + LSS_INLINE _syscall3(int, getdents64, int, f, + struct kernel_dirent64*, d, int, c) +#define KERNEL_DIRENT kernel_dirent64 +#define GETDENTS sys_getdents64 +#else + LSS_INLINE _syscall3(int, getdents, int, f, + struct kernel_dirent*, d, int, c) +#define KERNEL_DIRENT kernel_dirent +#define GETDENTS sys_getdents +#endif + LSS_INLINE _syscall0(pid_t, getpid) + LSS_INLINE _syscall0(pid_t, getppid) + LSS_INLINE _syscall0(pid_t, _gettid) + LSS_INLINE _syscall2(int, kill, pid_t, p, + int, s) + #if defined(__x86_64__) + /* Need to make sure off_t isn't truncated to 32-bits under x32. 
*/ + LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { + _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), + LSS_SYSCALL_ARG(w)); + } + #else + LSS_INLINE _syscall3(off_t, lseek, int, f, + off_t, o, int, w) + #endif + LSS_INLINE _syscall2(int, munmap, void*, s, + size_t, l) + LSS_INLINE _syscall5(void*, _mremap, void*, o, + size_t, os, size_t, ns, + unsigned long, f, void *, a) + LSS_INLINE _syscall2(int, prctl, int, o, + long, a) + LSS_INLINE _syscall4(long, ptrace, int, r, + pid_t, p, void *, a, void *, d) + LSS_INLINE _syscall3(ssize_t, read, int, f, + void *, b, size_t, c) + LSS_INLINE _syscall4(int, rt_sigaction, int, s, + const struct kernel_sigaction*, a, + struct kernel_sigaction*, o, size_t, c) + LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, + const struct kernel_sigset_t*, s, + struct kernel_sigset_t*, o, size_t, c); + LSS_INLINE _syscall0(int, sched_yield) + LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, + const stack_t*, o) + #if defined(__NR_fstatat) + LSS_INLINE _syscall4(int, fstatat, int, d, const char *, p, + struct kernel_stat*, b, int, flags) + LSS_INLINE int LSS_NAME(stat)(const char* p, struct kernel_stat* b) { + return LSS_NAME(fstatat)(AT_FDCWD,p,b,0); + } + #else + LSS_INLINE _syscall2(int, stat, const char*, f, + struct kernel_stat*, b) + #endif + LSS_INLINE _syscall3(ssize_t, write, int, f, + const void *, b, size_t, c) + #if defined(__NR_getcpu) + LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, + unsigned *, node, void *, unused); + #endif + #if defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) + #endif + #if defined(__x86_64__) || defined(__s390x__) + #if defined(__s390x__) + LSS_INLINE _syscall1(void*, mmap, void*, a) + #else + /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + __off64_t o) { + LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), + LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f), + LSS_SYSCALL_ARG(d), (uint64_t)(o)); + } + #endif + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + #if defined(__x86_64__) + /* On x86_64, the kernel requires us to always set our own + * SA_RESTORER in order to be able to return from a signal handler. + * This function must have a "magic" signature that the "gdb" + * (and maybe the kernel?) can recognize. 
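+ * (The required shape is the little trampoline returned by
+ * LSS_NAME(restore_rt)() earlier in this file: effectively
+ * "movq $__NR_rt_sigreturn, %rax; syscall", emitted 16-byte aligned
+ * so unwinders can pattern-match it.)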
+ */ + if (act != NULL && !(act->sa_flags & SA_RESTORER)) { + struct kernel_sigaction a = *act; + a.sa_flags |= SA_RESTORER; + a.sa_restorer = LSS_NAME(restore_rt)(); + return LSS_NAME(rt_sigaction)(signum, &a, oldact, + (KERNEL_NSIG+7)/8); + } else + #endif + { + return LSS_NAME(rt_sigaction)(signum, act, oldact, + (KERNEL_NSIG+7)/8); + } + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + } + #endif + #if (defined(__aarch64__)) || \ + (defined(__mips__) && (_MIPS_ISA == _MIPS_ISA_MIPS64)) + LSS_INLINE _syscall6(void*, mmap, void*, s, + size_t, l, int, p, + int, f, int, d, + __off64_t, o) + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG+7)/8); + + } + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + } + #endif + #ifdef __NR_wait4 + LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, + int*, s, int, o, + struct kernel_rusage*, r) + LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ + return LSS_NAME(wait4)(pid, status, options, 0); + } + #else + LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, + int*, s, int, o) + #endif + #ifdef __NR_openat + LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) + LSS_INLINE int LSS_NAME(open)(const char* p, int f, int m) { + return LSS_NAME(openat)(AT_FDCWD,p,f,m ); + } + #else + LSS_INLINE _syscall3(int, open, const char*, p, + int, f, int, m) + #endif + LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { + memset(&set->sig, 0, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { + memset(&set->sig, -1, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); + return 0; + } + } + + #if defined(__i386__) || \ + defined(__arm__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__) + #define __NR__sigaction __NR_sigaction + #define __NR__sigprocmask __NR_sigprocmask + LSS_INLINE _syscall2(int, fstat64, int, f, + struct kernel_stat64 *, b) + LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, + loff_t *, res, uint, wh) +#ifdef __PPC64__ + LSS_INLINE _syscall6(void*, mmap, void*, s, + size_t, l, int, p, + int, f, int, d, + off_t, o) +#else + #ifndef __ARM_EABI__ + /* Not available on ARM EABI Linux. 
*/ + LSS_INLINE _syscall1(void*, mmap, void*, a) + #endif + LSS_INLINE _syscall6(void*, mmap2, void*, s, + size_t, l, int, p, + int, f, int, d, + off_t, o) +#endif + LSS_INLINE _syscall3(int, _sigaction, int, s, + const struct kernel_old_sigaction*, a, + struct kernel_old_sigaction*, o) + LSS_INLINE _syscall3(int, _sigprocmask, int, h, + const unsigned long*, s, + unsigned long*, o) + LSS_INLINE _syscall2(int, stat64, const char *, p, + struct kernel_stat64 *, b) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + int old_errno = LSS_ERRNO; + int rc; + struct kernel_sigaction a; + if (act != NULL) { + a = *act; + #ifdef __i386__ + /* On i386, the kernel requires us to always set our own + * SA_RESTORER when using realtime signals. Otherwise, it does not + * know how to return from a signal handler. This function must have + * a "magic" signature that the "gdb" (and maybe the kernel?) can + * recognize. + * Apparently, a SA_RESTORER is implicitly set by the kernel, when + * using non-realtime signals. + * + * TODO: Test whether ARM needs a restorer + */ + if (!(a.sa_flags & SA_RESTORER)) { + a.sa_flags |= SA_RESTORER; + a.sa_restorer = (a.sa_flags & SA_SIGINFO) + ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); + } + #endif + } + rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, + (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; + if (!act) { + ptr_a = NULL; + } else { + oa.sa_handler_ = act->sa_handler_; + memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); + #ifndef __mips__ + oa.sa_restorer = act->sa_restorer; + #endif + oa.sa_flags = act->sa_flags; + } + if (!oldact) { + ptr_oa = NULL; + } + LSS_ERRNO = old_errno; + rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); + if (rc == 0 && oldact) { + if (act) { + memcpy(oldact, act, sizeof(*act)); + } else { + memset(oldact, 0, sizeof(*oldact)); + } + oldact->sa_handler_ = ptr_oa->sa_handler_; + oldact->sa_flags = ptr_oa->sa_flags; + memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); + #ifndef __mips__ + oldact->sa_restorer = ptr_oa->sa_restorer; + #endif + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + if (oldset) { + LSS_NAME(sigemptyset)(oldset); + } + rc = LSS_NAME(_sigprocmask)(how, + set ? &set->sig[0] : NULL, + oldset ? &oldset->sig[0] : NULL); + } + return rc; + } + #endif + #if defined(__i386__) || \ + defined(__PPC__) || \ + (defined(__arm__) && !defined(__ARM_EABI__)) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ + defined(__s390x__) + + /* See sys_socketcall in net/socket.c in kernel source. + * It de-multiplexes on its first arg and unpacks the arglist + * array in its second arg. 
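+ * (For example, SYS_SOCKET is multiplex code 1, which is why the
+ * socket() wrapper below packs {domain, type, protocol} into an
+ * unsigned long array and issues socketcall(1, args).)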
+ */ + LSS_INLINE _syscall2(int, socketcall, int, c, unsigned long*, a) + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + unsigned long args[3] = { + (unsigned long) domain, + (unsigned long) type, + (unsigned long) protocol + }; + return LSS_NAME(socketcall)(1, args); + } + #elif defined(__ARM_EABI__) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) + #endif + #if defined(__mips__) + /* sys_pipe() on MIPS has non-standard calling conventions, as it returns + * both file handles through CPU registers. + */ + LSS_INLINE int LSS_NAME(pipe)(int *p) { + register unsigned long __v0 __asm__("$2") = __NR_pipe; + register unsigned long __v1 __asm__("$3"); + register unsigned long __r7 __asm__("$7"); + __asm__ __volatile__ ("syscall\n" + : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) + : "0"(__v0) + : "$8", "$9", "$10", "$11", "$12", + "$13", "$14", "$15", "$24", "memory"); + if (__r7) { + LSS_ERRNO = __v0; + return -1; + } else { + p[0] = __v0; + p[1] = __v1; + return 0; + } + } + #elif defined(__NR_pipe2) + LSS_INLINE _syscall2(int, pipe2, int *, p, + int, f ) + LSS_INLINE int LSS_NAME(pipe)( int * p) { + return LSS_NAME(pipe2)(p, 0); + } + #else + LSS_INLINE _syscall1(int, pipe, int *, p) + #endif + + LSS_INLINE pid_t LSS_NAME(gettid)() { + pid_t tid = LSS_NAME(_gettid)(); + if (tid != -1) { + return tid; + } + return LSS_NAME(getpid)(); + } + + LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, + size_t new_size, int flags, ...) { + va_list ap; + void *new_address, *rc; + va_start(ap, flags); + new_address = va_arg(ap, void *); + rc = LSS_NAME(_mremap)(old_address, old_size, new_size, + flags, new_address); + va_end(ap); + return rc; + } + + LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { + /* PTRACE_DETACH can sometimes forget to wake up the tracee and it + * then sends job control signals to the real parent, rather than to + * the tracer. We reduce the risk of this happening by starting a + * whole new time slice, and then quickly sending a SIGCONT signal + * right after detaching from the tracee. + */ + int rc, err; + LSS_NAME(sched_yield)(); + rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); + err = LSS_ERRNO; + LSS_NAME(kill)(pid, SIGCONT); + LSS_ERRNO = err; + return rc; + } +#endif + +#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) +} +#endif + +#endif +#endif diff --git a/src/third_party/gperftools-2.5/src/base/linuxthreads.cc b/src/third_party/gperftools-2.5/src/base/linuxthreads.cc new file mode 100644 index 00000000000..891e70c88c4 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/linuxthreads.cc @@ -0,0 +1,707 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#include "base/linuxthreads.h"
+
+#ifdef THREADS
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <semaphore.h>
+
+#include "base/linux_syscall_support.h"
+#include "base/thread_lister.h"
+
+#ifndef CLONE_UNTRACED
+#define CLONE_UNTRACED 0x00800000
+#endif
+
+
+/* Synchronous signals that should not be blocked while in the lister thread.
+ */
+static const int sync_signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
+ SIGXCPU, SIGXFSZ };
+
+/* itoa() is not a standard function, and we cannot safely call printf()
+ * after suspending threads. So, we just implement our own copy. A
+ * recursive approach is the easiest here.
+ */
+static char *local_itoa(char *buf, int i) {
+ if (i < 0) {
+ *buf++ = '-';
+ return local_itoa(buf, -i);
+ } else {
+ if (i >= 10)
+ buf = local_itoa(buf, i/10);
+ *buf++ = (i%10) + '0';
+ *buf = '\000';
+ return buf;
+ }
+}
+
+
+/* Wrapper around clone() that runs "fn" on the same stack as the
+ * caller! Unlike fork(), the cloned thread shares the same address space.
+ * The caller must be careful to use only minimal amounts of stack until
+ * the cloned thread has returned.
+ * There is a good chance that the cloned thread and the caller will share
+ * the same copy of errno!
+ */
+#ifdef __GNUC__
+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3
+/* Try to force this function into a separate stack frame, and make sure
+ * that arguments are passed on the stack.
+ */
+static int local_clone (int (*fn)(void *), void *arg, ...)
+ __attribute__ ((noinline));
+#endif
+#endif
+
+/* To avoid having the gap cross page boundaries, increase by the large
+ * page size that PowerPC systems mostly use. */
+#ifdef __PPC64__
+#define CLONE_STACK_SIZE 65536
+#else
+#define CLONE_STACK_SIZE 4096
+#endif
+
+static int local_clone (int (*fn)(void *), void *arg, ...) {
+ /* Leave 4kB of gap between the caller's stack and the new clone. This
+ * should be more than sufficient for the caller to call waitpid() until
+ * the cloned thread terminates.
+ *
+ * It is important that we set the CLONE_UNTRACED flag, because newer
+ * versions of "gdb" otherwise attempt to attach to our thread, and will
+ * attempt to reap its status codes. This subsequently results in the
+ * caller hanging indefinitely in waitpid(), waiting for a change in
+ * status that will never happen. By setting the CLONE_UNTRACED flag, we
+ * prevent "gdb" from stealing events, but we still expect the thread
+ * lister to fail, because it cannot PTRACE_ATTACH to the process that
+ * is being debugged.
This is OK and the error code will be reported
+ * correctly.
+ */
+ return sys_clone(fn, (char *)&arg - CLONE_STACK_SIZE,
+ CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg, 0, 0, 0);
+}
+
+
+/* Local substitute for the atoi() function, which is not necessarily safe
+ * to call once threads are suspended (depending on whether libc looks up
+ * locale information, when executing atoi()).
+ */
+static int local_atoi(const char *s) {
+ int n = 0;
+ int neg = *s == '-';
+ if (neg)
+ s++;
+ while (*s >= '0' && *s <= '9')
+ n = 10*n + (*s++ - '0');
+ return neg ? -n : n;
+}
+
+
+/* Re-runs fn until it doesn't cause EINTR
+ */
+#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
+
+
+/* Wrap a class around system calls, in order to give us access to
+ * a private copy of errno. This only works in C++, but it has the
+ * advantage of not needing nested functions, which are a non-standard
+ * language extension.
+ */
+#ifdef __cplusplus
+namespace {
+ class SysCalls {
+ public:
+ #define SYS_CPLUSPLUS
+ #define SYS_ERRNO my_errno
+ #define SYS_INLINE inline
+ #define SYS_PREFIX -1
+ #undef SYS_LINUX_SYSCALL_SUPPORT_H
+ #include "linux_syscall_support.h"
+ SysCalls() : my_errno(0) { }
+ int my_errno;
+ };
+}
+#define ERRNO sys.my_errno
+#else
+#define ERRNO my_errno
+#endif
+
+
+/* Wrapper for open() which is guaranteed to never return EINTR.
+ */
+static int c_open(const char *fname, int flags, int mode) {
+ ssize_t rc;
+ NO_INTR(rc = sys_open(fname, flags, mode));
+ return rc;
+}
+
+
+/* abort() is not safely reentrant, and changes its behavior each time
+ * it is called. This means that if the main application ever called abort(),
+ * we cannot safely call it again. This would happen if we were called
+ * from a SIGABRT signal handler in the main application. So, document
+ * that calling SIGABRT from the thread lister makes it not signal safe
+ * (and vice-versa).
+ * Also, since we share address space with the main application, we
+ * cannot call abort() from the callback and expect the main application
+ * to behave correctly afterwards. In fact, the only thing we can do is
+ * to terminate the main application with extreme prejudice (aka
+ * PTRACE_KILL).
+ * We set up our own SIGABRT handler to do this.
+ * In order to find the main application from the signal handler, we
+ * need to store information about it in global variables. This is
+ * safe, because the main application should be suspended at this
+ * time. If the callback ever called TCMalloc_ResumeAllProcessThreads(), then
+ * we are running a higher risk, though. So, try to avoid calling
+ * abort() after calling TCMalloc_ResumeAllProcessThreads.
+ */
+static volatile int *sig_pids, sig_num_threads, sig_proc, sig_marker;
+
+
+/* Signal handler to help us recover from dying while we are attached to
+ * other threads.
+ */
+static void SignalHandler(int signum, siginfo_t *si, void *data) {
+ if (sig_pids != NULL) {
+ if (signum == SIGABRT) {
+ while (sig_num_threads-- > 0) {
+ /* Not sure if sched_yield is really necessary here, but it does not */
+ /* hurt, and it might be necessary for the same reasons that we have */
+ /* to do so in sys_ptrace_detach.
*/ + sys_sched_yield(); + sys_ptrace(PTRACE_KILL, sig_pids[sig_num_threads], 0, 0); + } + } else if (sig_num_threads > 0) { + TCMalloc_ResumeAllProcessThreads(sig_num_threads, (int *)sig_pids); + } + } + sig_pids = NULL; + if (sig_marker >= 0) + NO_INTR(sys_close(sig_marker)); + sig_marker = -1; + if (sig_proc >= 0) + NO_INTR(sys_close(sig_proc)); + sig_proc = -1; + + sys__exit(signum == SIGABRT ? 1 : 2); +} + + +/* Try to dirty the stack, and hope that the compiler is not smart enough + * to optimize this function away. Or worse, the compiler could inline the + * function and permanently allocate the data on the stack. + */ +static void DirtyStack(size_t amount) { + char buf[amount]; + memset(buf, 0, amount); + sys_read(-1, buf, amount); +} + + +/* Data structure for passing arguments to the lister thread. + */ +#define ALT_STACKSIZE (MINSIGSTKSZ + 4096) + +struct ListerParams { + int result, err; + char *altstack_mem; + ListAllProcessThreadsCallBack callback; + void *parameter; + va_list ap; + sem_t *lock; +}; + + +static void ListerThread(struct ListerParams *args) { + int found_parent = 0; + pid_t clone_pid = sys_gettid(), ppid = sys_getppid(); + char proc_self_task[80], marker_name[48], *marker_path; + const char *proc_paths[3]; + const char *const *proc_path = proc_paths; + int proc = -1, marker = -1, num_threads = 0; + int max_threads = 0, sig; + struct kernel_stat marker_sb, proc_sb; + stack_t altstack; + + /* Wait for parent thread to set appropriate permissions + * to allow ptrace activity + */ + if (sem_wait(args->lock) < 0) { + goto failure; + } + + /* Create "marker" that we can use to detect threads sharing the same + * address space and the same file handles. By setting the FD_CLOEXEC flag + * we minimize the risk of misidentifying child processes as threads; + * and since there is still a race condition, we will filter those out + * later, anyway. + */ + if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 || + sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0) { + failure: + args->result = -1; + args->err = errno; + if (marker >= 0) + NO_INTR(sys_close(marker)); + sig_marker = marker = -1; + if (proc >= 0) + NO_INTR(sys_close(proc)); + sig_proc = proc = -1; + sys__exit(1); + } + + /* Compute search paths for finding thread directories in /proc */ + local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid); + strcpy(marker_name, proc_self_task); + marker_path = marker_name + strlen(marker_name); + strcat(proc_self_task, "/task/"); + proc_paths[0] = proc_self_task; /* /proc/$$/task/ */ + proc_paths[1] = "/proc/"; /* /proc/ */ + proc_paths[2] = NULL; + + /* Compute path for marker socket in /proc */ + local_itoa(strcpy(marker_path, "/fd/") + 4, marker); + if (sys_stat(marker_name, &marker_sb) < 0) { + goto failure; + } + + /* Catch signals on an alternate pre-allocated stack. This way, we can + * safely execute the signal handler even if we ran out of memory. + */ + memset(&altstack, 0, sizeof(altstack)); + altstack.ss_sp = args->altstack_mem; + altstack.ss_flags = 0; + altstack.ss_size = ALT_STACKSIZE; + sys_sigaltstack(&altstack, (const stack_t *)NULL); + + /* Some kernels forget to wake up traced processes, when the + * tracer dies. So, intercept synchronous signals and make sure + * that we wake up our tracees before dying. It is the caller's + * responsibility to ensure that asynchronous signals do not + * interfere with this function. 
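+ * (SignalHandler above implements this recovery: on SIGABRT it
+ * PTRACE_KILLs the attached threads, and on the other synchronous
+ * signals it resumes them before exiting.)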
+ */ + sig_marker = marker; + sig_proc = -1; + for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { + struct kernel_sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction_ = SignalHandler; + sys_sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND; + sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL); + } + + /* Read process directories in /proc/... */ + for (;;) { + /* Some kernels know about threads, and hide them in "/proc" + * (although they are still there, if you know the process + * id). Threads are moved into a separate "task" directory. We + * check there first, and then fall back on the older naming + * convention if necessary. + */ + if ((sig_proc = proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) { + if (*++proc_path != NULL) + continue; + goto failure; + } + if (sys_fstat(proc, &proc_sb) < 0) + goto failure; + + /* Since we are suspending threads, we cannot call any libc + * functions that might acquire locks. Most notably, we cannot + * call malloc(). So, we have to allocate memory on the stack, + * instead. Since we do not know how much memory we need, we + * make a best guess. And if we guessed incorrectly we retry on + * a second iteration (by jumping to "detach_threads"). + * + * Unless the number of threads is increasing very rapidly, we + * should never need to do so, though, as our guestimate is very + * conservative. + */ + if (max_threads < proc_sb.st_nlink + 100) + max_threads = proc_sb.st_nlink + 100; + + /* scope */ { + pid_t pids[max_threads]; + int added_entries = 0; + sig_num_threads = num_threads; + sig_pids = pids; + for (;;) { + struct KERNEL_DIRENT *entry; + char buf[4096]; + ssize_t nbytes = GETDENTS(proc, (struct KERNEL_DIRENT *)buf, + sizeof(buf)); + if (nbytes < 0) + goto failure; + else if (nbytes == 0) { + if (added_entries) { + /* Need to keep iterating over "/proc" in multiple + * passes until we no longer find any more threads. This + * algorithm eventually completes, when all threads have + * been suspended. + */ + added_entries = 0; + sys_lseek(proc, 0, SEEK_SET); + continue; + } + break; + } + for (entry = (struct KERNEL_DIRENT *)buf; + entry < (struct KERNEL_DIRENT *)&buf[nbytes]; + entry = (struct KERNEL_DIRENT *)((char *)entry+entry->d_reclen)) { + if (entry->d_ino != 0) { + const char *ptr = entry->d_name; + pid_t pid; + + /* Some kernels hide threads by preceding the pid with a '.' */ + if (*ptr == '.') + ptr++; + + /* If the directory is not numeric, it cannot be a + * process/thread + */ + if (*ptr < '0' || *ptr > '9') + continue; + pid = local_atoi(ptr); + + /* Attach (and suspend) all threads */ + if (pid && pid != clone_pid) { + struct kernel_stat tmp_sb; + char fname[entry->d_reclen + 48]; + strcat(strcat(strcpy(fname, "/proc/"), + entry->d_name), marker_path); + + /* Check if the marker is identical to the one we created */ + if (sys_stat(fname, &tmp_sb) >= 0 && + marker_sb.st_ino == tmp_sb.st_ino) { + long i, j; + + /* Found one of our threads, make sure it is no duplicate */ + for (i = 0; i < num_threads; i++) { + /* Linear search is slow, but should not matter much for + * the typically small number of threads. 
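+ * (Each new pid is compared against all entries collected so far, so
+ * the total cost is quadratic in the thread count; that is acceptable
+ * for the thread counts a typical process has.)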
+ */
+ if (pids[i] == pid) {
+ /* Found a duplicate; most likely on second pass */
+ goto next_entry;
+ }
+ }
+
+ /* Check whether data structure needs growing */
+ if (num_threads >= max_threads) {
+ /* Back to square one, this time with more memory */
+ NO_INTR(sys_close(proc));
+ goto detach_threads;
+ }
+
+ /* Attaching to thread suspends it */
+ pids[num_threads++] = pid;
+ sig_num_threads = num_threads;
+ if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0,
+ (void *)0) < 0) {
+ /* If operation failed, ignore thread. Maybe it
+ * just died? There might also be a race
+ * condition with a concurrent core dumper or
+ * with a debugger. In that case, we will just
+ * make a best effort, rather than failing
+ * entirely.
+ */
+ num_threads--;
+ sig_num_threads = num_threads;
+ goto next_entry;
+ }
+ while (sys_waitpid(pid, (int *)0, __WALL) < 0) {
+ if (errno != EINTR) {
+ sys_ptrace_detach(pid);
+ num_threads--;
+ sig_num_threads = num_threads;
+ goto next_entry;
+ }
+ }
+
+ if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j ||
+ sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i != j) {
+ /* Address spaces are distinct, even though both
+ * processes show the "marker". This is probably
+ * a forked child process rather than a thread.
+ */
+ sys_ptrace_detach(pid);
+ num_threads--;
+ sig_num_threads = num_threads;
+ } else {
+ found_parent |= pid == ppid;
+ added_entries++;
+ }
+ }
+ }
+ }
+ next_entry:;
+ }
+ }
+ NO_INTR(sys_close(proc));
+ sig_proc = proc = -1;
+
+ /* If we failed to find any threads, try looking somewhere else in
+ * /proc. Maybe, threads are reported differently on this system.
+ */
+ if (num_threads > 1 || !*++proc_path) {
+ NO_INTR(sys_close(marker));
+ sig_marker = marker = -1;
+
+ /* If we never found the parent process, something is very wrong.
+ * Most likely, we are running in a debugger. Any attempt to operate
+ * on the threads would be very incomplete. Let's just report an
+ * error to the caller.
+ */
+ if (!found_parent) {
+ TCMalloc_ResumeAllProcessThreads(num_threads, pids);
+ sys__exit(3);
+ }
+
+ /* Now we are ready to call the callback,
+ * which takes care of resuming the threads for us.
+ */
+ args->result = args->callback(args->parameter, num_threads,
+ pids, args->ap);
+ args->err = errno;
+
+ /* Callback should have resumed threads, but better safe than sorry */
+ if (TCMalloc_ResumeAllProcessThreads(num_threads, pids)) {
+ /* Callback forgot to resume at least one thread, report error */
+ args->err = EINVAL;
+ args->result = -1;
+ }
+
+ sys__exit(0);
+ }
+ detach_threads:
+ /* Resume all threads prior to retrying the operation */
+ TCMalloc_ResumeAllProcessThreads(num_threads, pids);
+ sig_pids = NULL;
+ num_threads = 0;
+ sig_num_threads = num_threads;
+ max_threads += 100;
+ }
+ }
+}
+
+
+/* This function gets the list of all Linux threads of the current process
+ * and passes them to the 'callback' along with the 'parameter' pointer; at
+ * the time the callback is invoked, all of the threads are paused via
+ * PTRACE_ATTACH.
+ * The callback is executed from a separate thread which shares only the
+ * address space, the filesystem, and the filehandles with the caller. Most
+ * notably, it does not share the same pid and ppid; and if it terminates,
+ * the rest of the application is still there. 'callback' is supposed to
+ * call, or arrange for a call to, TCMalloc_ResumeAllProcessThreads. This
+ * happens automatically, if
+ * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous
+ * signals are blocked.
If the 'callback' decides to unblock them, it must
+ * ensure that they cannot terminate the application, or that
+ * TCMalloc_ResumeAllProcessThreads will get called.
+ * It is an error for the 'callback' to make any library calls that could
+ * acquire locks. Most notably, this means that most system calls have to
+ * avoid going through libc. Also, this means that it is not legal to call
+ * exit() or abort().
+ * We return -1 on error and the return value of 'callback' on success.
+ */
+int TCMalloc_ListAllProcessThreads(void *parameter,
+ ListAllProcessThreadsCallBack callback, ...) {
+ char altstack_mem[ALT_STACKSIZE];
+ struct ListerParams args;
+ pid_t clone_pid;
+ int dumpable = 1, sig;
+ struct kernel_sigset_t sig_blocked, sig_old;
+ sem_t lock;
+
+ va_start(args.ap, callback);
+
+ /* If we are short on virtual memory, initializing the alternate stack
+ * might trigger a SIGSEGV. Let's do this early, before it could get us
+ * into more trouble (i.e. before signal handlers try to use the alternate
+ * stack, and before we attach to other threads).
+ */
+ memset(altstack_mem, 0, sizeof(altstack_mem));
+
+ /* Some of our cleanup functions could conceivably use more stack space.
+ * Try to touch the stack right now. This could be defeated by the compiler
+ * being too smart for its own good, so try really hard.
+ */
+ DirtyStack(32768);
+
+ /* Make this process "dumpable". This is necessary in order to ptrace()
+ * after having called setuid().
+ */
+ dumpable = sys_prctl(PR_GET_DUMPABLE, 0);
+ if (!dumpable)
+ sys_prctl(PR_SET_DUMPABLE, 1);
+
+ /* Fill in argument block for dumper thread */
+ args.result = -1;
+ args.err = 0;
+ args.altstack_mem = altstack_mem;
+ args.parameter = parameter;
+ args.callback = callback;
+ args.lock = &lock;
+
+ /* Before cloning the thread lister, block all asynchronous signals, as we */
+ /* are not prepared to handle them. */
+ sys_sigfillset(&sig_blocked);
+ for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) {
+ sys_sigdelset(&sig_blocked, sync_signals[sig]);
+ }
+ if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) {
+ args.err = errno;
+ args.result = -1;
+ goto failed;
+ }
+
+ /* scope */ {
+ /* After cloning, both the parent and the child share the same instance
+ * of errno. We must make sure that at least one of these processes
+ * (in our case, the parent) uses modified syscall macros that update
+ * a local copy of errno, instead.
+ */
+ #ifdef __cplusplus
+ #define sys0_sigprocmask sys.sigprocmask
+ #define sys0_waitpid sys.waitpid
+ SysCalls sys;
+ #else
+ int my_errno;
+ #define SYS_ERRNO my_errno
+ #define SYS_INLINE inline
+ #define SYS_PREFIX 0
+ #undef SYS_LINUX_SYSCALL_SUPPORT_H
+ #include "linux_syscall_support.h"
+ #endif
+
+ /* Lock before clone so that parent can set
+ * ptrace permissions (if necessary) prior
+ * to ListerThread actually executing
+ */
+ if (sem_init(&lock, 0, 0) == 0) {
+
+ int clone_errno;
+ clone_pid = local_clone((int (*)(void *))ListerThread, &args);
+ clone_errno = errno;
+
+ sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);
+
+ if (clone_pid >= 0) {
+#ifdef PR_SET_PTRACER
+ /* In newer versions of glibc permission must explicitly
+ * be given to allow for ptrace.
+ */
+ prctl(PR_SET_PTRACER, clone_pid, 0, 0, 0);
+#endif
+ /* Releasing the lock here allows the
+ * ListerThread to execute and ptrace us.
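+ * (ListerThread has been blocked in sem_wait(args->lock) since it
+ * started, so this sem_post is what lets it proceed.)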
+ */ + sem_post(&lock); + int status, rc; + while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 && + ERRNO == EINTR) { + /* Keep waiting */ + } + if (rc < 0) { + args.err = ERRNO; + args.result = -1; + } else if (WIFEXITED(status)) { + switch (WEXITSTATUS(status)) { + case 0: break; /* Normal process termination */ + case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */ + args.result = -1; + break; + case 3: args.err = EPERM; /* Process is already being traced */ + args.result = -1; + break; + default:args.err = ECHILD; /* Child died unexpectedly */ + args.result = -1; + break; + } + } else if (!WIFEXITED(status)) { + args.err = EFAULT; /* Terminated due to an unhandled signal*/ + args.result = -1; + } + sem_destroy(&lock); + } else { + args.result = -1; + args.err = clone_errno; + } + } else { + args.result = -1; + args.err = errno; + } + } + + /* Restore the "dumpable" state of the process */ +failed: + if (!dumpable) + sys_prctl(PR_SET_DUMPABLE, dumpable); + + va_end(args.ap); + + errno = args.err; + return args.result; +} + +/* This function resumes the list of all linux threads that + * TCMalloc_ListAllProcessThreads pauses before giving to its callback. + * The function returns non-zero if at least one thread was + * suspended and has now been resumed. + */ +int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { + int detached_at_least_one = 0; + while (num_threads-- > 0) { + detached_at_least_one |= sys_ptrace_detach(thread_pids[num_threads]) >= 0; + } + return detached_at_least_one; +} + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/third_party/gperftools-2.5/src/base/linuxthreads.h b/src/third_party/gperftools-2.5/src/base/linuxthreads.h new file mode 100644 index 00000000000..82965af905d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/linuxthreads.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#ifndef _LINUXTHREADS_H
+#define _LINUXTHREADS_H
+
+/* Include thread_lister.h to get the interface that we implement for linux.
+ */
+
+/* We currently support x86-32, x86-64, ARM, MIPS, PPC, aarch64 and s390x
+ * on Linux (see the test below). Porting to other related platforms should
+ * not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
+ defined(__mips__) || defined(__PPC__) || defined(__aarch64__) || \
+ defined(__s390x__)) && defined(__linux)
+
+/* Define the THREADS symbol to make sure that there is exactly one core dumper
+ * built into the library.
+ */
+#define THREADS "Linux /proc"
+
+#endif
+
+#endif /* _LINUXTHREADS_H */
diff --git a/src/third_party/gperftools-2.5/src/base/logging.cc b/src/third_party/gperftools-2.5/src/base/logging.cc
new file mode 100644
index 00000000000..761c2fd582e
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/logging.cc
@@ -0,0 +1,108 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// This file just provides storage for FLAGS_verbose.
+
+#include <config.h>
+#include "base/logging.h"
+#include "base/commandlineflags.h"
+
+DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0),
+ "Set to numbers >0 for more verbose output, or <0 for less. "
+ "--verbose == -4 means we log fatal errors only.");
+
+
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+
+// While windows does have a POSIX-compatible API
+// (_open/_write/_close), it acquires memory. Using this lower-level
+// windows API is the closest we can get to being "raw".
+RawFD RawOpenForWriting(const char* filename) {
+ // CreateFile allocates memory if file_name isn't absolute, so if
+ // that ever becomes a problem then we ought to compute the absolute
+ // path on its behalf (perhaps the ntdll/kernel function isn't aware
+ // of the working directory?)
+ RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL, + CREATE_ALWAYS, 0, NULL); + if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS) + SetEndOfFile(fd); // truncate the existing file + return fd; +} + +void RawWrite(RawFD handle, const char* buf, size_t len) { + while (len > 0) { + DWORD wrote; + BOOL ok = WriteFile(handle, buf, len, &wrote, NULL); + // We do not use an asynchronous file handle, so ok==false means an error + if (!ok) break; + buf += wrote; + len -= wrote; + } +} + +void RawClose(RawFD handle) { + CloseHandle(handle); +} + +#else // _WIN32 || __CYGWIN__ || __CYGWIN32__ + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +// Re-run fn until it doesn't cause EINTR. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +RawFD RawOpenForWriting(const char* filename) { + return open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0664); +} + +void RawWrite(RawFD fd, const char* buf, size_t len) { + while (len > 0) { + ssize_t r; + NO_INTR(r = write(fd, buf, len)); + if (r <= 0) break; + buf += r; + len -= r; + } +} + +void RawClose(RawFD fd) { + NO_INTR(close(fd)); +} + +#endif // _WIN32 || __CYGWIN__ || __CYGWIN32__ diff --git a/src/third_party/gperftools-2.5/src/base/logging.h b/src/third_party/gperftools-2.5/src/base/logging.h new file mode 100644 index 00000000000..a1afe4dca6e --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/logging.h @@ -0,0 +1,259 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file contains #include information about logging-related stuff. +// Pretty much everybody needs to #include this file so that they can +// log various happenings. 
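+//
+// For example (a hypothetical caller and path, for illustration only; the
+// Raw* routines and CHECK are all declared below):
+//
+//   RawFD fd = RawOpenForWriting("/tmp/profile.out");
+//   CHECK(fd != kIllegalRawFD);
+//   RawWrite(fd, "hello\n", 6);
+//   RawClose(fd);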
+//
+#ifndef _LOGGING_H_
+#define _LOGGING_H_
+
+#include <config.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h> // for write()
+#endif
+#include <string.h> // for strlen(), strcmp()
+#include <assert.h>
+#include <errno.h> // for errno
+#include "base/commandlineflags.h"
+
+// On some systems (like freebsd), we can't call write() at all in a
+// global constructor, perhaps because errno hasn't been set up.
+// (In windows, we can't call it because it might call malloc.)
+// Calling the write syscall is safer (it doesn't set errno), so we
+// prefer that. Note we don't care about errno for logging: we just
+// do logging on a best-effort basis.
+#if defined(_MSC_VER)
+#define WRITE_TO_STDERR(buf, len) WriteToStderr(buf, len); // in port.cc
+#elif defined(HAVE_SYS_SYSCALL_H)
+#include <sys/syscall.h>
+#define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len)
+#else
+#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len)
+#endif
+
+// MSVC and mingw define their own, safe version of vsnprintf (the
+// windows one is broken) in port.cc. Everyone else can use the
+// version here. We had to give it a unique name for windows.
+#ifndef _WIN32
+# define perftools_vsnprintf vsnprintf
+#endif
+
+
+// We log all messages at this log-level and below.
+// INFO == -1, WARNING == -2, ERROR == -3, FATAL == -4
+DECLARE_int32(verbose);
+
+// CHECK dies with a fatal error if condition is not true. It is *not*
+// controlled by NDEBUG, so the check will be executed regardless of
+// compilation mode. Therefore, it is safe to do things like:
+// CHECK(fp->Write(x) == 4)
+// Note we use write instead of printf/puts to avoid the risk we'll
+// call malloc().
+#define CHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ WRITE_TO_STDERR("Check failed: " #condition "\n", \
+ sizeof("Check failed: " #condition "\n")-1); \
+ abort(); \
+ } \
+ } while (0)
+
+// This takes a message to print. The name is historical.
+#define RAW_CHECK(condition, message) \
+ do { \
+ if (!(condition)) { \
+ WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \
+ sizeof("Check failed: " #condition ": " message "\n")-1);\
+ abort(); \
+ } \
+ } while (0)
+
+// This is like RAW_CHECK, but only in debug-mode
+#ifdef NDEBUG
+enum { DEBUG_MODE = 0 };
+#define RAW_DCHECK(condition, message)
+#else
+enum { DEBUG_MODE = 1 };
+#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message)
+#endif
+
+// This prints errno as well. Note we use write instead of printf/puts to
+// avoid the risk we'll call malloc().
+#define PCHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ const int err_no = errno; \
+ WRITE_TO_STDERR("Check failed: " #condition ": ", \
+ sizeof("Check failed: " #condition ": ")-1); \
+ WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \
+ WRITE_TO_STDERR("\n", sizeof("\n")-1); \
+ abort(); \
+ } \
+ } while (0)
+
+// Helper macro for binary operators; prints the two values on error
+// Don't use this macro directly in your code, use CHECK_EQ et al below
+
+// WARNING: These don't compile correctly if one of the arguments is a pointer
+// and the other is NULL. To work around this, simply static_cast NULL to the
+// type of the desired pointer.
+
+// TODO(jandrews): Also print the values in case of failure. Requires some
+// sort of type-sensitive ToString() function.
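+//
+// E.g., per the warning above, when "p" is a char* prefer
+//   CHECK_EQ(p, static_cast<char*>(NULL));
+// over CHECK_EQ(p, NULL). ("p" is a hypothetical variable, for
+// illustration only.)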
+#define CHECK_OP(op, val1, val2) \ + do { \ + if (!((val1) op (val2))) { \ + fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \ + abort(); \ + } \ + } while (0) + +#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) +#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) +#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) +#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) +#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) +#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) + +// Synonyms for CHECK_* that are used in some unittests. +#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2) +#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2) +#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2) +#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2) +#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2) +#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2) +#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2) +#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2) +#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2) +#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2) +#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2) +#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2) +// As are these variants. +#define EXPECT_TRUE(cond) CHECK(cond) +#define EXPECT_FALSE(cond) CHECK(!(cond)) +#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) +#define ASSERT_TRUE(cond) EXPECT_TRUE(cond) +#define ASSERT_FALSE(cond) EXPECT_FALSE(cond) +#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b) + +// Used for (libc) functions that return -1 and set errno +#define CHECK_ERR(invocation) PCHECK((invocation) != -1) + +// A few more checks that only happen in debug mode +#ifdef NDEBUG +#define DCHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) +#else +#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) +#endif + + +#ifdef ERROR +#undef ERROR // may conflict with ERROR macro on windows +#endif +enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4}; + +// NOTE: we add a newline to the end of the output if it's not there already +inline void LogPrintf(int severity, const char* pat, va_list ap) { + // We write directly to the stderr file descriptor and avoid FILE + // buffering because that may invoke malloc() + char buf[600]; + perftools_vsnprintf(buf, sizeof(buf)-1, pat, ap); + if (buf[0] != '\0' && buf[strlen(buf)-1] != '\n') { + assert(strlen(buf)+1 < sizeof(buf)); + strcat(buf, "\n"); + } + WRITE_TO_STDERR(buf, strlen(buf)); + if ((severity) == FATAL) + abort(); // LOG(FATAL) indicates a big problem, so don't run atexit() calls +} + +// Note that since the order of global constructors is unspecified, +// global code that calls RAW_LOG may execute before FLAGS_verbose is set. +// Such code will run with verbosity == 0 no matter what. +#define VLOG_IS_ON(severity) (FLAGS_verbose >= severity) + +// In a better world, we'd use __VA_ARGS__, but VC++ 7 doesn't support it. +#define LOG_PRINTF(severity, pat) do { \ + if (VLOG_IS_ON(severity)) { \ + va_list ap; \ + va_start(ap, pat); \ + LogPrintf(severity, pat, ap); \ + va_end(ap); \ + } \ +} while (0) + +// RAW_LOG is the main function; some synonyms are used in unittests. 
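+// Example (all severities print under the default FLAGS_verbose of 0; a
+// setting of -4 would keep only FATAL; "n" is a hypothetical variable):
+//   RAW_LOG(INFO, "freed %d bytes", n);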
+inline void RAW_LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void RAW_VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void LOG_IF(int lvl, bool cond, const char* pat, ...) { + if (cond) LOG_PRINTF(lvl, pat); +} + +// This isn't technically logging, but it's also IO and also is an +// attempt to be "raw" -- that is, to not use any higher-level libc +// routines that might allocate memory or (ideally) try to allocate +// locks. We use an opaque file handle (not necessarily an int) +// to allow even more low-level stuff in the future. +// Like other "raw" routines, these functions are best effort, and +// thus don't return error codes (except RawOpenForWriting()). +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#ifndef NOMINMAX +#define NOMINMAX // @#!$& windows +#endif +#include <windows.h> +typedef HANDLE RawFD; +const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE; +#else +typedef int RawFD; +const RawFD kIllegalRawFD = -1; // what open returns if it fails +#endif // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +RawFD RawOpenForWriting(const char* filename); // uses default permissions +void RawWrite(RawFD fd, const char* buf, size_t len); +void RawClose(RawFD fd); + +#endif // _LOGGING_H_ diff --git a/src/third_party/gperftools-2.5/src/base/low_level_alloc.cc b/src/third_party/gperftools-2.5/src/base/low_level_alloc.cc new file mode 100644 index 00000000000..6b467cff123 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/low_level_alloc.cc @@ -0,0 +1,582 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// A low-level allocator that can be used by other low-level +// modules without introducing dependency cycles. 
+// This allocator is slow and wasteful of memory; +// it should not be used when performance is key. + +#include "base/low_level_alloc.h" +#include "base/dynamic_annotations.h" +#include "base/spinlock.h" +#include "base/logging.h" +#include "malloc_hook-inl.h" +#include <gperftools/malloc_hook.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <new> // for placement-new + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// A first-fit allocator with amortized logarithmic free() time. + +LowLevelAlloc::PagesAllocator::~PagesAllocator() { +} + +// --------------------------------------------------------------------------- +static const int kMaxLevel = 30; + +// We put this class-only struct in a namespace to avoid polluting the +// global namespace with this struct name (thus risking an ODR violation). +namespace low_level_alloc_internal { + // This struct describes one allocated block, or one free block. + struct AllocList { + struct Header { + intptr_t size; // size of entire region, including this field. Must be + // first. Valid in both allocated and unallocated blocks + intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this + LowLevelAlloc::Arena *arena; // pointer to parent arena + void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*) + } header; + + // Next two fields: in unallocated blocks: freelist skiplist data + // in allocated blocks: overlaps with client data + int levels; // levels in skiplist used + AllocList *next[kMaxLevel]; // actually has levels elements. + // The AllocList node may not have room for + // all kMaxLevel entries. See max_fit in + // LLA_SkiplistLevels() + }; +} +using low_level_alloc_internal::AllocList; + + +// --------------------------------------------------------------------------- +// A trivial skiplist implementation. This is used to keep the freelist +// in address order while taking only logarithmic time per insert and delete. + +// An integer approximation of log2(size/base) +// Requires size >= base. +static int IntLog2(size_t size, size_t base) { + int result = 0; + for (size_t i = size; i > base; i >>= 1) { // i == floor(size/2**result) + result++; + } + // floor(size / 2**result) <= base < floor(size / 2**(result-1)) + // => log2(size/(base+1)) <= result < 1+log2(size/base) + // => result ~= log2(size/base) + return result; +} + +// Return a random integer n: p(n)=1/(2**n) if 1 <= n; p(n)=0 if n < 1. +static int Random() { + static uint32 r = 1; // no locking---it's not critical + ANNOTATE_BENIGN_RACE(&r, "benign race, not critical."); + int result = 1; + while ((((r = r*1103515245 + 12345) >> 30) & 1) == 0) { + result++; + } + return result; +} + +// Return a number of skiplist levels for a node of size bytes, where +// base is the minimum node size. Compute level=log2(size / base)+n +// where n is 1 if random is false and otherwise a random number generated with +// the standard distribution for a skiplist: See Random() above. +// Bigger nodes tend to have more skiplist levels due to the log2(size / base) +// term, so first-fit searches touch fewer nodes. "level" is clipped so +// level<kMaxLevel and next[level-1] will fit in the node. 
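+// E.g. with base == 16 and size == 4096, IntLog2(4096, 16) == 8, so a
+// non-random node gets level 9; random nodes get level 9 or more, subject
+// to the same clipping.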
+// 0 < LLA_SkiplistLevels(x,y,false) <= LLA_SkiplistLevels(x,y,true) < kMaxLevel +static int LLA_SkiplistLevels(size_t size, size_t base, bool random) { + // max_fit is the maximum number of levels that will fit in a node for the + // given size. We can't return more than max_fit, no matter what the + // random number generator says. + int max_fit = (size-OFFSETOF_MEMBER(AllocList, next)) / sizeof (AllocList *); + int level = IntLog2(size, base) + (random? Random() : 1); + if (level > max_fit) level = max_fit; + if (level > kMaxLevel-1) level = kMaxLevel - 1; + RAW_CHECK(level >= 1, "block not big enough for even one level"); + return level; +} + +// Return "atleast", the first element of AllocList *head s.t. *atleast >= *e. +// For 0 <= i < head->levels, set prev[i] to "no_greater", where no_greater +// points to the last element at level i in the AllocList less than *e, or is +// head if no such element exists. +static AllocList *LLA_SkiplistSearch(AllocList *head, + AllocList *e, AllocList **prev) { + AllocList *p = head; + for (int level = head->levels - 1; level >= 0; level--) { + for (AllocList *n; (n = p->next[level]) != 0 && n < e; p = n) { + } + prev[level] = p; + } + return (head->levels == 0) ? 0 : prev[0]->next[0]; +} + +// Insert element *e into AllocList *head. Set prev[] as LLA_SkiplistSearch. +// Requires that e->levels be previously set by the caller (using +// LLA_SkiplistLevels()) +static void LLA_SkiplistInsert(AllocList *head, AllocList *e, + AllocList **prev) { + LLA_SkiplistSearch(head, e, prev); + for (; head->levels < e->levels; head->levels++) { // extend prev pointers + prev[head->levels] = head; // to all *e's levels + } + for (int i = 0; i != e->levels; i++) { // add element to list + e->next[i] = prev[i]->next[i]; + prev[i]->next[i] = e; + } +} + +// Remove element *e from AllocList *head. Set prev[] as LLA_SkiplistSearch(). +// Requires that e->levels be previous set by the caller (using +// LLA_SkiplistLevels()) +static void LLA_SkiplistDelete(AllocList *head, AllocList *e, + AllocList **prev) { + AllocList *found = LLA_SkiplistSearch(head, e, prev); + RAW_CHECK(e == found, "element not in freelist"); + for (int i = 0; i != e->levels && prev[i]->next[i] == e; i++) { + prev[i]->next[i] = e->next[i]; + } + while (head->levels > 0 && head->next[head->levels - 1] == 0) { + head->levels--; // reduce head->levels if level unused + } +} + +// --------------------------------------------------------------------------- +// Arena implementation + +struct LowLevelAlloc::Arena { + Arena() : mu(SpinLock::LINKER_INITIALIZED) {} // does nothing; for static init + explicit Arena(int) : pagesize(0) {} // set pagesize to zero explicitly + // for non-static init + + SpinLock mu; // protects freelist, allocation_count, + // pagesize, roundup, min_size + AllocList freelist; // head of free list; sorted by addr (under mu) + int32 allocation_count; // count of allocated blocks (under mu) + int32 flags; // flags passed to NewArena (ro after init) + size_t pagesize; // ==getpagesize() (init under mu, then ro) + size_t roundup; // lowest power of 2 >= max(16,sizeof (AllocList)) + // (init under mu, then ro) + size_t min_size; // smallest allocation block size + // (init under mu, then ro) + PagesAllocator *allocator; +}; + +// The default arena, which is used when 0 is passed instead of an Arena +// pointer. 
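+// (LowLevelAlloc::Alloc() allocates from it, and DefaultArena() below
+// returns it.)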
+static struct LowLevelAlloc::Arena default_arena;
+
+// Non-malloc-hooked arenas: used only to allocate metadata for arenas that
+// do not want malloc hook reporting, so that for them there's no malloc hook
+// reporting even during arena creation.
+static struct LowLevelAlloc::Arena unhooked_arena;
+static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena;
+
+namespace {
+
+ class DefaultPagesAllocator : public LowLevelAlloc::PagesAllocator {
+ public:
+ virtual ~DefaultPagesAllocator() {}
+ virtual void *MapPages(int32 flags, size_t size);
+ virtual void UnMapPages(int32 flags, void *addr, size_t size);
+ };
+
+}
+
+// magic numbers to identify allocated and unallocated blocks
+static const intptr_t kMagicAllocated = 0x4c833e95;
+static const intptr_t kMagicUnallocated = ~kMagicAllocated;
+
+namespace {
+ class SCOPED_LOCKABLE ArenaLock {
+ public:
+ explicit ArenaLock(LowLevelAlloc::Arena *arena)
+ EXCLUSIVE_LOCK_FUNCTION(arena->mu)
+ : left_(false), mask_valid_(false), arena_(arena) {
+ if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+ // We've decided not to support async-signal-safe arena use until
+ // there is a demonstrated need. Here's how one could do it though
+ // (would need to be made more portable).
+#if 0
+ sigset_t all;
+ sigfillset(&all);
+ this->mask_valid_ =
+ (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0);
+#else
+ RAW_CHECK(false, "We do not yet support async-signal-safe arena.");
+#endif
+ }
+ this->arena_->mu.Lock();
+ }
+ ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); }
+ void Leave() /*UNLOCK_FUNCTION()*/ {
+ this->arena_->mu.Unlock();
+#if 0
+ if (this->mask_valid_) {
+ pthread_sigmask(SIG_SETMASK, &this->mask_, 0);
+ }
+#endif
+ this->left_ = true;
+ }
+ private:
+ bool left_; // whether left region
+ bool mask_valid_;
+#if 0
+ sigset_t mask_; // old mask of blocked signals
+#endif
+ LowLevelAlloc::Arena *arena_;
+ DISALLOW_COPY_AND_ASSIGN(ArenaLock);
+ };
+} // anonymous namespace
+
+// create an appropriate magic number for an object at "ptr"
+// "magic" should be kMagicAllocated or kMagicUnallocated
+inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) {
+ return magic ^ reinterpret_cast<intptr_t>(ptr);
+}
+
+// Initialize the fields of an Arena
+static void ArenaInit(LowLevelAlloc::Arena *arena) {
+ if (arena->pagesize == 0) {
+ arena->pagesize = getpagesize();
+ // Round up block sizes to a power of two close to the header size.
+ arena->roundup = 16;
+ while (arena->roundup < sizeof (arena->freelist.header)) {
+ arena->roundup += arena->roundup;
+ }
+ // Don't allocate blocks less than twice the roundup size to avoid tiny
+ // free blocks.
+ arena->min_size = 2 * arena->roundup;
+ arena->freelist.header.size = 0;
+ arena->freelist.header.magic =
+ Magic(kMagicUnallocated, &arena->freelist.header);
+ arena->freelist.header.arena = arena;
+ arena->freelist.levels = 0;
+ memset(arena->freelist.next, 0, sizeof (arena->freelist.next));
+ arena->allocation_count = 0;
+ if (arena == &default_arena) {
+ // Default arena should be hooked, e.g. for heap-checker to trace
+ // pointer chains through objects in the default arena.
+ arena->flags = LowLevelAlloc::kCallMallocHook;
+ } else if (arena == &unhooked_async_sig_safe_arena) {
+ arena->flags = LowLevelAlloc::kAsyncSignalSafe;
+ } else {
+ arena->flags = 0; // other arenas' flags may be overridden by client,
+ // but unhooked_arena will have 0 in 'flags'.
+ } + arena->allocator = LowLevelAlloc::GetDefaultPagesAllocator(); + } +} + +// L < meta_data_arena->mu +LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags, + Arena *meta_data_arena) { + return NewArenaWithCustomAlloc(flags, meta_data_arena, NULL); +} + +// L < meta_data_arena->mu +LowLevelAlloc::Arena *LowLevelAlloc::NewArenaWithCustomAlloc(int32 flags, + Arena *meta_data_arena, + PagesAllocator *allocator) { + RAW_CHECK(meta_data_arena != 0, "must pass a valid arena"); + if (meta_data_arena == &default_arena) { + if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + meta_data_arena = &unhooked_async_sig_safe_arena; + } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) { + meta_data_arena = &unhooked_arena; + } + } + // Arena(0) uses the constructor for non-static contexts + Arena *result = + new (AllocWithArena(sizeof (*result), meta_data_arena)) Arena(0); + ArenaInit(result); + result->flags = flags; + if (allocator) { + result->allocator = allocator; + } + return result; +} + +// L < arena->mu, L < arena->arena->mu +bool LowLevelAlloc::DeleteArena(Arena *arena) { + RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena, + "may not delete default arena"); + ArenaLock section(arena); + bool empty = (arena->allocation_count == 0); + section.Leave(); + if (empty) { + while (arena->freelist.next[0] != 0) { + AllocList *region = arena->freelist.next[0]; + size_t size = region->header.size; + arena->freelist.next[0] = region->next[0]; + RAW_CHECK(region->header.magic == + Magic(kMagicUnallocated, ®ion->header), + "bad magic number in DeleteArena()"); + RAW_CHECK(region->header.arena == arena, + "bad arena pointer in DeleteArena()"); + RAW_CHECK(size % arena->pagesize == 0, + "empty arena has non-page-aligned block size"); + RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0, + "empty arena has non-page-aligned block"); + int munmap_result; + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) { + munmap_result = munmap(region, size); + } else { + munmap_result = MallocHook::UnhookedMUnmap(region, size); + } + RAW_CHECK(munmap_result == 0, + "LowLevelAlloc::DeleteArena: munmap failed address"); + } + Free(arena); + } + return empty; +} + +// --------------------------------------------------------------------------- + +// Return value rounded up to next multiple of align. +// align must be a power of two. +static intptr_t RoundUp(intptr_t addr, intptr_t align) { + return (addr + align - 1) & ~(align - 1); +} + +// Equivalent to "return prev->next[i]" but with sanity checking +// that the freelist is in the correct order, that it +// consists of regions marked "unallocated", and that no two regions +// are adjacent in memory (they should have been coalesced). +// L < arena->mu +static AllocList *Next(int i, AllocList *prev, LowLevelAlloc::Arena *arena) { + RAW_CHECK(i < prev->levels, "too few levels in Next()"); + AllocList *next = prev->next[i]; + if (next != 0) { + RAW_CHECK(next->header.magic == Magic(kMagicUnallocated, &next->header), + "bad magic number in Next()"); + RAW_CHECK(next->header.arena == arena, + "bad arena pointer in Next()"); + if (prev != &arena->freelist) { + RAW_CHECK(prev < next, "unordered freelist"); + RAW_CHECK(reinterpret_cast<char *>(prev) + prev->header.size < + reinterpret_cast<char *>(next), "malformed freelist"); + } + } + return next; +} + +// Coalesce list item "a" with its successor if they are adjacent. 
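+// E.g. a free block covering [0x1000,0x1040) whose freelist successor covers
+// [0x1040,0x10c0) becomes a single 0xc0-byte block that is re-inserted into
+// the skiplist with freshly computed levels.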
+static void Coalesce(AllocList *a) { + AllocList *n = a->next[0]; + if (n != 0 && reinterpret_cast<char *>(a) + a->header.size == + reinterpret_cast<char *>(n)) { + LowLevelAlloc::Arena *arena = a->header.arena; + a->header.size += n->header.size; + n->header.magic = 0; + n->header.arena = 0; + AllocList *prev[kMaxLevel]; + LLA_SkiplistDelete(&arena->freelist, n, prev); + LLA_SkiplistDelete(&arena->freelist, a, prev); + a->levels = LLA_SkiplistLevels(a->header.size, arena->min_size, true); + LLA_SkiplistInsert(&arena->freelist, a, prev); + } +} + +// Adds block at location "v" to the free list +// L >= arena->mu +static void AddToFreelist(void *v, LowLevelAlloc::Arena *arena) { + AllocList *f = reinterpret_cast<AllocList *>( + reinterpret_cast<char *>(v) - sizeof (f->header)); + RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header), + "bad magic number in AddToFreelist()"); + RAW_CHECK(f->header.arena == arena, + "bad arena pointer in AddToFreelist()"); + f->levels = LLA_SkiplistLevels(f->header.size, arena->min_size, true); + AllocList *prev[kMaxLevel]; + LLA_SkiplistInsert(&arena->freelist, f, prev); + f->header.magic = Magic(kMagicUnallocated, &f->header); + Coalesce(f); // maybe coalesce with successor + Coalesce(prev[0]); // maybe coalesce with predecessor +} + +// Frees storage allocated by LowLevelAlloc::Alloc(). +// L < arena->mu +void LowLevelAlloc::Free(void *v) { + if (v != 0) { + AllocList *f = reinterpret_cast<AllocList *>( + reinterpret_cast<char *>(v) - sizeof (f->header)); + RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header), + "bad magic number in Free()"); + LowLevelAlloc::Arena *arena = f->header.arena; + if ((arena->flags & kCallMallocHook) != 0) { + MallocHook::InvokeDeleteHook(v); + } + ArenaLock section(arena); + AddToFreelist(v, arena); + RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free"); + arena->allocation_count--; + section.Leave(); + } +} + +// allocates and returns a block of size bytes, to be freed with Free() +// L < arena->mu +static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { + void *result = 0; + if (request != 0) { + AllocList *s; // will point to region that satisfies request + ArenaLock section(arena); + ArenaInit(arena); + // round up with header + size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup); + for (;;) { // loop until we find a suitable region + // find the minimum levels that a block of this size must have + int i = LLA_SkiplistLevels(req_rnd, arena->min_size, false) - 1; + if (i < arena->freelist.levels) { // potential blocks exist + AllocList *before = &arena->freelist; // predecessor of s + while ((s = Next(i, before, arena)) != 0 && s->header.size < req_rnd) { + before = s; + } + if (s != 0) { // we found a region + break; + } + } + // we unlock before mmap() both because mmap() may call a callback hook, + // and because it may be slow. + arena->mu.Unlock(); + // mmap generous 64K chunks to decrease + // the chances/impact of fragmentation: + size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16); + void *new_pages = arena->allocator->MapPages(arena->flags, new_pages_size); + arena->mu.Lock(); + s = reinterpret_cast<AllocList *>(new_pages); + s->header.size = new_pages_size; + // Pretend the block is allocated; call AddToFreelist() to free it. 
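+ // (AddToFreelist() RAW_CHECKs that a block carries the "allocated" magic
+ // before flipping it to "unallocated" and coalescing it with neighbors.)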
+ s->header.magic = Magic(kMagicAllocated, &s->header); + s->header.arena = arena; + AddToFreelist(&s->levels, arena); // insert new region into free list + } + AllocList *prev[kMaxLevel]; + LLA_SkiplistDelete(&arena->freelist, s, prev); // remove from free list + // s points to the first free region that's big enough + if (req_rnd + arena->min_size <= s->header.size) { // big enough to split + AllocList *n = reinterpret_cast<AllocList *> + (req_rnd + reinterpret_cast<char *>(s)); + n->header.size = s->header.size - req_rnd; + n->header.magic = Magic(kMagicAllocated, &n->header); + n->header.arena = arena; + s->header.size = req_rnd; + AddToFreelist(&n->levels, arena); + } + s->header.magic = Magic(kMagicAllocated, &s->header); + RAW_CHECK(s->header.arena == arena, ""); + arena->allocation_count++; + section.Leave(); + result = &s->levels; + } + ANNOTATE_NEW_MEMORY(result, request); + return result; +} + +void *LowLevelAlloc::Alloc(size_t request) { + void *result = DoAllocWithArena(request, &default_arena); + if ((default_arena.flags & kCallMallocHook) != 0) { + // this call must be directly in the user-called allocator function + // for MallocHook::GetCallerStackTrace to work properly + MallocHook::InvokeNewHook(result, request); + } + return result; +} + +void *LowLevelAlloc::AllocWithArena(size_t request, Arena *arena) { + RAW_CHECK(arena != 0, "must pass a valid arena"); + void *result = DoAllocWithArena(request, arena); + if ((arena->flags & kCallMallocHook) != 0) { + // this call must be directly in the user-called allocator function + // for MallocHook::GetCallerStackTrace to work properly + MallocHook::InvokeNewHook(result, request); + } + return result; +} + +LowLevelAlloc::Arena *LowLevelAlloc::DefaultArena() { + return &default_arena; +} + +static DefaultPagesAllocator *default_pages_allocator; +static union { + char chars[sizeof(DefaultPagesAllocator)]; + void *ptr; +} debug_pages_allocator_space; + +LowLevelAlloc::PagesAllocator *LowLevelAlloc::GetDefaultPagesAllocator(void) { + if (default_pages_allocator) { + return default_pages_allocator; + } + default_pages_allocator = new (debug_pages_allocator_space.chars) DefaultPagesAllocator(); + return default_pages_allocator; +} + +void *DefaultPagesAllocator::MapPages(int32 flags, size_t size) { + void *new_pages; + if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + new_pages = MallocHook::UnhookedMMap(0, size, + PROT_WRITE|PROT_READ, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } else { + new_pages = mmap(0, size, + PROT_WRITE|PROT_READ, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } + RAW_CHECK(new_pages != MAP_FAILED, "mmap error"); + + return new_pages; +} + +void DefaultPagesAllocator::UnMapPages(int32 flags, void *region, size_t size) { + int munmap_result; + if ((flags & LowLevelAlloc::kAsyncSignalSafe) == 0) { + munmap_result = munmap(region, size); + } else { + munmap_result = MallocHook::UnhookedMUnmap(region, size); + } + RAW_CHECK(munmap_result == 0, + "LowLevelAlloc::DeleteArena: munmap failed address"); +} diff --git a/src/third_party/gperftools-2.5/src/base/low_level_alloc.h b/src/third_party/gperftools-2.5/src/base/low_level_alloc.h new file mode 100644 index 00000000000..d8dfc8f3929 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/low_level_alloc.h @@ -0,0 +1,120 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if !defined(_BASE_LOW_LEVEL_ALLOC_H_)
+#define _BASE_LOW_LEVEL_ALLOC_H_
+
+// A simple thread-safe memory allocator that does not depend on
+// mutexes or thread-specific data. It is intended to be used
+// sparingly, and only when malloc() would introduce an unwanted
+// dependency, such as inside the heap-checker.
+
+#include <config.h>
+#include <stddef.h> // for size_t
+#include "base/basictypes.h"
+
+class LowLevelAlloc {
+ public:
+ class PagesAllocator {
+ public:
+ virtual ~PagesAllocator();
+ virtual void *MapPages(int32 flags, size_t size) = 0;
+ virtual void UnMapPages(int32 flags, void *addr, size_t size) = 0;
+ };
+
+ static PagesAllocator *GetDefaultPagesAllocator(void);
+
+ struct Arena; // an arena from which memory may be allocated
+
+ // Returns a pointer to a block of at least "request" bytes
+ // that have been newly allocated from the specific arena.
+ // For Alloc() calls, the DefaultArena() is used.
+ // Returns 0 if passed request==0.
+ // Does not return 0 under other circumstances; it crashes if memory
+ // is not available.
+ static void *Alloc(size_t request)
+ ATTRIBUTE_SECTION(malloc_hook);
+ static void *AllocWithArena(size_t request, Arena *arena)
+ ATTRIBUTE_SECTION(malloc_hook);
+
+ // Deallocates a region of memory that was previously allocated with
+ // Alloc(). Does nothing if passed 0. "s" must be either 0,
+ // or must have been returned from a call to Alloc() and not yet passed to
+ // Free() since that call to Alloc(). The space is returned to the arena
+ // from which it was allocated.
+ static void Free(void *s) ATTRIBUTE_SECTION(malloc_hook);
+
+ // ATTRIBUTE_SECTION(malloc_hook) for Alloc* and Free
+ // are to put all callers of MallocHook::Invoke* in this module
+ // into a special section,
+ // so that MallocHook::GetCallerStackTrace can function accurately.
+
+ // Create a new arena.
+ // The root metadata for the new arena is allocated in the
+ // meta_data_arena; the DefaultArena() can be passed for meta_data_arena.
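+ // As a minimal usage sketch (the flag values are described just below):
+ //   LowLevelAlloc::Arena *arena =
+ //       LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
+ //   void *p = LowLevelAlloc::AllocWithArena(128, arena);
+ //   LowLevelAlloc::Free(p);
+ //   LowLevelAlloc::DeleteArena(arena);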
+  // The following values may be OR'ed into flags:
+  enum {
+    // Report calls to Alloc() and Free() via the MallocHook interface.
+    // Set in the DefaultArena.
+    kCallMallocHook = 0x0001,
+
+    // Make calls to Alloc(), Free() be async-signal-safe.  Not set in
+    // DefaultArena().
+    kAsyncSignalSafe = 0x0002,
+
+    // When used with DefaultArena(), the NewArena() and DeleteArena() calls
+    // obey the flags given explicitly in the NewArena() call, even if those
+    // flags differ from the settings in DefaultArena().  So the call
+    // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe,
+    // as well as generating an arena that provides async-signal-safe
+    // Alloc/Free.
+  };
+  static Arena *NewArena(int32 flags, Arena *meta_data_arena);
+
+  // Note: the pages allocator is never destroyed, and pages it allocates
+  // are never freed.  When allocator is NULL, this is the same as NewArena().
+  static Arena *NewArenaWithCustomAlloc(int32 flags, Arena *meta_data_arena, PagesAllocator *allocator);
+
+  // Destroys an arena allocated by NewArena and returns true,
+  // provided no allocated blocks remain in the arena.
+  // If allocated blocks remain in the arena, does nothing and
+  // returns false.
+  // It is illegal to attempt to destroy the DefaultArena().
+  static bool DeleteArena(Arena *arena);
+
+  // The default arena that always exists.
+  static Arena *DefaultArena();
+
+ private:
+  LowLevelAlloc();      // no instances
+};
+
+#endif
diff --git a/src/third_party/gperftools-2.5/src/base/simple_mutex.h b/src/third_party/gperftools-2.5/src/base/simple_mutex.h
new file mode 100644
index 00000000000..a1886e46f31
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/simple_mutex.h
@@ -0,0 +1,332 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+// Author: Craig Silverstein.
+//
+// A simple mutex wrapper, supporting locks and read-write locks.
+// You should assume the locks are *not* re-entrant.
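// A minimal usage sketch (illustrative only; it assumes nothing beyond the
// declarations below).  The scoped helpers at the bottom of this file are
// preferred over bare Lock()/Unlock(), since they release on every path:
//
//   static Mutex mu(Mutex::LINKER_INITIALIZED);   // safe as a global
//
//   void Increment(int* counter) {
//     MutexLock l(&mu);         // acquires mu; releases at end of scope
//     ++*counter;
//   }
//
//   int Read(const int* counter) {
//     ReaderMutexLock l(&mu);   // shared lock where rwlocks are available
//     return *counter;
//   }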
+// +// To use: you should define the following macros in your configure.ac: +// ACX_PTHREAD +// AC_RWLOCK +// The latter is defined in ../autoconf. +// +// This class is meant to be internal-only and should be wrapped by an +// internal namespace. Before you use this module, please give the +// name of your internal namespace for this module. Or, if you want +// to expose it, you'll want to move it to the Google namespace. We +// cannot put this class in global namespace because there can be some +// problems when we have multiple versions of Mutex in each shared object. +// +// NOTE: TryLock() is broken for NO_THREADS mode, at least in NDEBUG +// mode. +// +// CYGWIN NOTE: Cygwin support for rwlock seems to be buggy: +// http://www.cygwin.com/ml/cygwin/2008-12/msg00017.html +// Because of that, we might as well use windows locks for +// cygwin. They seem to be more reliable than the cygwin pthreads layer. +// +// TRICKY IMPLEMENTATION NOTE: +// This class is designed to be safe to use during +// dynamic-initialization -- that is, by global constructors that are +// run before main() starts. The issue in this case is that +// dynamic-initialization happens in an unpredictable order, and it +// could be that someone else's dynamic initializer could call a +// function that tries to acquire this mutex -- but that all happens +// before this mutex's constructor has run. (This can happen even if +// the mutex and the function that uses the mutex are in the same .cc +// file.) Basically, because Mutex does non-trivial work in its +// constructor, it's not, in the naive implementation, safe to use +// before dynamic initialization has run on it. +// +// The solution used here is to pair the actual mutex primitive with a +// bool that is set to true when the mutex is dynamically initialized. +// (Before that it's false.) Then we modify all mutex routines to +// look at the bool, and not try to lock/unlock until the bool makes +// it to true (which happens after the Mutex constructor has run.) +// +// This works because before main() starts -- particularly, during +// dynamic initialization -- there are no threads, so a) it's ok that +// the mutex operations are a no-op, since we don't need locking then +// anyway; and b) we can be quite confident our bool won't change +// state between a call to Lock() and a call to Unlock() (that would +// require a global constructor in one translation unit to call Lock() +// and another global constructor in another translation unit to call +// Unlock() later, which is pretty perverse). +// +// That said, it's tricky, and can conceivably fail; it's safest to +// avoid trying to acquire a mutex in a global constructor, if you +// can. One way it can fail is that a really smart compiler might +// initialize the bool to true at static-initialization time (too +// early) rather than at dynamic-initialization time. To discourage +// that, we set is_safe_ to true in code (not the constructor +// colon-initializer) and set it to true via a function that always +// evaluates to true, but that the compiler can't know always +// evaluates to true. This should be good enough. +// +// A related issue is code that could try to access the mutex +// after it's been destroyed in the global destructors (because +// the Mutex global destructor runs before some other global +// destructor, that tries to acquire the mutex). The way we +// deal with this is by taking a constructor arg that global +// mutexes should pass in, that causes the destructor to do no +// work. 
+// We still depend on the compiler not doing anything
+// weird to a Mutex's memory after it is destroyed, but for a
+// static global variable, that's pretty safe.
+
+#ifndef GOOGLE_MUTEX_H_
+#define GOOGLE_MUTEX_H_
+
+#include <config.h>
+
+#if defined(NO_THREADS)
+  typedef int MutexType;      // to keep a lock-count
+#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+# ifndef WIN32_LEAN_AND_MEAN
+#   define WIN32_LEAN_AND_MEAN  // We only need minimal includes
+# endif
+  // We need Windows NT or later for TryEnterCriticalSection().  If you
+  // don't need that functionality, you can remove these _WIN32_WINNT
+  // lines, and change TryLock() to assert(0) or something.
+# ifndef _WIN32_WINNT
+#   define _WIN32_WINNT 0x0400
+# endif
+# include <windows.h>
+  typedef CRITICAL_SECTION MutexType;
+#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
+  // Needed for pthread_rwlock_*.  If it causes problems, you could take it
+  // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
+  // *does* cause problems for FreeBSD, or MacOSX, but isn't needed
+  // for locking there.)
+# ifdef __linux__
+#   define _XOPEN_SOURCE 500  // may be needed to get the rwlock calls
+# endif
+# include <pthread.h>
+  typedef pthread_rwlock_t MutexType;
+#elif defined(HAVE_PTHREAD)
+# include <pthread.h>
+  typedef pthread_mutex_t MutexType;
+#else
+# error Need to implement mutex.h for your architecture, or #define NO_THREADS
+#endif
+
+#include <assert.h>
+#include <stdlib.h>      // for abort()
+
+#define MUTEX_NAMESPACE perftools_mutex_namespace
+
+namespace MUTEX_NAMESPACE {
+
+class Mutex {
+ public:
+  // This is used for the single-arg constructor
+  enum LinkerInitialized { LINKER_INITIALIZED };
+
+  // Create a Mutex that is not held by anybody.  This constructor is
+  // typically used for Mutexes allocated on the heap or the stack.
+  inline Mutex();
+  // This constructor should be used for global, static Mutex objects.
+  // It inhibits work being done by the destructor, which makes it
+  // safer for code that tries to acquire this mutex in its global
+  // destructor.
+  inline Mutex(LinkerInitialized);
+
+  // Destructor
+  inline ~Mutex();
+
+  inline void Lock();    // Block if needed until free then acquire exclusively
+  inline void Unlock();  // Release a lock acquired via Lock()
+  inline bool TryLock(); // If free, Lock() and return true, else return false
+  // Note that on systems that don't support read-write locks, these may
+  // be implemented as synonyms to Lock() and Unlock().  So you can use
+  // these for efficiency, but don't use them anyplace where being able
+  // to do shared reads is necessary to avoid deadlock.
+  inline void ReaderLock();   // Block until free or shared then acquire a share
+  inline void ReaderUnlock(); // Release a read share of this Mutex
+  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
+  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
+
+ private:
+  MutexType mutex_;
+  // We want to make sure that the compiler sets is_safe_ to true only
+  // when we tell it to, and never assumes is_safe_ is
+  // always true.  volatile is the most reliable way to do that.
+  volatile bool is_safe_;
+  // This indicates which constructor was called.
+  bool destroy_;
+
+  inline void SetIsSafe() { is_safe_ = true; }
+
+  // Catch the error of writing Mutex when intending MutexLock.
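  // For instance, with this private overload the typo
  //   Mutex l(&mu);        // intended: MutexLock l(&mu);
  // fails to compile (the constructor is private) rather than silently
  // creating a new, unrelated Mutex.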
+ Mutex(Mutex* /*ignored*/) {} + // Disallow "evil" constructors + Mutex(const Mutex&); + void operator=(const Mutex&); +}; + +// Now the implementation of Mutex for various systems +#if defined(NO_THREADS) + +// When we don't have threads, we can be either reading or writing, +// but not both. We can have lots of readers at once (in no-threads +// mode, that's most likely to happen in recursive function calls), +// but only one writer. We represent this by having mutex_ be -1 when +// writing and a number > 0 when reading (and 0 when no lock is held). +// +// In debug mode, we assert these invariants, while in non-debug mode +// we do nothing, for efficiency. That's why everything is in an +// assert. + +Mutex::Mutex() : mutex_(0) { } +Mutex::Mutex(Mutex::LinkerInitialized) : mutex_(0) { } +Mutex::~Mutex() { assert(mutex_ == 0); } +void Mutex::Lock() { assert(--mutex_ == -1); } +void Mutex::Unlock() { assert(mutex_++ == -1); } +bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } +void Mutex::ReaderLock() { assert(++mutex_ > 0); } +void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } + +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +Mutex::Mutex() : destroy_(true) { + InitializeCriticalSection(&mutex_); + SetIsSafe(); +} +Mutex::Mutex(LinkerInitialized) : destroy_(false) { + InitializeCriticalSection(&mutex_); + SetIsSafe(); +} +Mutex::~Mutex() { if (destroy_) DeleteCriticalSection(&mutex_); } +void Mutex::Lock() { if (is_safe_) EnterCriticalSection(&mutex_); } +void Mutex::Unlock() { if (is_safe_) LeaveCriticalSection(&mutex_); } +bool Mutex::TryLock() { return is_safe_ ? + TryEnterCriticalSection(&mutex_) != 0 : true; } +void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks +void Mutex::ReaderUnlock() { Unlock(); } + +#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) + +#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ + if (is_safe_ && fncall(&mutex_) != 0) abort(); \ +} while (0) + +Mutex::Mutex() : destroy_(true) { + SetIsSafe(); + if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); +} +Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { + SetIsSafe(); + if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); +} +Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_rwlock_destroy); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } +bool Mutex::TryLock() { return is_safe_ ? + pthread_rwlock_trywrlock(&mutex_) == 0 : true; } +void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); } +void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } +#undef SAFE_PTHREAD + +#elif defined(HAVE_PTHREAD) + +#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ + if (is_safe_ && fncall(&mutex_) != 0) abort(); \ +} while (0) + +Mutex::Mutex() : destroy_(true) { + SetIsSafe(); + if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); +} +Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { + SetIsSafe(); + if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); +} +Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_mutex_destroy); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock); } +bool Mutex::TryLock() { return is_safe_ ? 
+ pthread_mutex_trylock(&mutex_) == 0 : true; } +void Mutex::ReaderLock() { Lock(); } +void Mutex::ReaderUnlock() { Unlock(); } +#undef SAFE_PTHREAD + +#endif + +// -------------------------------------------------------------------------- +// Some helper classes + +// MutexLock(mu) acquires mu when constructed and releases it when destroyed. +class MutexLock { + public: + explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } + ~MutexLock() { mu_->Unlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + MutexLock(const MutexLock&); + void operator=(const MutexLock&); +}; + +// ReaderMutexLock and WriterMutexLock do the same, for rwlocks +class ReaderMutexLock { + public: + explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } + ~ReaderMutexLock() { mu_->ReaderUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + ReaderMutexLock(const ReaderMutexLock&); + void operator=(const ReaderMutexLock&); +}; + +class WriterMutexLock { + public: + explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } + ~WriterMutexLock() { mu_->WriterUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + WriterMutexLock(const WriterMutexLock&); + void operator=(const WriterMutexLock&); +}; + +// Catch bug where variable name is omitted, e.g. MutexLock (&mu); +#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name) +#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) +#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) + +} // namespace MUTEX_NAMESPACE + +using namespace MUTEX_NAMESPACE; + +#undef MUTEX_NAMESPACE + +#endif /* #define GOOGLE_SIMPLE_MUTEX_H_ */ diff --git a/src/third_party/gperftools-2.5/src/base/spinlock.cc b/src/third_party/gperftools-2.5/src/base/spinlock.cc new file mode 100644 index 00000000000..85ff21ed404 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/spinlock.cc @@ -0,0 +1,129 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +#include <config.h> +#include "base/spinlock.h" +#include "base/spinlock_internal.h" +#include "base/sysinfo.h" /* for GetSystemCPUsCount() */ + +// NOTE on the Lock-state values: +// +// kSpinLockFree represents the unlocked state +// kSpinLockHeld represents the locked state with no waiters +// kSpinLockSleeper represents the locked state with waiters + +static int adaptive_spin_count = 0; + +const base::LinkerInitialized SpinLock::LINKER_INITIALIZED = + base::LINKER_INITIALIZED; + +namespace { +struct SpinLock_InitHelper { + SpinLock_InitHelper() { + // On multi-cpu machines, spin for longer before yielding + // the processor or sleeping. Reduces idle time significantly. + if (GetSystemCPUsCount() > 1) { + adaptive_spin_count = 1000; + } + } +}; + +// Hook into global constructor execution: +// We do not do adaptive spinning before that, +// but nothing lock-intensive should be going on at that time. +static SpinLock_InitHelper init_helper; + +inline void SpinlockPause(void) { +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + __asm__ __volatile__("rep; nop" : : ); +#endif +} + +} // unnamed namespace + +// Monitor the lock to see if its value changes within some time +// period (adaptive_spin_count loop iterations). The last value read +// from the lock is returned from the method. +Atomic32 SpinLock::SpinLoop() { + int c = adaptive_spin_count; + while (base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree && --c > 0) { + SpinlockPause(); + } + return base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockSleeper); +} + +void SpinLock::SlowLock() { + Atomic32 lock_value = SpinLoop(); + + int lock_wait_call_count = 0; + while (lock_value != kSpinLockFree) { + // If the lock is currently held, but not marked as having a sleeper, mark + // it as having a sleeper. + if (lock_value == kSpinLockHeld) { + // Here, just "mark" that the thread is going to sleep. Don't store the + // lock wait time in the lock as that will cause the current lock + // owner to think it experienced contention. + lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, + kSpinLockHeld, + kSpinLockSleeper); + if (lock_value == kSpinLockHeld) { + // Successfully transitioned to kSpinLockSleeper. Pass + // kSpinLockSleeper to the SpinLockDelay routine to properly indicate + // the last lock_value observed. + lock_value = kSpinLockSleeper; + } else if (lock_value == kSpinLockFree) { + // Lock is free again, so try and acquire it before sleeping. The + // new lock state will be the number of cycles this thread waited if + // this thread obtains the lock. + lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, + kSpinLockFree, + kSpinLockSleeper); + continue; // skip the delay at the end of the loop + } + } + + // Wait for an OS specific delay. 
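    // (At this point the lock word was last observed as kSpinLockSleeper,
    // so the owner's Unlock() will see a waiter and call SlowUnlock() to
    // issue a wakeup.)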
+    base::internal::SpinLockDelay(&lockword_, lock_value,
+                                  ++lock_wait_call_count);
+    // Spin again after returning from the wait routine to give this thread
+    // some chance of obtaining the lock.
+    lock_value = SpinLoop();
+  }
+}
+
+void SpinLock::SlowUnlock() {
+  // wake waiter if necessary
+  base::internal::SpinLockWake(&lockword_, false);
+}
diff --git a/src/third_party/gperftools-2.5/src/base/spinlock.h b/src/third_party/gperftools-2.5/src/base/spinlock.h
new file mode 100644
index 00000000000..7243aeaaefd
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/spinlock.h
@@ -0,0 +1,143 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// SpinLock is async signal safe.
+// If used within a signal handler, all lock holders
+// should block the signal even outside the signal handler.
+
+#ifndef BASE_SPINLOCK_H_
+#define BASE_SPINLOCK_H_
+
+#include <config.h>
+#include "base/atomicops.h"
+#include "base/basictypes.h"
+#include "base/dynamic_annotations.h"
+#include "base/thread_annotations.h"
+
+class LOCKABLE SpinLock {
+ public:
+  SpinLock() : lockword_(kSpinLockFree) { }
+
+  // Special constructor for use with static SpinLock objects.  E.g.,
+  //
+  //    static SpinLock lock(base::LINKER_INITIALIZED);
+  //
+  // When initialized using this constructor, we depend on the fact
+  // that the linker has already initialized the memory appropriately.
+  // A SpinLock constructed like this can be freely used from global
+  // initializers without worrying about the order in which global
+  // initializers run.
+  explicit SpinLock(base::LinkerInitialized /*x*/) {
+    // Does nothing; lockword_ is already initialized
+  }
+
+  // Acquire this SpinLock.
+ // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ { + if (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockHeld) != kSpinLockFree) { + SlowLock(); + } + ANNOTATE_RWLOCK_ACQUIRED(this, 1); + } + + // Try to acquire this SpinLock without blocking and return true if the + // acquisition was successful. If the lock was not acquired, false is + // returned. If this SpinLock is free at the time of the call, TryLock + // will return true with high probability. + inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { + bool res = + (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockHeld) == kSpinLockFree); + if (res) { + ANNOTATE_RWLOCK_ACQUIRED(this, 1); + } + return res; + } + + // Release this SpinLock, which must be held by the calling thread. + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline void Unlock() /*UNLOCK_FUNCTION()*/ { + ANNOTATE_RWLOCK_RELEASED(this, 1); + uint64 prev_value = static_cast<uint64>( + base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree)); + if (prev_value != kSpinLockHeld) { + // Speed the wakeup of any waiter. + SlowUnlock(); + } + } + + // Determine if the lock is held. When the lock is held by the invoking + // thread, true will always be returned. Intended to be used as + // CHECK(lock.IsHeld()). + inline bool IsHeld() const { + return base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree; + } + + static const base::LinkerInitialized LINKER_INITIALIZED; // backwards compat + private: + enum { kSpinLockFree = 0 }; + enum { kSpinLockHeld = 1 }; + enum { kSpinLockSleeper = 2 }; + + volatile Atomic32 lockword_; + + void SlowLock(); + void SlowUnlock(); + Atomic32 SpinLoop(); + + DISALLOW_COPY_AND_ASSIGN(SpinLock); +}; + +// Corresponding locker object that arranges to acquire a spinlock for +// the duration of a C++ scope. +class SCOPED_LOCKABLE SpinLockHolder { + private: + SpinLock* lock_; + public: + inline explicit SpinLockHolder(SpinLock* l) EXCLUSIVE_LOCK_FUNCTION(l) + : lock_(l) { + l->Lock(); + } + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline ~SpinLockHolder() /*UNLOCK_FUNCTION()*/ { lock_->Unlock(); } +}; +// Catch bug where variable name is omitted, e.g. SpinLockHolder (&lock); +#define SpinLockHolder(x) COMPILE_ASSERT(0, spin_lock_decl_missing_var_name) + + +#endif // BASE_SPINLOCK_H_ diff --git a/src/third_party/gperftools-2.5/src/base/spinlock_internal.cc b/src/third_party/gperftools-2.5/src/base/spinlock_internal.cc new file mode 100644 index 00000000000..d9629717be1 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/spinlock_internal.cc @@ -0,0 +1,102 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2010, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// The OS-specific header included below must provide two calls:
+// base::internal::SpinLockDelay() and base::internal::SpinLockWake().
+// See spinlock_internal.h for the spec of SpinLockWake().
+
+// void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop)
+// SpinLockDelay() generates an appropriate spin delay on iteration "loop" of a
+// spin loop on location *w, whose previously observed value was "value".
+// SpinLockDelay() may do nothing, may yield the CPU, may sleep a clock tick,
+// or may wait for a delay that can be truncated by a call to SpinLockWake(w).
+// In all cases, it must return in bounded time even if SpinLockWake() is not
+// called.
+
+#include "base/spinlock_internal.h"
+
+// forward declaration for use by spinlock_*-inl.h
+namespace base { namespace internal { static int SuggestedDelayNS(int loop); }}
+
+#if defined(_WIN32)
+#include "base/spinlock_win32-inl.h"
+#elif defined(__linux__)
+#include "base/spinlock_linux-inl.h"
+#else
+#include "base/spinlock_posix-inl.h"
+#endif
+
+namespace base {
+namespace internal {
+
+// Return a suggested delay in nanoseconds for iteration number "loop"
+static int SuggestedDelayNS(int loop) {
+  // Weak pseudo-random number generator to get some spread between threads
+  // when many are spinning.
+#ifdef BASE_HAS_ATOMIC64
+  static base::subtle::Atomic64 rand;
+  uint64 r = base::subtle::NoBarrier_Load(&rand);
+  r = 0x5deece66dLL * r + 0xb;   // numbers from nrand48()
+  base::subtle::NoBarrier_Store(&rand, r);
+
+  r <<= 16;   // 48-bit random number now in top 48-bits.
+  if (loop < 0 || loop > 32) {   // limit loop to 0..32
+    loop = 32;
+  }
+  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
+  // Select top 20..24 bits of lower 48 bits,
+  // giving approximately 0ms to 16ms.
+  // Mean is exponential in loop for first 32 iterations, then 8ms.
+  // The futex path multiplies this by 16, since we expect explicit wakeups
+  // almost always on that path.
+  return r >> (44 - (loop >> 3));
+#else
+  static Atomic32 rand;
+  uint32 r = base::subtle::NoBarrier_Load(&rand);
+  r = 0x343fd * r + 0x269ec3;   // numbers from MSVC++
+  base::subtle::NoBarrier_Store(&rand, r);
+
+  r <<= 1;   // 31-bit random number now in top 31-bits.
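  // Worked numbers for the shift below: loop is capped at 32, so loop>>3 is
  // at most 4 and the shift is at least 12 - 4 == 8; r >> 8 stays below
  // 2^24 ns ~= 16ms, while for loop < 8 the shift is 12 and the delay stays
  // near 2^20 ns ~= 1ms.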
+  if (loop < 0 || loop > 32) {   // limit loop to 0..32
+    loop = 32;
+  }
+  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
+  // Select top 20..24 bits of lower 31 bits,
+  // giving approximately 0ms to 16ms.
+  // Mean is exponential in loop for first 32 iterations, then 8ms.
+  // The futex path multiplies this by 16, since we expect explicit wakeups
+  // almost always on that path.
+  return r >> (12 - (loop >> 3));
+#endif
+}
+
+}  // namespace internal
+}  // namespace base
diff --git a/src/third_party/gperftools-2.5/src/base/spinlock_internal.h b/src/third_party/gperftools-2.5/src/base/spinlock_internal.h
new file mode 100644
index 00000000000..aa47e67d4e0
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/spinlock_internal.h
@@ -0,0 +1,51 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * This file is an internal part of spinlock.cc and once.cc.
+ * It may not be used directly by code outside of //base.
+ */
+
+#ifndef BASE_SPINLOCK_INTERNAL_H_
+#define BASE_SPINLOCK_INTERNAL_H_
+
+#include <config.h>
+#include "base/basictypes.h"
+#include "base/atomicops.h"
+
+namespace base {
+namespace internal {
+
+void SpinLockWake(volatile Atomic32 *w, bool all);
+void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop);
+
+}  // namespace internal
+}  // namespace base
+#endif
diff --git a/src/third_party/gperftools-2.5/src/base/spinlock_linux-inl.h b/src/third_party/gperftools-2.5/src/base/spinlock_linux-inl.h
new file mode 100644
index 00000000000..aadf62a4b67
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/spinlock_linux-inl.h
@@ -0,0 +1,101 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2009, Google Inc.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is a Linux-specific part of spinlock_internal.cc + */ + +#include <errno.h> +#include <sched.h> +#include <time.h> +#include <limits.h> +#include "base/linux_syscall_support.h" + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_PRIVATE_FLAG 128 + +static bool have_futex; +static int futex_private_flag = FUTEX_PRIVATE_FLAG; + +namespace { +static struct InitModule { + InitModule() { + int x = 0; + // futexes are ints, so we can use them only when + // that's the same size as the lockword_ in SpinLock. + have_futex = (sizeof (Atomic32) == sizeof (int) && + sys_futex(&x, FUTEX_WAKE, 1, NULL, NULL, 0) >= 0); + if (have_futex && + sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, NULL, NULL, 0) < 0) { + futex_private_flag = 0; + } + } +} init_module; + +} // anonymous namespace + + +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + if (loop != 0) { + int save_errno = errno; + struct timespec tm; + tm.tv_sec = 0; + if (have_futex) { + tm.tv_nsec = base::internal::SuggestedDelayNS(loop); + } else { + tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin + } + if (have_futex) { + tm.tv_nsec *= 16; // increase the delay; we expect explicit wakeups + sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), + FUTEX_WAIT | futex_private_flag, + value, reinterpret_cast<struct kernel_timespec *>(&tm), + NULL, 0); + } else { + nanosleep(&tm, NULL); + } + errno = save_errno; + } +} + +void SpinLockWake(volatile Atomic32 *w, bool all) { + if (have_futex) { + sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), + FUTEX_WAKE | futex_private_flag, all? 
INT_MAX : 1, + NULL, NULL, 0); + } +} + +} // namespace internal +} // namespace base diff --git a/src/third_party/gperftools-2.5/src/base/spinlock_posix-inl.h b/src/third_party/gperftools-2.5/src/base/spinlock_posix-inl.h new file mode 100644 index 00000000000..e73a30fb7d8 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/spinlock_posix-inl.h @@ -0,0 +1,63 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is a Posix-specific part of spinlock_internal.cc + */ + +#include <config.h> +#include <errno.h> +#ifdef HAVE_SCHED_H +#include <sched.h> /* For sched_yield() */ +#endif +#include <time.h> /* For nanosleep() */ + +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + int save_errno = errno; + if (loop == 0) { + } else if (loop == 1) { + sched_yield(); + } else { + struct timespec tm; + tm.tv_sec = 0; + tm.tv_nsec = base::internal::SuggestedDelayNS(loop); + nanosleep(&tm, NULL); + } + errno = save_errno; +} + +void SpinLockWake(volatile Atomic32 *w, bool all) { +} + +} // namespace internal +} // namespace base diff --git a/src/third_party/gperftools-2.5/src/base/spinlock_win32-inl.h b/src/third_party/gperftools-2.5/src/base/spinlock_win32-inl.h new file mode 100644 index 00000000000..956b9653e6d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/spinlock_win32-inl.h @@ -0,0 +1,54 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is a Win32-specific part of spinlock_internal.cc + */ + + +#include <windows.h> + +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + if (loop == 0) { + } else if (loop == 1) { + Sleep(0); + } else { + Sleep(base::internal::SuggestedDelayNS(loop) / 1000000); + } +} + +void SpinLockWake(volatile Atomic32 *w, bool all) { +} + +} // namespace internal +} // namespace base diff --git a/src/third_party/gperftools-2.5/src/base/stl_allocator.h b/src/third_party/gperftools-2.5/src/base/stl_allocator.h new file mode 100644 index 00000000000..2345f463c24 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/stl_allocator.h @@ -0,0 +1,98 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Maxim Lifantsev + */ + + +#ifndef BASE_STL_ALLOCATOR_H_ +#define BASE_STL_ALLOCATOR_H_ + +#include <config.h> + +#include <stddef.h> // for ptrdiff_t +#include <limits> + +#include "base/logging.h" + +// Generic allocator class for STL objects +// that uses a given type-less allocator Alloc, which must provide: +// static void* Alloc::Allocate(size_t size); +// static void Alloc::Free(void* ptr, size_t size); +// +// STL_Allocator<T, MyAlloc> provides the same thread-safety +// guarantees as MyAlloc. +// +// Usage example: +// set<T, less<T>, STL_Allocator<T, MyAlloc> > my_set; +// CAVEAT: Parts of the code below are probably specific +// to the STL version(s) we are using. +// The code is simply lifted from what std::allocator<> provides. +template <typename T, class Alloc> +class STL_Allocator { + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + + template <class T1> struct rebind { + typedef STL_Allocator<T1, Alloc> other; + }; + + STL_Allocator() { } + STL_Allocator(const STL_Allocator&) { } + template <class T1> STL_Allocator(const STL_Allocator<T1, Alloc>&) { } + ~STL_Allocator() { } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + pointer allocate(size_type n, const void* = 0) { + RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate"); + return static_cast<T*>(Alloc::Allocate(n * sizeof(T))); + } + void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); } + + size_type max_size() const { return size_t(-1) / sizeof(T); } + + void construct(pointer p, const T& val) { ::new(p) T(val); } + void construct(pointer p) { ::new(p) T(); } + void destroy(pointer p) { p->~T(); } + + // There's no state, so these allocators are always equal + bool operator==(const STL_Allocator&) const { return true; } +}; + +#endif // BASE_STL_ALLOCATOR_H_ diff --git a/src/third_party/gperftools-2.5/src/base/sysinfo.cc b/src/third_party/gperftools-2.5/src/base/sysinfo.cc new file mode 100644 index 00000000000..789a47d2977 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/sysinfo.cc @@ -0,0 +1,860 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <config.h> +#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) +# define PLATFORM_WINDOWS 1 +#endif + +#include <ctype.h> // for isspace() +#include <stdlib.h> // for getenv() +#include <stdio.h> // for snprintf(), sscanf() +#include <string.h> // for memmove(), memchr(), etc. +#include <fcntl.h> // for open() +#include <errno.h> // for errno +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for read() +#endif +#if defined __MACH__ // Mac OS X, almost certainly +#include <mach-o/dyld.h> // for iterating over dll's in ProcMapsIter +#include <mach-o/loader.h> // for iterating over dll's in ProcMapsIter +#include <sys/types.h> +#include <sys/sysctl.h> // how we figure out numcpu's on OS X +#elif defined __FreeBSD__ +#include <sys/sysctl.h> +#elif defined __sun__ // Solaris +#include <procfs.h> // for, e.g., prmap_t +#elif defined(PLATFORM_WINDOWS) +#include <process.h> // for getpid() (actually, _getpid()) +#include <shlwapi.h> // for SHGetValueA() +#include <tlhelp32.h> // for Module32First() +#endif +#include "base/sysinfo.h" +#include "base/commandlineflags.h" +#include "base/dynamic_annotations.h" // for RunningOnValgrind +#include "base/logging.h" + +#ifdef PLATFORM_WINDOWS +#ifdef MODULEENTRY32 +// In a change from the usual W-A pattern, there is no A variant of +// MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. +// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be +// MODULEENTRY32W. These #undefs are the only way I see to get back +// access to the original, ascii struct (and related functions). +#undef MODULEENTRY32 +#undef Module32First +#undef Module32Next +#undef PMODULEENTRY32 +#undef LPMODULEENTRY32 +#endif /* MODULEENTRY32 */ +// MinGW doesn't seem to define this, perhaps some windowsen don't either. +#ifndef TH32CS_SNAPMODULE32 +#define TH32CS_SNAPMODULE32 0 +#endif /* TH32CS_SNAPMODULE32 */ +#endif /* PLATFORM_WINDOWS */ + +// Re-run fn until it doesn't cause EINTR. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +// open/read/close can set errno, which may be illegal at this +// time, so prefer making the syscalls directly if we can. 
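// The safeopen/saferead/safeclose macros defined just below follow that
// rule; the typical call pattern (as used by GetenvBeforeMain() further
// down) is:
//   int fd = safeopen("/proc/self/environ", O_RDONLY);
//   if (fd >= 0) { saferead(fd, buf, sizeof(buf) - 2); safeclose(fd); }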
+#ifdef HAVE_SYS_SYSCALL_H +# include <sys/syscall.h> +#endif +#ifdef SYS_open // solaris 11, at least sometimes, only defines SYS_openat +# define safeopen(filename, mode) syscall(SYS_open, filename, mode) +#else +# define safeopen(filename, mode) open(filename, mode) +#endif +#ifdef SYS_read +# define saferead(fd, buffer, size) syscall(SYS_read, fd, buffer, size) +#else +# define saferead(fd, buffer, size) read(fd, buffer, size) +#endif +#ifdef SYS_close +# define safeclose(fd) syscall(SYS_close, fd) +#else +# define safeclose(fd) close(fd) +#endif + +// ---------------------------------------------------------------------- +// GetenvBeforeMain() +// GetUniquePathFromEnv() +// Some non-trivial getenv-related functions. +// ---------------------------------------------------------------------- + +// It's not safe to call getenv() in the malloc hooks, because they +// might be called extremely early, before libc is done setting up +// correctly. In particular, the thread library may not be done +// setting up errno. So instead, we use the built-in __environ array +// if it exists, and otherwise read /proc/self/environ directly, using +// system calls to read the file, and thus avoid setting errno. +// /proc/self/environ has a limit of how much data it exports (around +// 8K), so it's not an ideal solution. +const char* GetenvBeforeMain(const char* name) { +#if defined(HAVE___ENVIRON) // if we have it, it's declared in unistd.h + if (__environ) { // can exist but be NULL, if statically linked + const int namelen = strlen(name); + for (char** p = __environ; *p; p++) { + if (strlen(*p) < namelen) { + continue; + } + if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=') // it's a match + return *p + namelen+1; // point after = + } + return NULL; + } +#endif +#if defined(PLATFORM_WINDOWS) + // TODO(mbelshe) - repeated calls to this function will overwrite the + // contents of the static buffer. + static char envvar_buf[1024]; // enough to hold any envvar we care about + if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1)) + return NULL; + return envvar_buf; +#endif + // static is ok because this function should only be called before + // main(), when we're single-threaded. + static char envbuf[16<<10]; + if (*envbuf == '\0') { // haven't read the environ yet + int fd = safeopen("/proc/self/environ", O_RDONLY); + // The -2 below guarantees the last two bytes of the buffer will be \0\0 + if (fd == -1 || // unable to open the file, fall back onto libc + saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) { // error reading file + RAW_VLOG(1, "Unable to open /proc/self/environ, falling back " + "on getenv(\"%s\"), which may not work", name); + if (fd != -1) safeclose(fd); + return getenv(name); + } + safeclose(fd); + } + const int namelen = strlen(name); + const char* p = envbuf; + while (*p != '\0') { // will happen at the \0\0 that terminates the buffer + // proc file has the format NAME=value\0NAME=value\0NAME=value\0... 
+    const char* endp = (char*)memchr(p, '\0', sizeof(envbuf) - (p - envbuf));
+    if (endp == NULL)            // this entry isn't NUL terminated
+      return NULL;
+    else if (!memcmp(p, name, namelen) && p[namelen] == '=')    // it's a match
+      return p + namelen+1;      // point after =
+    p = endp + 1;
+  }
+  return NULL;                   // env var never found
+}
+
+extern "C" {
+  const char* TCMallocGetenvSafe(const char* name) {
+    return GetenvBeforeMain(name);
+  }
+}
+
+// This takes as an argument an environment-variable name (like
+// CPUPROFILE) whose value is supposed to be a file-path, and sets
+// path to that path, and returns true.  If the env var doesn't exist,
+// or is the empty string, leaves path unchanged and returns false.
+// The reason this is non-trivial is that this function handles munged
+// pathnames.  Here's why:
+//
+// If we're a child process of the 'main' process, we can't just use
+// getenv("CPUPROFILE") -- the parent process will be using that path.
+// Instead we append our pid to the pathname.  How do we tell if we're a
+// child process?  Ideally we'd set an environment variable that all
+// our children would inherit.  But -- and this is seemingly a bug in
+// gcc -- if you do a setenv() in a shared library in a global
+// constructor, the environment setting is lost by the time main() is
+// called.  The only safe thing we can do in such a situation is to
+// modify the existing envvar.  So we do a hack: in the parent, we set
+// the high bit of the 1st char of CPUPROFILE.  In the child, we
+// notice the high bit is set and append the pid.  This works
+// assuming cpuprofile filenames don't normally have the high bit set
+// in their first character!  If that assumption is violated, we'll
+// still get a profile, but one with an unexpected name.
+// TODO(csilvers): set an envvar instead when we can do it reliably.
+bool GetUniquePathFromEnv(const char* env_name, char* path) {
+  char* envval = getenv(env_name);
+  if (envval == NULL || *envval == '\0')
+    return false;
+  if (envval[0] & 128) {                  // high bit is set
+    snprintf(path, PATH_MAX, "%c%s_%u",   // add pid and clear high bit
+             envval[0] & 127, envval+1, (unsigned int)(getpid()));
+  } else {
+    snprintf(path, PATH_MAX, "%s", envval);
+    envval[0] |= 128;                     // set high bit for kids to see
+  }
+  return true;
+}
+
+void SleepForMilliseconds(int milliseconds) {
+#ifdef PLATFORM_WINDOWS
+  _sleep(milliseconds);   // Windows's _sleep takes milliseconds argument
+#else
+  // Sleep for a few milliseconds
+  struct timespec sleep_time;
+  sleep_time.tv_sec = milliseconds / 1000;
+  sleep_time.tv_nsec = (milliseconds % 1000) * 1000000;
+  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
+    ;  // Ignore signals and wait for the full interval to elapse.
+#endif
+}
+
+int GetSystemCPUsCount()
+{
+#if defined(PLATFORM_WINDOWS)
+  // Get the number of processors.
+  SYSTEM_INFO info;
+  GetSystemInfo(&info);
+  return info.dwNumberOfProcessors;
+#else
+  long rv = sysconf(_SC_NPROCESSORS_ONLN);
+  if (rv < 0) {
+    return 1;
+  }
+  return static_cast<int>(rv);
+#endif
+}
+
+// ----------------------------------------------------------------------
+
+#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__
+static void ConstructFilename(const char* spec, pid_t pid,
+                              char* buf, int buf_size) {
+  CHECK_LT(snprintf(buf, buf_size,
+                    spec,
+                    static_cast<int>(pid ? pid : getpid())), buf_size);
+}
+#endif
+
+// A templatized helper function instantiated for Mach (OS X) only.
+// It can handle finding info for both 32 bits and 64 bits.
+// Returns true if it successfully handled the hdr, false otherwise.
+#ifdef __MACH__          // Mac OS X, almost certainly
+template<uint32_t kMagic, uint32_t kLCSegment,
+         typename MachHeader, typename SegmentCommand>
+static bool NextExtMachHelper(const mach_header* hdr,
+                              int current_image, int current_load_cmd,
+                              uint64 *start, uint64 *end, char **flags,
+                              uint64 *offset, int64 *inode, char **filename,
+                              uint64 *file_mapping, uint64 *file_pages,
+                              uint64 *anon_mapping, uint64 *anon_pages,
+                              dev_t *dev) {
+  static char kDefaultPerms[5] = "r-xp";
+  if (hdr->magic != kMagic)
+    return false;
+  const char* lc = (const char *)hdr + sizeof(MachHeader);
+  // TODO(csilvers): make this not-quadratic (increment and hold state)
+  for (int j = 0; j < current_load_cmd; j++)  // advance to *our* load_cmd
+    lc += ((const load_command *)lc)->cmdsize;
+  if (((const load_command *)lc)->cmd == kLCSegment) {
+    const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image);
+    const SegmentCommand* sc = (const SegmentCommand *)lc;
+    if (start) *start = sc->vmaddr + dlloff;
+    if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
+    if (flags) *flags = kDefaultPerms;   // can we do better?
+    if (offset) *offset = sc->fileoff;
+    if (inode) *inode = 0;
+    if (filename)
+      *filename = const_cast<char*>(_dyld_get_image_name(current_image));
+    if (file_mapping) *file_mapping = 0;
+    if (file_pages) *file_pages = 0;   // could we use sc->filesize?
+    if (anon_mapping) *anon_mapping = 0;
+    if (anon_pages) *anon_pages = 0;
+    if (dev) *dev = 0;
+    return true;
+  }
+
+  return false;
+}
+#endif
+
+// Finds |c| in |text|, and assigns '\0' at the found position.
+// The original character at the modified position should be |c|.
+// A pointer to the modified position is stored in |endptr|.
+// |endptr| should not be NULL.
+static bool ExtractUntilChar(char *text, int c, char **endptr) {
+  CHECK_NE(text, NULL);
+  CHECK_NE(endptr, NULL);
+  char *found;
+  found = strchr(text, c);
+  if (found == NULL) {
+    *endptr = NULL;
+    return false;
+  }
+
+  *endptr = found;
+  *found = '\0';
+  return true;
+}
+
+// Increments |*text_pointer| while it points at a whitespace character.
+// It is to follow sscanf's whitespace handling.
+static void SkipWhileWhitespace(char **text_pointer, int c) { + if (isspace(c)) { + while (isspace(**text_pointer) && isspace(*((*text_pointer) + 1))) { + ++(*text_pointer); + } + } +} + +template<class T> +static T StringToInteger(char *text, char **endptr, int base) { + assert(false); + return T(); +} + +template<> +int StringToInteger<int>(char *text, char **endptr, int base) { + return strtol(text, endptr, base); +} + +template<> +int64 StringToInteger<int64>(char *text, char **endptr, int base) { + return strtoll(text, endptr, base); +} + +template<> +uint64 StringToInteger<uint64>(char *text, char **endptr, int base) { + return strtoull(text, endptr, base); +} + +template<typename T> +static T StringToIntegerUntilChar( + char *text, int base, int c, char **endptr_result) { + CHECK_NE(endptr_result, NULL); + *endptr_result = NULL; + + char *endptr_extract; + if (!ExtractUntilChar(text, c, &endptr_extract)) + return 0; + + T result; + char *endptr_strto; + result = StringToInteger<T>(text, &endptr_strto, base); + *endptr_extract = c; + + if (endptr_extract != endptr_strto) + return 0; + + *endptr_result = endptr_extract; + SkipWhileWhitespace(endptr_result, c); + + return result; +} + +static char *CopyStringUntilChar( + char *text, unsigned out_len, int c, char *out) { + char *endptr; + if (!ExtractUntilChar(text, c, &endptr)) + return NULL; + + strncpy(out, text, out_len); + out[out_len-1] = '\0'; + *endptr = c; + + SkipWhileWhitespace(&endptr, c); + return endptr; +} + +template<typename T> +static bool StringToIntegerUntilCharWithCheck( + T *outptr, char *text, int base, int c, char **endptr) { + *outptr = StringToIntegerUntilChar<T>(*endptr, base, c, endptr); + if (*endptr == NULL || **endptr == '\0') return false; + ++(*endptr); + return true; +} + +static bool ParseProcMapsLine(char *text, uint64 *start, uint64 *end, + char *flags, uint64 *offset, + int *major, int *minor, int64 *inode, + unsigned *filename_offset) { +#if defined(__linux__) + /* + * It's similar to: + * sscanf(text, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n", + * start, end, flags, offset, major, minor, inode, filename_offset) + */ + char *endptr = text; + if (endptr == NULL || *endptr == '\0') return false; + + if (!StringToIntegerUntilCharWithCheck(start, endptr, 16, '-', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(end, endptr, 16, ' ', &endptr)) + return false; + + endptr = CopyStringUntilChar(endptr, 5, ' ', flags); + if (endptr == NULL || *endptr == '\0') return false; + ++endptr; + + if (!StringToIntegerUntilCharWithCheck(offset, endptr, 16, ' ', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(major, endptr, 16, ':', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(minor, endptr, 16, ' ', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(inode, endptr, 10, ' ', &endptr)) + return false; + + *filename_offset = (endptr - text); + return true; +#else + return false; +#endif +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid) { + Init(pid, NULL, false); +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) { + Init(pid, buffer, false); +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer, + bool use_maps_backing) { + Init(pid, buffer, use_maps_backing); +} + +void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, + bool use_maps_backing) { + pid_ = pid; + using_maps_backing_ = use_maps_backing; + dynamic_buffer_ = NULL; + if (!buffer) { + // If the user didn't pass in any buffer 
storage, allocate it + // now. This is the normal case; the signal handler passes in a + // static buffer. + buffer = dynamic_buffer_ = new Buffer; + } else { + dynamic_buffer_ = NULL; + } + + ibuf_ = buffer->buf_; + + stext_ = etext_ = nextline_ = ibuf_; + ebuf_ = ibuf_ + Buffer::kBufSize - 1; + nextline_ = ibuf_; + +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + if (use_maps_backing) { // don't bother with clever "self" stuff in this case + ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize); + } else if (pid == 0) { + // We have to kludge a bit to deal with the args ConstructFilename + // expects. The 1 is never used -- it's only impt. that it's not 0. + ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize); + } + // No error logging since this can be called from the crash dump + // handler at awkward moments. Users should call Valid() before + // using. + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__FreeBSD__) + // We don't support maps_backing on freebsd + if (pid == 0) { + ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); + } + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__sun__) + if (pid == 0) { + ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); + } + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__MACH__) + current_image_ = _dyld_image_count(); // count down from the top + current_load_cmd_ = -1; +#elif defined(PLATFORM_WINDOWS) + snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | + TH32CS_SNAPMODULE32, + GetCurrentProcessId()); + memset(&module_, 0, sizeof(module_)); +#else + fd_ = -1; // so Valid() is always false +#endif + +} + +ProcMapsIterator::~ProcMapsIterator() { +#if defined(PLATFORM_WINDOWS) + if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_); +#elif defined(__MACH__) + // no cleanup necessary! +#else + if (fd_ >= 0) NO_INTR(close(fd_)); +#endif + delete dynamic_buffer_; +} + +bool ProcMapsIterator::Valid() const { +#if defined(PLATFORM_WINDOWS) + return snapshot_ != INVALID_HANDLE_VALUE; +#elif defined(__MACH__) + return 1; +#else + return fd_ != -1; +#endif +} + +bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename) { + return NextExt(start, end, flags, offset, inode, filename, NULL, NULL, + NULL, NULL, NULL); +} + +// This has too many arguments. It should really be building +// a map object and returning it. The problem is that this is called +// when the memory allocator state is undefined, hence the arguments. 
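+//
+// For example, a caller that only wants address ranges and filenames can
+// pass NULL for every other out-parameter (a sketch of a hypothetical
+// caller, not code from this library):
+//
+//   ProcMapsIterator::Buffer buf;
+//   ProcMapsIterator it(0, &buf);   // 0 means "current pid"
+//   uint64 start, end;
+//   char *filename;
+//   while (it.NextExt(&start, &end, NULL, NULL, NULL, &filename,
+//                     NULL, NULL, NULL, NULL, NULL)) {
+//     // [start, end) is a mapped region, named by filename if non-empty.
+//   }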
+bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev) { + +#if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + do { + // Advance to the start of the next line + stext_ = nextline_; + + // See if we have a complete line in the buffer already + nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_)); + if (!nextline_) { + // Shift/fill the buffer so we do have a line + int count = etext_ - stext_; + + // Move the current text to the start of the buffer + memmove(ibuf_, stext_, count); + stext_ = ibuf_; + etext_ = ibuf_ + count; + + int nread = 0; // fill up buffer with text + while (etext_ < ebuf_) { + NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_)); + if (nread > 0) + etext_ += nread; + else + break; + } + + // Zero out remaining characters in buffer at EOF to avoid returning + // garbage from subsequent calls. + if (etext_ != ebuf_ && nread == 0) { + memset(etext_, 0, ebuf_ - etext_); + } + *etext_ = '\n'; // sentinel; safe because ibuf extends 1 char beyond ebuf + nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_)); + } + *nextline_ = 0; // turn newline into nul + nextline_ += ((nextline_ < etext_)? 1 : 0); // skip nul if not end of text + // stext_ now points at a nul-terminated line + uint64 tmpstart, tmpend, tmpoffset; + int64 tmpinode; + int major, minor; + unsigned filename_offset = 0; +#if defined(__linux__) + // for now, assume all linuxes have the same format + if (!ParseProcMapsLine( + stext_, + start ? start : &tmpstart, + end ? end : &tmpend, + flags_, + offset ? offset : &tmpoffset, + &major, &minor, + inode ? inode : &tmpinode, &filename_offset)) continue; +#elif defined(__CYGWIN__) || defined(__CYGWIN32__) + // cygwin is like linux, except the third field is the "entry point" + // rather than the offset (see format_process_maps at + // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src + // Offset is always be 0 on cygwin: cygwin implements an mmap + // by loading the whole file and then calling NtMapViewOfSection. + // Cygwin also seems to set its flags kinda randomly; use windows default. + char tmpflags[5]; + if (offset) + *offset = 0; + strcpy(flags_, "r-xp"); + if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n", + start ? start : &tmpstart, + end ? end : &tmpend, + tmpflags, + &tmpoffset, + &major, &minor, + inode ? inode : &tmpinode, &filename_offset) != 7) continue; +#elif defined(__FreeBSD__) + // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup + tmpstart = tmpend = tmpoffset = 0; + tmpinode = 0; + major = minor = 0; // can't get this info in freebsd + if (inode) + *inode = 0; // nor this + if (offset) + *offset = 0; // seems like this should be in there, but maybe not + // start end resident privateresident obj(?) prot refcnt shadowcnt + // flags copy_on_write needs_copy type filename: + // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat + if (sscanf(stext_, "0x%" SCNx64 " 0x%" SCNx64 " %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n", + start ? start : &tmpstart, + end ? 
end : &tmpend, + flags_, + &filename_offset) != 3) continue; +#endif + + // Depending on the Linux kernel being used, there may or may not be a space + // after the inode if there is no filename. sscanf will in such situations + // nondeterministically either fill in filename_offset or not (the results + // differ on multiple calls in the same run even with identical arguments). + // We don't want to wander off somewhere beyond the end of the string. + size_t stext_length = strlen(stext_); + if (filename_offset == 0 || filename_offset > stext_length) + filename_offset = stext_length; + + // We found an entry + if (flags) *flags = flags_; + if (filename) *filename = stext_ + filename_offset; + if (dev) *dev = minor | (major << 8); + + if (using_maps_backing_) { + // Extract and parse physical page backing info. + char *backing_ptr = stext_ + filename_offset + + strlen(stext_+filename_offset); + + // find the second '(' + int paren_count = 0; + while (--backing_ptr > stext_) { + if (*backing_ptr == '(') { + ++paren_count; + if (paren_count >= 2) { + uint64 tmp_file_mapping; + uint64 tmp_file_pages; + uint64 tmp_anon_mapping; + uint64 tmp_anon_pages; + + sscanf(backing_ptr+1, "F %" SCNx64 " %" SCNd64 ") (A %" SCNx64 " %" SCNd64 ")", + file_mapping ? file_mapping : &tmp_file_mapping, + file_pages ? file_pages : &tmp_file_pages, + anon_mapping ? anon_mapping : &tmp_anon_mapping, + anon_pages ? anon_pages : &tmp_anon_pages); + // null terminate the file name (there is a space + // before the first (. + backing_ptr[-1] = 0; + break; + } + } + } + } + + return true; + } while (etext_ > ibuf_); +#elif defined(__sun__) + // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1 + static char kPerms[8][4] = { "---", "--x", "-w-", "-wx", + "r--", "r-x", "rw-", "rwx" }; + COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4); + COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2); + COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1); + Buffer object_path; + int nread = 0; // fill up buffer with text + NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t))); + if (nread == sizeof(prmap_t)) { + long inode_from_mapname = 0; + prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_); + // Best-effort attempt to get the inode from the filename. I think the + // two middle ints are major and minor device numbers, but I'm not sure. + sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname); + + if (pid_ == 0) { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/self/path/%s", mapinfo->pr_mapname), + Buffer::kBufSize); + } else { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/%d/path/%s", + static_cast<int>(pid_), mapinfo->pr_mapname), + Buffer::kBufSize); + } + ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX); + CHECK_LT(len, PATH_MAX); + if (len < 0) + len = 0; + current_filename_[len] = '\0'; + + if (start) *start = mapinfo->pr_vaddr; + if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size; + if (flags) *flags = kPerms[mapinfo->pr_mflags & 7]; + if (offset) *offset = mapinfo->pr_offset; + if (inode) *inode = inode_from_mapname; + if (filename) *filename = current_filename_; + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } +#elif defined(__MACH__) + // We return a separate entry for each segment in the DLL. (TODO(csilvers): + // can we do better?) 
A DLL ("image") has load-commands, some of which + // talk about segment boundaries. + // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912 + for (; current_image_ >= 0; current_image_--) { + const mach_header* hdr = _dyld_get_image_header(current_image_); + if (!hdr) continue; + if (current_load_cmd_ < 0) // set up for this image + current_load_cmd_ = hdr->ncmds; // again, go from the top down + + // We start with the next load command (we've already looked at this one). + for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) { +#ifdef MH_MAGIC_64 + if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64, + struct mach_header_64, struct segment_command_64>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } +#endif + if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT, + struct mach_header, struct segment_command>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } + } + // If we get here, no more load_cmd's in this image talk about + // segments. Go on to the next image. + } +#elif defined(PLATFORM_WINDOWS) + static char kDefaultPerms[5] = "r-xp"; + BOOL ok; + if (module_.dwSize == 0) { // only possible before first call + module_.dwSize = sizeof(module_); + ok = Module32First(snapshot_, &module_); + } else { + ok = Module32Next(snapshot_, &module_); + } + if (ok) { + uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr); + if (start) *start = base_addr; + if (end) *end = base_addr + module_.modBaseSize; + if (flags) *flags = kDefaultPerms; + if (offset) *offset = 0; + if (inode) *inode = 0; + if (filename) *filename = module_.szExePath; + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } +#endif + + // We didn't find anything + return false; +} + +int ProcMapsIterator::FormatLine(char* buffer, int bufsize, + uint64 start, uint64 end, const char *flags, + uint64 offset, int64 inode, + const char *filename, dev_t dev) { + // We assume 'flags' looks like 'rwxp' or 'rwx'. + char r = (flags && flags[0] == 'r') ? 'r' : '-'; + char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-'; + char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 'x' : '-'; + // p always seems set on linux, so we set the default to 'p', not '-' + char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p') + ? '-' : 'p'; + + const int rc = snprintf(buffer, bufsize, + "%08" PRIx64 "-%08" PRIx64 " %c%c%c%c %08" PRIx64 " %02x:%02x %-11" PRId64 " %s\n", + start, end, r,w,x,p, offset, + static_cast<int>(dev/256), static_cast<int>(dev%256), + inode, filename); + return (rc < 0 || rc >= bufsize) ? 0 : rc; +} + +namespace tcmalloc { + +// Helper to add the list of mapped shared libraries to a profile. +// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size' +// and return the actual size occupied in 'buf'. We fill wrote_all to true +// if we successfully wrote all proc lines to buf, false else. +// We do not provision for 0-terminating 'buf'. 
+int FillProcSelfMaps(char buf[], int size, bool* wrote_all) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + int bytes_written = 0; + *wrote_all = true; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + const int line_length = it.FormatLine(buf + bytes_written, + size - bytes_written, + start, end, flags, offset, + inode, filename, 0); + if (line_length == 0) + *wrote_all = false; // failed to write this line out + else + bytes_written += line_length; + + } + return bytes_written; +} + +// Dump the same data as FillProcSelfMaps reads to fd. +// It seems easier to repeat parts of FillProcSelfMaps here than to +// reuse it via a call. +void DumpProcSelfMaps(RawFD fd) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + ProcMapsIterator::Buffer linebuf; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_), + start, end, flags, offset, inode, filename, + 0); + RawWrite(fd, linebuf.buf_, written); + } +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/base/sysinfo.h b/src/third_party/gperftools-2.5/src/base/sysinfo.h new file mode 100644 index 00000000000..e30b0d4d1a5 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/sysinfo.h @@ -0,0 +1,232 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// All functions here are thread-hostile due to file caching unless +// commented otherwise. 
+
+#ifndef _SYSINFO_H_
+#define _SYSINFO_H_
+
+#include <config.h>
+
+#include <time.h>
+#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
+#include <windows.h>   // for DWORD
+#include <tlhelp32.h>  // for CreateToolhelp32Snapshot
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>    // for pid_t
+#endif
+#include <stddef.h>    // for size_t
+#include <limits.h>    // for PATH_MAX
+#include "base/basictypes.h"
+#include "base/logging.h"   // for RawFD
+
+// This getenv function is safe to call before the C runtime is initialized.
+// On Windows, it utilizes GetEnvironmentVariable() and on unix it uses
+// /proc/self/environ instead of calling getenv().  It's intended to be used
+// in routines that run before main(), when the state required for getenv()
+// may not be set up yet.  In particular, errno isn't set up until relatively
+// late (after the pthreads library has a chance to make it threadsafe), and
+// getenv() doesn't work until then.
+// On some platforms, this call will utilize the same, static buffer for
+// repeated GetenvBeforeMain() calls.  Callers should not expect pointers from
+// this routine to be long lived.
+// Note that on unix, /proc only has the environment at the time the
+// application was started, so this routine ignores setenv() calls/etc.  Also
+// note it only reads the first 16K of the environment.
+extern const char* GetenvBeforeMain(const char* name);
+
+// This takes as an argument an environment-variable name (like
+// CPUPROFILE) whose value is supposed to be a file-path, and sets
+// path to that path, and returns true.  Non-trivial for surprising
+// reasons, as documented in sysinfo.cc.  path must have space for at
+// least PATH_MAX characters.
+extern bool GetUniquePathFromEnv(const char* env_name, char* path);
+
+extern int GetSystemCPUsCount();
+
+void SleepForMilliseconds(int milliseconds);
+
+// Return true if we're running POSIX (e.g., NPTL on Linux) threads,
+// as opposed to a non-POSIX thread library.  The thing that we care
+// about is whether a thread's pid is the same as the thread that
+// spawned it.  If so, this function returns true.
+// Thread-safe.
+// Note: We consider false negatives to be OK.
+bool HasPosixThreads();
+
+#ifndef SWIG  // SWIG doesn't like struct Buffer and variable arguments.
+
+// A ProcMapsIterator abstracts access to /proc/maps for a given
+// process.  Needs to be stack-allocatable and avoid using stdio/malloc
+// so it can be used in the google stack dumper, heap-profiler, etc.
+//
+// On Windows and Mac OS X, this iterator iterates *only* over DLLs
+// mapped into this process space.  For Linux, FreeBSD, and Solaris,
+// it iterates over *all* mapped memory regions, including anonymous
+// mmaps.  For other O/Ss, it is unlikely to work at all, and Valid()
+// will always return false.  Also note: this routine only works on
+// FreeBSD if procfs is mounted: make sure this is in your /etc/fstab:
+//    proc   /proc   procfs   rw   0   0
+class ProcMapsIterator {
+ public:
+  struct Buffer {
+#ifdef __FreeBSD__
+    // FreeBSD requires us to read all of the maps file at once, so
+    // we have to make a buffer that's "always" big enough
+    static const size_t kBufSize = 102400;
+#else   // a one-line buffer is good enough
+    static const size_t kBufSize = PATH_MAX + 1024;
+#endif
+    char buf_[kBufSize];
+  };
+
+  // Create a new iterator for the specified pid.  pid can be 0 for "self".
+  explicit ProcMapsIterator(pid_t pid);
+
+  // Create an iterator with specified storage (for use in a signal
+  // handler).  "buffer" should point to a ProcMapsIterator::Buffer;
"buffer" should point to a ProcMapsIterator::Buffer + // buffer can be NULL in which case a bufer will be allocated. + ProcMapsIterator(pid_t pid, Buffer *buffer); + + // Iterate through maps_backing instead of maps if use_maps_backing + // is true. Otherwise the same as above. buffer can be NULL and + // it will allocate a buffer itself. + ProcMapsIterator(pid_t pid, Buffer *buffer, + bool use_maps_backing); + + // Returns true if the iterator successfully initialized; + bool Valid() const; + + // Returns a pointer to the most recently parsed line. Only valid + // after Next() returns true, and until the iterator is destroyed or + // Next() is called again. This may give strange results on non-Linux + // systems. Prefer FormatLine() if that may be a concern. + const char *CurrentLine() const { return stext_; } + + // Writes the "canonical" form of the /proc/xxx/maps info for a single + // line to the passed-in buffer. Returns the number of bytes written, + // or 0 if it was not able to write the complete line. (To guarantee + // success, buffer should have size at least Buffer::kBufSize.) + // Takes as arguments values set via a call to Next(). The + // "canonical" form of the line (taken from linux's /proc/xxx/maps): + // <start_addr(hex)>-<end_addr(hex)> <perms(rwxp)> <offset(hex)> + + // <major_dev(hex)>:<minor_dev(hex)> <inode> <filename> Note: the + // eg + // 08048000-0804c000 r-xp 00000000 03:01 3793678 /bin/cat + // If you don't have the dev_t (dev), feel free to pass in 0. + // (Next() doesn't return a dev_t, though NextExt does.) + // + // Note: if filename and flags were obtained via a call to Next(), + // then the output of this function is only valid if Next() returned + // true, and only until the iterator is destroyed or Next() is + // called again. (Since filename, at least, points into CurrentLine.) + static int FormatLine(char* buffer, int bufsize, + uint64 start, uint64 end, const char *flags, + uint64 offset, int64 inode, const char *filename, + dev_t dev); + + // Find the next entry in /proc/maps; return true if found or false + // if at the end of the file. + // + // Any of the result pointers can be NULL if you're not interested + // in those values. + // + // If "flags" and "filename" are passed, they end up pointing to + // storage within the ProcMapsIterator that is valid only until the + // iterator is destroyed or Next() is called again. The caller may + // modify the contents of these strings (up as far as the first NUL, + // and only until the subsequent call to Next()) if desired. + + // The offsets are all uint64 in order to handle the case of a + // 32-bit process running on a 64-bit kernel + // + // IMPORTANT NOTE: see top-of-class notes for details about what + // mapped regions Next() iterates over, depending on O/S. + // TODO(csilvers): make flags and filename const. 
+ bool Next(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename); + + bool NextExt(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev); + + ~ProcMapsIterator(); + + private: + void Init(pid_t pid, Buffer *buffer, bool use_maps_backing); + + char *ibuf_; // input buffer + char *stext_; // start of text + char *etext_; // end of text + char *nextline_; // start of next line + char *ebuf_; // end of buffer (1 char for a nul) +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) + HANDLE snapshot_; // filehandle on dll info + // In a change from the usual W-A pattern, there is no A variant of + // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. + // We want the original A variants, and this #undef is the only + // way I see to get them. Redefining it when we're done prevents us + // from affecting other .cc files. +# ifdef MODULEENTRY32 // Alias of W +# undef MODULEENTRY32 + MODULEENTRY32 module_; // info about current dll (and dll iterator) +# define MODULEENTRY32 MODULEENTRY32W +# else // It's the ascii, the one we want. + MODULEENTRY32 module_; // info about current dll (and dll iterator) +# endif +#elif defined(__MACH__) + int current_image_; // dll's are called "images" in macos parlance + int current_load_cmd_; // the segment of this dll we're examining +#elif defined(__sun__) // Solaris + int fd_; + char current_filename_[PATH_MAX]; +#else + int fd_; // filehandle on /proc/*/maps +#endif + pid_t pid_; + char flags_[10]; + Buffer* dynamic_buffer_; // dynamically-allocated Buffer + bool using_maps_backing_; // true if we are looking at maps_backing instead of maps. +}; + +#endif /* #ifndef SWIG */ + +// Helper routines + +namespace tcmalloc { +int FillProcSelfMaps(char buf[], int size, bool* wrote_all); +void DumpProcSelfMaps(RawFD fd); +} + +#endif /* #ifndef _SYSINFO_H_ */ diff --git a/src/third_party/gperftools-2.5/src/base/thread_annotations.h b/src/third_party/gperftools-2.5/src/base/thread_annotations.h new file mode 100644 index 00000000000..f57b2999ee7 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/thread_annotations.h @@ -0,0 +1,134 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Le-Chun Wu +// +// This header file contains the macro definitions for thread safety +// annotations that allow the developers to document the locking policies +// of their multi-threaded code. The annotations can also help program +// analysis tools to identify potential thread safety issues. +// +// The annotations are implemented using GCC's "attributes" extension. +// Using the macros defined here instead of the raw GCC attributes allows +// for portability and future compatibility. +// +// This functionality is not yet fully implemented in perftools, +// but may be one day. + +#ifndef BASE_THREAD_ANNOTATIONS_H_ +#define BASE_THREAD_ANNOTATIONS_H_ + + +#if defined(__GNUC__) \ + && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \ + && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG)) +#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) +#else +#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#endif + + +// Document if a shared variable/field needs to be protected by a lock. +// GUARDED_BY allows the user to specify a particular lock that should be +// held when accessing the annotated variable, while GUARDED_VAR only +// indicates a shared variable should be guarded (by any lock). GUARDED_VAR +// is primarily used when the client cannot express the name of the lock. +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) +#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded) + +// Document if the memory location pointed to by a pointer should be guarded +// by a lock when dereferencing the pointer. Similar to GUARDED_VAR, +// PT_GUARDED_VAR is primarily used when the client cannot express the name +// of the lock. Note that a pointer variable to a shared memory location +// could itself be a shared variable. For example, if a shared global pointer +// q, which is guarded by mu1, points to a shared memory location that is +// guarded by mu2, q should be annotated as follows: +// int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2); +#define PT_GUARDED_BY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x)) +#define PT_GUARDED_VAR \ + THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded) + +// Document the acquisition order between locks that can be held +// simultaneously by a thread. For any two locks that need to be annotated +// to establish an acquisition order, only one of them needs the annotation. +// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER +// and ACQUIRED_BEFORE.) +#define ACQUIRED_AFTER(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(x)) +#define ACQUIRED_BEFORE(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(x)) + +// The following three annotations document the lock requirements for +// functions/methods. 
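+// For instance, a sketch of how these annotations might be applied
+// (Mutex and the annotated members are hypothetical; the function
+// annotations used here are defined just below):
+//
+//   Mutex mu;
+//   int counter GUARDED_BY(mu);
+//   void Increment() EXCLUSIVE_LOCKS_REQUIRED(mu);   // must hold mu
+//   void Refresh() LOCKS_EXCLUDED(mu);               // acquires mu itself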
+ +// Document if a function expects certain locks to be held before it is called +#define EXCLUSIVE_LOCKS_REQUIRED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(x)) + +#define SHARED_LOCKS_REQUIRED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(x)) + +// Document the locks acquired in the body of the function. These locks +// cannot be held when calling this function (as google3's Mutex locks are +// non-reentrant). +#define LOCKS_EXCLUDED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(x)) + +// Document the lock the annotated function returns without acquiring it. +#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) + +// Document if a class/type is a lockable type (such as the Mutex class). +#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable) + +// Document if a class is a scoped lockable type (such as the MutexLock class). +#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) + +// The following annotations specify lock and unlock primitives. +#define EXCLUSIVE_LOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(x)) + +#define SHARED_LOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(x)) + +#define EXCLUSIVE_TRYLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(x)) + +#define SHARED_TRYLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(x)) + +#define UNLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(unlock(x)) + +// An escape hatch for thread safety analysis to ignore the annotated function. +#define NO_THREAD_SAFETY_ANALYSIS \ + THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) + +#endif // BASE_THREAD_ANNOTATIONS_H_ diff --git a/src/third_party/gperftools-2.5/src/base/thread_lister.c b/src/third_party/gperftools-2.5/src/base/thread_lister.c new file mode 100644 index 00000000000..9dc8d721892 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/thread_lister.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Markus Gutschke + */ + +#include "config.h" + +#include "base/thread_lister.h" + +#include <stdio.h> /* needed for NULL on some powerpc platforms (?!) */ +#include <sys/types.h> +#include <unistd.h> /* for getpid */ + +#ifdef HAVE_SYS_PRCTL +# include <sys/prctl.h> +#endif + +#include "base/linuxthreads.h" +/* Include other thread listers here that define THREADS macro + * only when they can provide a good implementation. + */ + +#ifndef THREADS + +/* Default trivial thread lister for single-threaded applications, + * or if the multi-threading code has not been ported, yet. + */ + +int TCMalloc_ListAllProcessThreads(void *parameter, + ListAllProcessThreadsCallBack callback, ...) { + int rc; + va_list ap; + pid_t pid; + +#ifdef HAVE_SYS_PRCTL + int dumpable = prctl(PR_GET_DUMPABLE, 0); + if (!dumpable) + prctl(PR_SET_DUMPABLE, 1); +#endif + va_start(ap, callback); + pid = getpid(); + rc = callback(parameter, 1, &pid, ap); + va_end(ap); +#ifdef HAVE_SYS_PRCTL + if (!dumpable) + prctl(PR_SET_DUMPABLE, 0); +#endif + return rc; +} + +int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { + return 1; +} + +#endif /* ifndef THREADS */ diff --git a/src/third_party/gperftools-2.5/src/base/thread_lister.h b/src/third_party/gperftools-2.5/src/base/thread_lister.h new file mode 100644 index 00000000000..6e70b89fef5 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/thread_lister.h @@ -0,0 +1,83 @@ +/* -*- Mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * ---
+ * Author: Markus Gutschke
+ */
+
+#ifndef _THREAD_LISTER_H
+#define _THREAD_LISTER_H
+
+#include <stdarg.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*ListAllProcessThreadsCallBack)(void *parameter,
+                                             int num_threads,
+                                             pid_t *thread_pids,
+                                             va_list ap);
+
+/* This function gets the list of all linux threads of the current process
+ * and passes them to the 'callback' along with the 'parameter' pointer; by
+ * the time the callback is invoked, all the threads have been paused via
+ * PTRACE_ATTACH.
+ * The callback is executed from a separate thread which shares only the
+ * address space, the filesystem, and the filehandles with the caller. Most
+ * notably, it does not share the same pid and ppid; and if it terminates,
+ * the rest of the application is still there. 'callback' is supposed to do
+ * or arrange for TCMalloc_ResumeAllProcessThreads. This happens
+ * automatically if the thread raises a synchronous signal (e.g. SIGSEGV);
+ * asynchronous signals are blocked. If the 'callback' decides to unblock
+ * them, it must ensure that they cannot terminate the application, or that
+ * TCMalloc_ResumeAllProcessThreads will get called.
+ * It is an error for the 'callback' to make any library calls that could
+ * acquire locks. Most notably, this means that most system calls have to
+ * avoid going through libc. Also, this means that it is not legal to call
+ * exit() or abort().
+ * We return -1 on error and the return value of 'callback' on success.
+ */
+int TCMalloc_ListAllProcessThreads(void *parameter,
+                                   ListAllProcessThreadsCallBack callback, ...);
+
+/* This function resumes the list of all linux threads that
+ * TCMalloc_ListAllProcessThreads pauses before giving to its
+ * callback.  The function returns non-zero if at least one thread was
+ * suspended and has now been resumed.
+ */
+int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _THREAD_LISTER_H */
diff --git a/src/third_party/gperftools-2.5/src/base/vdso_support.cc b/src/third_party/gperftools-2.5/src/base/vdso_support.cc
new file mode 100644
index 00000000000..730df3011d6
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/base/vdso_support.cc
@@ -0,0 +1,143 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// + +#include "base/vdso_support.h" + +#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h + +#include <fcntl.h> +#include <stddef.h> // for ptrdiff_t + +#include "base/atomicops.h" // for MemoryBarrier +#include "base/linux_syscall_support.h" +#include "base/logging.h" +#include "base/dynamic_annotations.h" +#include "base/basictypes.h" // for COMPILE_ASSERT + +using base::subtle::MemoryBarrier; + +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR 33 +#endif + +namespace base { + +const void *VDSOSupport::vdso_base_ = ElfMemImage::kInvalidBase; +VDSOSupport::VDSOSupport() + // If vdso_base_ is still set to kInvalidBase, we got here + // before VDSOSupport::Init has been called. Call it now. + : image_(vdso_base_ == ElfMemImage::kInvalidBase ? Init() : vdso_base_) { +} + +// NOTE: we can't use GoogleOnceInit() below, because we can be +// called by tcmalloc, and none of the *once* stuff may be functional yet. +// +// In addition, we hope that the VDSOSupportHelper constructor +// causes this code to run before there are any threads, and before +// InitGoogle() has executed any chroot or setuid calls. +// +// Finally, even if there is a race here, it is harmless, because +// the operation should be idempotent. +const void *VDSOSupport::Init() { + if (vdso_base_ == ElfMemImage::kInvalidBase) { + // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[] + // on stack, and so glibc works as if VDSO was not present. + // But going directly to kernel via /proc/self/auxv below bypasses + // Valgrind zapping. So we check for Valgrind separately. + if (RunningOnValgrind()) { + vdso_base_ = NULL; + return NULL; + } + int fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + // Kernel too old to have a VDSO. + vdso_base_ = NULL; + return NULL; + } + ElfW(auxv_t) aux; + while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { + if (aux.a_type == AT_SYSINFO_EHDR) { + COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val), + unexpected_sizeof_pointer_NE_sizeof_a_val); + vdso_base_ = reinterpret_cast<void *>(aux.a_un.a_val); + break; + } + } + close(fd); + if (vdso_base_ == ElfMemImage::kInvalidBase) { + // Didn't find AT_SYSINFO_EHDR in auxv[]. 
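+      // Treat this as "no VDSO": store NULL so later calls don't
+      // re-scan auxv.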
+ vdso_base_ = NULL; + } + } + return vdso_base_; +} + +const void *VDSOSupport::SetBase(const void *base) { + CHECK(base != ElfMemImage::kInvalidBase); + const void *old_base = vdso_base_; + vdso_base_ = base; + image_.Init(base); + return old_base; +} + +bool VDSOSupport::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info) const { + return image_.LookupSymbol(name, version, type, info); +} + +bool VDSOSupport::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + return image_.LookupSymbolByAddress(address, info_out); +} + +// We need to make sure VDSOSupport::Init() is called before +// the main() runs, since it might do something like setuid or +// chroot. If VDSOSupport +// is used in any global constructor, this will happen, since +// VDSOSupport's constructor calls Init. But if not, we need to +// ensure it here, with a global constructor of our own. This +// is an allowed exception to the normal rule against non-trivial +// global constructors. +static class VDSOInitHelper { + public: + VDSOInitHelper() { VDSOSupport::Init(); } +} vdso_init_helper; +} + +#endif // HAVE_VDSO_SUPPORT diff --git a/src/third_party/gperftools-2.5/src/base/vdso_support.h b/src/third_party/gperftools-2.5/src/base/vdso_support.h new file mode 100644 index 00000000000..c1209a47f13 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/base/vdso_support.h @@ -0,0 +1,132 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSO stands for "Virtual Dynamic Shared Object" -- a page of +// executable code, which looks like a shared library, but doesn't +// necessarily exist anywhere on disk, and which gets mmap()ed into +// every process by kernels which support VDSO, such as 2.6.x for 32-bit +// executables, and 2.6.24 and above for 64-bit executables. 
+// +// More details could be found here: +// http://www.trilithium.com/johan/2005/08/linux-gate/ +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// +// Example usage: +// VDSOSupport vdso; +// VDSOSupport::SymbolInfo info; +// typedef (*FN)(unsigned *, void *, void *); +// FN fn = NULL; +// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { +// fn = reinterpret_cast<FN>(info.address); +// } + +#ifndef BASE_VDSO_SUPPORT_H_ +#define BASE_VDSO_SUPPORT_H_ + +#include <config.h> +#include "base/basictypes.h" +#include "base/elf_mem_image.h" + +#ifdef HAVE_ELF_MEM_IMAGE + +#define HAVE_VDSO_SUPPORT 1 + +#include <stdlib.h> // for NULL + +namespace base { + +// NOTE: this class may be used from within tcmalloc, and can not +// use any memory allocation routines. +class VDSOSupport { + public: + VDSOSupport(); + + typedef ElfMemImage::SymbolInfo SymbolInfo; + typedef ElfMemImage::SymbolIterator SymbolIterator; + + // Answers whether we have a vdso at all. + bool IsPresent() const { return image_.IsPresent(); } + + // Allow to iterate over all VDSO symbols. + SymbolIterator begin() const { return image_.begin(); } + SymbolIterator end() const { return image_.end(); } + + // Look up versioned dynamic symbol in the kernel VDSO. + // Returns false if VDSO is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out != NULL, additional details are filled in. + bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if VDSO isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != NULL, additional details are filled in. + bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + // Used only for testing. Replace real VDSO base with a mock. + // Returns previous value of vdso_base_. After you are done testing, + // you are expected to call SetBase() with previous value, in order to + // reset state to the way it was. + const void *SetBase(const void *s); + + // Computes vdso_base_ and returns it. Should be called as early as + // possible; before any thread creation, chroot or setuid. + static const void *Init(); + + private: + // image_ represents VDSO ELF image in memory. + // image_.ehdr_ == NULL implies there is no VDSO. + ElfMemImage image_; + + // Cached value of auxv AT_SYSINFO_EHDR, computed once. + // This is a tri-state: + // kInvalidBase => value hasn't been determined yet. + // 0 => there is no VDSO. + // else => vma of VDSO Elf{32,64}_Ehdr. + // + // When testing with mock VDSO, low bit is set. + // The low bit is always available because vdso_base_ is + // page-aligned. + static const void *vdso_base_; + + DISALLOW_COPY_AND_ASSIGN(VDSOSupport); +}; + +} // namespace base + +#endif // HAVE_ELF_MEM_IMAGE + +#endif // BASE_VDSO_SUPPORT_H_ diff --git a/src/third_party/gperftools-2.5/src/central_freelist.cc b/src/third_party/gperftools-2.5/src/central_freelist.cc new file mode 100644 index 00000000000..11b190dcfee --- /dev/null +++ b/src/third_party/gperftools-2.5/src/central_freelist.cc @@ -0,0 +1,387 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include "config.h" +#include <algorithm> +#include "central_freelist.h" +#include "internal_logging.h" // for ASSERT, MESSAGE +#include "linked_list.h" // for SLL_Next, SLL_Push, etc +#include "page_heap.h" // for PageHeap +#include "static_vars.h" // for Static + +using std::min; +using std::max; + +namespace tcmalloc { + +void CentralFreeList::Init(size_t cl) { + size_class_ = cl; + tcmalloc::DLL_Init(&empty_); + tcmalloc::DLL_Init(&nonempty_); + num_spans_ = 0; + counter_ = 0; + + max_cache_size_ = kMaxNumTransferEntries; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Disable the transfer cache for the small footprint case. + cache_size_ = 0; +#else + cache_size_ = 16; +#endif + if (cl > 0) { + // Limit the maximum size of the cache based on the size class. If this + // is not done, large size class objects will consume a lot of memory if + // they just sit in the transfer cache. + int32_t bytes = Static::sizemap()->ByteSizeForClass(cl); + int32_t objs_to_move = Static::sizemap()->num_objects_to_move(cl); + + ASSERT(objs_to_move > 0 && bytes > 0); + // Limit each size class cache to at most 1MB of objects or one entry, + // whichever is greater. Total transfer cache memory used across all + // size classes then can't be greater than approximately + // 1MB * kMaxNumTransferEntries. + // min and max are in parens to avoid macro-expansion on windows. + max_cache_size_ = (min)(max_cache_size_, + (max)(1, (1024 * 1024) / (bytes * objs_to_move))); + cache_size_ = (min)(cache_size_, max_cache_size_); + } + used_slots_ = 0; + ASSERT(cache_size_ <= max_cache_size_); +} + +void CentralFreeList::ReleaseListToSpans(void* start) { + while (start) { + void *next = SLL_Next(start); + ReleaseToSpans(start); + start = next; + } +} + +// MapObjectToSpan should logically be part of ReleaseToSpans. But +// this triggers an optimization bug in gcc 4.5.0. Moving to a +// separate function, and making sure that function isn't inlined, +// seems to fix the problem. 
It also should be fixed for gcc 4.5.1. +static +#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0 +__attribute__ ((noinline)) +#endif +Span* MapObjectToSpan(void* object) { + const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; + Span* span = Static::pageheap()->GetDescriptor(p); + return span; +} + +void CentralFreeList::ReleaseToSpans(void* object) { + Span* span = MapObjectToSpan(object); + ASSERT(span != NULL); + ASSERT(span->refcount > 0); + + // If span is empty, move it to non-empty list + if (span->objects == NULL) { + tcmalloc::DLL_Remove(span); + tcmalloc::DLL_Prepend(&nonempty_, span); + Event(span, 'N', 0); + } + + // The following check is expensive, so it is disabled by default + if (false) { + // Check that object does not occur in list + int got = 0; + for (void* p = span->objects; p != NULL; p = *((void**) p)) { + ASSERT(p != object); + got++; + } + ASSERT(got + span->refcount == + (span->length<<kPageShift) / + Static::sizemap()->ByteSizeForClass(span->sizeclass)); + } + + counter_++; + span->refcount--; + if (span->refcount == 0) { + Event(span, '#', 0); + counter_ -= ((span->length<<kPageShift) / + Static::sizemap()->ByteSizeForClass(span->sizeclass)); + tcmalloc::DLL_Remove(span); + --num_spans_; + + // Release central list lock while operating on pageheap + lock_.Unlock(); + { + SpinLockHolder h(Static::pageheap_lock()); + Static::pageheap()->Delete(span); + } + lock_.Lock(); + } else { + *(reinterpret_cast<void**>(object)) = span->objects; + span->objects = object; + } +} + +bool CentralFreeList::EvictRandomSizeClass( + int locked_size_class, bool force) { + static int race_counter = 0; + int t = race_counter++; // Updated without a lock, but who cares. + if (t >= kNumClasses) { + while (t >= kNumClasses) { + t -= kNumClasses; + } + race_counter = t; + } + ASSERT(t >= 0); + ASSERT(t < kNumClasses); + if (t == locked_size_class) return false; + return Static::central_cache()[t].ShrinkCache(locked_size_class, force); +} + +bool CentralFreeList::MakeCacheSpace() { + // Is there room in the cache? + if (used_slots_ < cache_size_) return true; + // Check if we can expand this cache? + if (cache_size_ == max_cache_size_) return false; + // Ok, we'll try to grab an entry from some other size class. + if (EvictRandomSizeClass(size_class_, false) || + EvictRandomSizeClass(size_class_, true)) { + // Succeeded in evicting, we're going to make our cache larger. + // However, we may have dropped and re-acquired the lock in + // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the + // cache_size may have changed. Therefore, check and verify that it is + // still OK to increase the cache_size. + if (cache_size_ < max_cache_size_) { + cache_size_++; + return true; + } + } + return false; +} + + +namespace { +class LockInverter { + private: + SpinLock *held_, *temp_; + public: + inline explicit LockInverter(SpinLock* held, SpinLock *temp) + : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } + inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } +}; +} + +// This function is marked as NO_THREAD_SAFETY_ANALYSIS because it uses +// LockInverter to release one lock and acquire another in scoped-lock +// style, which our current annotation/analysis does not support. +bool CentralFreeList::ShrinkCache(int locked_size_class, bool force) + NO_THREAD_SAFETY_ANALYSIS { + // Start with a quick check without taking a lock. + if (cache_size_ == 0) return false; + // We don't evict from a full cache unless we are 'forcing'. 
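+  // (Here "full" means every transfer-cache slot is in use; with force we
+  // drop one entry, returning its objects to their spans, before shrinking.)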
+ if (force == false && used_slots_ == cache_size_) return false; + + // Grab lock, but first release the other lock held by this thread. We use + // the lock inverter to ensure that we never hold two size class locks + // concurrently. That can create a deadlock because there is no well + // defined nesting order. + LockInverter li(&Static::central_cache()[locked_size_class].lock_, &lock_); + ASSERT(used_slots_ <= cache_size_); + ASSERT(0 <= cache_size_); + if (cache_size_ == 0) return false; + if (used_slots_ == cache_size_) { + if (force == false) return false; + // ReleaseListToSpans releases the lock, so we have to make all the + // updates to the central list before calling it. + cache_size_--; + used_slots_--; + ReleaseListToSpans(tc_slots_[used_slots_].head); + return true; + } + cache_size_--; + return true; +} + +void CentralFreeList::InsertRange(void *start, void *end, int N) { + SpinLockHolder h(&lock_); + if (N == Static::sizemap()->num_objects_to_move(size_class_) && + MakeCacheSpace()) { + int slot = used_slots_++; + ASSERT(slot >=0); + ASSERT(slot < max_cache_size_); + TCEntry *entry = &tc_slots_[slot]; + entry->head = start; + entry->tail = end; + return; + } + ReleaseListToSpans(start); +} + +int CentralFreeList::RemoveRange(void **start, void **end, int N) { + ASSERT(N > 0); + lock_.Lock(); + if (N == Static::sizemap()->num_objects_to_move(size_class_) && + used_slots_ > 0) { + int slot = --used_slots_; + ASSERT(slot >= 0); + TCEntry *entry = &tc_slots_[slot]; + *start = entry->head; + *end = entry->tail; + lock_.Unlock(); + return N; + } + + int result = 0; + *start = NULL; + *end = NULL; + // TODO: Prefetch multiple TCEntries? + result = FetchFromOneSpansSafe(N, start, end); + if (result != 0) { + while (result < N) { + int n; + void* head = NULL; + void* tail = NULL; + n = FetchFromOneSpans(N - result, &head, &tail); + if (!n) break; + result += n; + SLL_PushRange(start, head, tail); + } + } + lock_.Unlock(); + return result; +} + + +int CentralFreeList::FetchFromOneSpansSafe(int N, void **start, void **end) { + int result = FetchFromOneSpans(N, start, end); + if (!result) { + Populate(); + result = FetchFromOneSpans(N, start, end); + } + return result; +} + +int CentralFreeList::FetchFromOneSpans(int N, void **start, void **end) { + if (tcmalloc::DLL_IsEmpty(&nonempty_)) return 0; + Span* span = nonempty_.next; + + ASSERT(span->objects != NULL); + + int result = 0; + void *prev, *curr; + curr = span->objects; + do { + prev = curr; + curr = *(reinterpret_cast<void**>(curr)); + } while (++result < N && curr != NULL); + + if (curr == NULL) { + // Move to empty list + tcmalloc::DLL_Remove(span); + tcmalloc::DLL_Prepend(&empty_, span); + Event(span, 'E', 0); + } + + *start = span->objects; + *end = prev; + span->objects = curr; + SLL_SetNext(*end, NULL); + span->refcount += result; + counter_ -= result; + return result; +} + +// Fetch memory from the system and add to the central cache freelist. +void CentralFreeList::Populate() { + // Release central list lock while operating on pageheap + lock_.Unlock(); + const size_t npages = Static::sizemap()->class_to_pages(size_class_); + + Span* span; + { + SpinLockHolder h(Static::pageheap_lock()); + span = Static::pageheap()->New(npages); + if (span) Static::pageheap()->RegisterSizeClass(span, size_class_); + } + if (span == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: allocation failed", npages << kPageShift); + lock_.Lock(); + return; + } + ASSERT(span->length == npages); + // Cache sizeclass info eagerly. 
Locking is not necessary. + // (Instead of being eager, we could just replace any stale info + // about this span, but that seems to be no better in practice.) + for (int i = 0; i < npages; i++) { + Static::pageheap()->CacheSizeClass(span->start + i, size_class_); + } + + // Split the block into pieces and add to the free-list + // TODO: coloring of objects to avoid cache conflicts? + void** tail = &span->objects; + char* ptr = reinterpret_cast<char*>(span->start << kPageShift); + char* limit = ptr + (npages << kPageShift); + const size_t size = Static::sizemap()->ByteSizeForClass(size_class_); + int num = 0; + while (ptr + size <= limit) { + *tail = ptr; + tail = reinterpret_cast<void**>(ptr); + ptr += size; + num++; + } + ASSERT(ptr <= limit); + *tail = NULL; + span->refcount = 0; // No sub-object in use yet + + // Add span to list of non-empty spans + lock_.Lock(); + tcmalloc::DLL_Prepend(&nonempty_, span); + ++num_spans_; + counter_ += num; +} + +int CentralFreeList::tc_length() { + SpinLockHolder h(&lock_); + return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_); +} + +size_t CentralFreeList::OverheadBytes() { + SpinLockHolder h(&lock_); + if (size_class_ == 0) { // 0 holds the 0-sized allocations + return 0; + } + const size_t pages_per_span = Static::sizemap()->class_to_pages(size_class_); + const size_t object_size = Static::sizemap()->class_to_size(size_class_); + ASSERT(object_size > 0); + const size_t overhead_per_span = (pages_per_span * kPageSize) % object_size; + return num_spans_ * overhead_per_span; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/central_freelist.h b/src/third_party/gperftools-2.5/src/central_freelist.h new file mode 100644 index 00000000000..4148680d20a --- /dev/null +++ b/src/third_party/gperftools-2.5/src/central_freelist.h @@ -0,0 +1,211 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+// ---
+// Author: Sanjay Ghemawat <opensource@google.com>
+
+#ifndef TCMALLOC_CENTRAL_FREELIST_H_
+#define TCMALLOC_CENTRAL_FREELIST_H_
+
+#include "config.h"
+#include <stddef.h>                     // for size_t
+#ifdef HAVE_STDINT_H
+#include <stdint.h>                     // for int32_t
+#endif
+#include "base/spinlock.h"
+#include "base/thread_annotations.h"
+#include "common.h"
+#include "span.h"
+
+namespace tcmalloc {
+
+// Data kept per size-class in central cache.
+class CentralFreeList {
+ public:
+  // A CentralFreeList may be used before its constructor runs.
+  // So we prevent lock_'s constructor from doing anything to the
+  // lock_ state.
+  CentralFreeList() : lock_(base::LINKER_INITIALIZED) { }
+
+  void Init(size_t cl);
+
+  // These methods all do internal locking.
+
+  // Insert the specified range into the central freelist.  N is the number of
+  // elements in the range.  RemoveRange() is the opposite operation.
+  void InsertRange(void *start, void *end, int N);
+
+  // Returns the actual number of fetched elements and sets *start and *end.
+  int RemoveRange(void **start, void **end, int N);
+
+  // Returns the number of free objects in cache.
+  int length() {
+    SpinLockHolder h(&lock_);
+    return counter_;
+  }
+
+  // Returns the number of free objects in the transfer cache.
+  int tc_length();
+
+  // Returns the memory overhead (internal fragmentation) attributable
+  // to the freelist.  This is memory lost when the size of elements
+  // in a freelist doesn't exactly divide the page-size (an 8192-byte
+  // page full of 5-byte objects would have 2 bytes memory overhead).
+  size_t OverheadBytes();
+
+  // Lock/Unlock the internal SpinLock.  Used on the pthread_atfork call
+  // to set the lock in a consistent state before the fork.
+  void Lock() {
+    lock_.Lock();
+  }
+
+  void Unlock() {
+    lock_.Unlock();
+  }
+
+ private:
+  // TransferCache is used to cache transfers of
+  // sizemap.num_objects_to_move(size_class) back and forth between
+  // thread caches and the central cache for a given size class.
+  struct TCEntry {
+    void *head;  // Head of chain of objects.
+    void *tail;  // Tail of chain of objects.
+  };
+
+  // A central cache freelist can have anywhere from 0 to kMaxNumTransferEntries
+  // slots to put linked-list chains into.
+#ifdef TCMALLOC_SMALL_BUT_SLOW
+  // For the small memory model, the transfer cache is not used.
+  static const int kMaxNumTransferEntries = 0;
+#else
+  // Starting point for the maximum number of entries in the transfer cache.
+  // The actual maximum for a given size class may be lower than this value.
+  static const int kMaxNumTransferEntries = 64;
+#endif
+
+  // REQUIRES: lock_ is held
+  // Remove up to N objects from the cache; returns the number removed.
+  // Returns 0 if there are no free entries in the cache.
+  int FetchFromOneSpans(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // REQUIRES: lock_ is held
+  // Remove up to N objects from the cache; returns the number removed.
+  // Fetches from the pageheap if the cache is empty.  Only returns
+  // 0 on allocation failure.
+  int FetchFromOneSpansSafe(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // REQUIRES: lock_ is held
+  // Release a linked list of objects to spans.
+  // May temporarily release lock_.
+  void ReleaseListToSpans(void *start) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // REQUIRES: lock_ is held
+  // Release an object to spans.
+  // May temporarily release lock_.
+  void ReleaseToSpans(void* object) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // REQUIRES: lock_ is held
+  // Populate cache by fetching from the page heap.
+  // May temporarily release lock_.
+  void Populate() EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // REQUIRES: lock_ is held.
+  // Tries to make room for a TCEntry.  If the cache is full it will try to
+  // expand it at the cost of some other cache size.  Returns false if there
+  // is no space.
+  bool MakeCacheSpace() EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // REQUIRES: lock_ for locked_size_class is held.
+  // Picks a "random" size class to steal a TCEntry slot from.  In reality it
+  // just iterates over the sizeclasses but does so without taking a lock.
+  // Returns true on success.
+  // May temporarily lock a "random" size class.
+  static bool EvictRandomSizeClass(int locked_size_class, bool force);
+
+  // REQUIRES: lock_ is *not* held.
+  // Tries to shrink the cache.  If force is true it will release objects to
+  // spans if that allows it to shrink the cache.  Returns false if it failed
+  // to shrink the cache.  Decrements cache_size_ on success.
+  // May temporarily take lock_.  If it takes lock_, the locked_size_class
+  // lock is released to keep the thread from holding two size class locks
+  // concurrently, which could lead to a deadlock.
+  bool ShrinkCache(int locked_size_class, bool force) LOCKS_EXCLUDED(lock_);
+
+  // This lock protects all the data members.  used_slots_ and cache_size_
+  // may be looked at without holding the lock.
+  SpinLock lock_;
+
+  // We keep linked lists of empty and non-empty spans.
+  size_t size_class_;     // My size class
+  Span empty_;            // Dummy header for list of empty spans
+  Span nonempty_;         // Dummy header for list of non-empty spans
+  size_t num_spans_;      // Number of spans in empty_ plus nonempty_
+  size_t counter_;        // Number of free objects in cache
+
+  // Here we reserve space for TCEntry cache slots.  Space is preallocated
+  // for the largest possible number of entries that any one size class may
+  // accumulate.  Not all size classes are allowed to accumulate
+  // kMaxNumTransferEntries, so there is some wasted space for those size
+  // classes.
+  TCEntry tc_slots_[kMaxNumTransferEntries];
+
+  // Number of currently used cached entries in tc_slots_.  This variable is
+  // updated under a lock but can be read without one.
+  int32_t used_slots_;
+  // The current number of slots for this size class.  This is an
+  // adaptive value that is increased if there is lots of traffic
+  // on a given size class.
+  int32_t cache_size_;
+  // Maximum size of the cache for a given size class.
+  int32_t max_cache_size_;
+};
+
+// Pads each CentralFreeList object to a multiple of 64 bytes.  Since some
+// compilers (such as MSVC) don't like it when the padding is 0, I use
+// template specialization to remove the padding entirely when
+// sizeof(CentralFreeList) is a multiple of 64.
+template<int kFreeListSizeMod64>
+class CentralFreeListPaddedTo : public CentralFreeList {
+ private:
+  char pad_[64 - kFreeListSizeMod64];
+};
+
+template<>
+class CentralFreeListPaddedTo<0> : public CentralFreeList {
+};
+
+class CentralFreeListPadded : public CentralFreeListPaddedTo<
+  sizeof(CentralFreeList) % 64> {
+};
+
+}  // namespace tcmalloc
+
+#endif  // TCMALLOC_CENTRAL_FREELIST_H_
diff --git a/src/third_party/gperftools-2.5/src/common.cc b/src/third_party/gperftools-2.5/src/common.cc
new file mode 100644
index 00000000000..313848c37b6
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/common.cc
@@ -0,0 +1,275 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat <opensource@google.com>
+
+#include <stdlib.h>                     // for getenv and strtol
+#include "config.h"
+#include "common.h"
+#include "system-alloc.h"
+#include "base/spinlock.h"
+#include "getenv_safe.h" // TCMallocGetenvSafe
+
+namespace tcmalloc {
+
+// Define the maximum number of objects per size class to transfer between
+// thread and central caches.
+static int32 FLAGS_tcmalloc_transfer_num_objects;
+
+static const int32 kDefaultTransferNumObjects = 512;
+
+// The init function explicitly initializes the variable value from the
+// env. var; relying on C++ global construction could defer initialization
+// until after a malloc/new call.
+static inline void InitTCMallocTransferNumObjects()
+{
+  if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
+    const char *envval = TCMallocGetenvSafe("TCMALLOC_TRANSFER_NUM_OBJ");
+    FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjects :
+      strtol(envval, NULL, 10);
+  }
+}
+
+// Note: the following only works for "n"s that fit in 32-bits, but
+// that is fine since we only use it for small sizes.
+static inline int LgFloor(size_t n) {
+  int log = 0;
+  for (int i = 4; i >= 0; --i) {
+    int shift = (1 << i);
+    size_t x = n >> shift;
+    if (x != 0) {
+      n = x;
+      log += shift;
+    }
+  }
+  ASSERT(n == 1);
+  return log;
+}
+
+int AlignmentForSize(size_t size) {
+  int alignment = kAlignment;
+  if (size > kMaxSize) {
+    // Cap alignment at kPageSize for large sizes.
+    alignment = kPageSize;
+  } else if (size >= 128) {
+    // Space wasted due to alignment is at most 1/8, i.e., 12.5%.
+    alignment = (1 << LgFloor(size)) / 8;
+  } else if (size >= kMinAlign) {
+    // We need an alignment of at least 16 bytes to satisfy
+    // requirements for some SSE types.
+    alignment = kMinAlign;
+  }
+  // Maximum alignment allowed is page size alignment.
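+  // A worked example of the tiers above (illustrative sizes, assuming
+  // kMinAlign = 16 and the default kMaxSize = 256K):
+  //   size = 40   -> kMinAlign branch     -> alignment = 16
+  //   size = 300  -> LgFloor(300) = 8     -> alignment = 256/8 = 32
+  //   size = 512K -> larger than kMaxSize -> alignment = kPageSize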
+ if (alignment > kPageSize) { + alignment = kPageSize; + } + CHECK_CONDITION(size < kMinAlign || alignment >= kMinAlign); + CHECK_CONDITION((alignment & (alignment - 1)) == 0); + return alignment; +} + +int SizeMap::NumMoveSize(size_t size) { + if (size == 0) return 0; + // Use approx 64k transfers between thread and central caches. + int num = static_cast<int>(64.0 * 1024.0 / size); + if (num < 2) num = 2; + + // Avoid bringing too many objects into small object free lists. + // If this value is too large: + // - We waste memory with extra objects sitting in the thread caches. + // - The central freelist holds its lock for too long while + // building a linked list of objects, slowing down the allocations + // of other threads. + // If this value is too small: + // - We go to the central freelist too often and we have to acquire + // its lock each time. + // This value strikes a balance between the constraints above. + if (num > FLAGS_tcmalloc_transfer_num_objects) + num = FLAGS_tcmalloc_transfer_num_objects; + + return num; +} + +// Initialize the mapping arrays +void SizeMap::Init() { + InitTCMallocTransferNumObjects(); + + // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] + if (ClassIndex(0) != 0) { + Log(kCrash, __FILE__, __LINE__, + "Invalid class index for size 0", ClassIndex(0)); + } + if (ClassIndex(kMaxSize) >= sizeof(class_array_)) { + Log(kCrash, __FILE__, __LINE__, + "Invalid class index for kMaxSize", ClassIndex(kMaxSize)); + } + + // Compute the size classes we want to use + int sc = 1; // Next size class to assign + int alignment = kAlignment; + CHECK_CONDITION(kAlignment <= kMinAlign); + for (size_t size = kAlignment; size <= kMaxSize; size += alignment) { + alignment = AlignmentForSize(size); + CHECK_CONDITION((size % alignment) == 0); + + int blocks_to_move = NumMoveSize(size) / 4; + size_t psize = 0; + do { + psize += kPageSize; + // Allocate enough pages so leftover is less than 1/8 of total. + // This bounds wasted space to at most 12.5%. + while ((psize % size) > (psize >> 3)) { + psize += kPageSize; + } + // Continue to add pages until there are at least as many objects in + // the span as are needed when moving objects from the central + // freelists and spans to the thread caches. + } while ((psize / size) < (blocks_to_move)); + const size_t my_pages = psize >> kPageShift; + + if (sc > 1 && my_pages == class_to_pages_[sc-1]) { + // See if we can merge this into the previous class without + // increasing the fragmentation of the previous class. + const size_t my_objects = (my_pages << kPageShift) / size; + const size_t prev_objects = (class_to_pages_[sc-1] << kPageShift) + / class_to_size_[sc-1]; + if (my_objects == prev_objects) { + // Adjust last class to include this size + class_to_size_[sc-1] = size; + continue; + } + } + + // Add new class + class_to_pages_[sc] = my_pages; + class_to_size_[sc] = size; + sc++; + } + if (sc != kNumClasses) { + Log(kCrash, __FILE__, __LINE__, + "wrong number of size classes: (found vs. 
expected )", sc, kNumClasses); + } + + // Initialize the mapping arrays + int next_size = 0; + for (int c = 1; c < kNumClasses; c++) { + const int max_size_in_class = class_to_size_[c]; + for (int s = next_size; s <= max_size_in_class; s += kAlignment) { + class_array_[ClassIndex(s)] = c; + } + next_size = max_size_in_class + kAlignment; + } + + // Double-check sizes just to be safe + for (size_t size = 0; size <= kMaxSize;) { + const int sc = SizeClass(size); + if (sc <= 0 || sc >= kNumClasses) { + Log(kCrash, __FILE__, __LINE__, + "Bad size class (class, size)", sc, size); + } + if (sc > 1 && size <= class_to_size_[sc-1]) { + Log(kCrash, __FILE__, __LINE__, + "Allocating unnecessarily large class (class, size)", sc, size); + } + const size_t s = class_to_size_[sc]; + if (size > s || s == 0) { + Log(kCrash, __FILE__, __LINE__, + "Bad (class, size, requested)", sc, s, size); + } + if (size <= kMaxSmallSize) { + size += 8; + } else { + size += 128; + } + } + + // Initialize the num_objects_to_move array. + for (size_t cl = 1; cl < kNumClasses; ++cl) { + num_objects_to_move_[cl] = NumMoveSize(ByteSizeForClass(cl)); + } +} + +// Metadata allocator -- keeps stats about how many bytes allocated. +static uint64_t metadata_system_bytes_ = 0; +static const size_t kMetadataAllocChunkSize = 8*1024*1024; +// As ThreadCache objects are allocated with MetaDataAlloc, and also +// CACHELINE_ALIGNED, we must use the same alignment as TCMalloc_SystemAlloc. +static const size_t kMetadataAllignment = sizeof(MemoryAligner); + +static char *metadata_chunk_alloc_; +static size_t metadata_chunk_avail_; + +static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED); + +void* MetaDataAlloc(size_t bytes) { + if (bytes >= kMetadataAllocChunkSize) { + void *rv = TCMalloc_SystemAlloc(bytes, + NULL, kMetadataAllignment); + if (rv != NULL) { + metadata_system_bytes_ += bytes; + } + return rv; + } + + SpinLockHolder h(&metadata_alloc_lock); + + // the following works by essentially turning address to integer of + // log_2 kMetadataAllignment size and negating it. I.e. negated + // value + original value gets 0 and that's what we want modulo + // kMetadataAllignment. Note, we negate before masking higher bits + // off, otherwise we'd have to mask them off after negation anyways. + intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1); + + if (metadata_chunk_avail_ < bytes + alignment) { + size_t real_size; + void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize, + &real_size, kMetadataAllignment); + if (ptr == NULL) { + return NULL; + } + + metadata_chunk_alloc_ = static_cast<char *>(ptr); + metadata_chunk_avail_ = real_size; + + alignment = 0; + } + + void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment); + bytes += alignment; + metadata_chunk_alloc_ += bytes; + metadata_chunk_avail_ -= bytes; + metadata_system_bytes_ += bytes; + return rv; +} + +uint64_t metadata_system_bytes() { return metadata_system_bytes_; } + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/common.h b/src/third_party/gperftools-2.5/src/common.h new file mode 100644 index 00000000000..e8a1ba6972c --- /dev/null +++ b/src/third_party/gperftools-2.5/src/common.h @@ -0,0 +1,295 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat <opensource@google.com>
+//
+// Common definitions for tcmalloc code.
+
+#ifndef TCMALLOC_COMMON_H_
+#define TCMALLOC_COMMON_H_
+
+#include "config.h"
+#include <stddef.h>                     // for size_t
+#ifdef HAVE_STDINT_H
+#include <stdint.h>                     // for uintptr_t, uint64_t
+#endif
+#include "internal_logging.h"  // for ASSERT, etc
+#include "base/basictypes.h"   // for LIKELY, etc
+
+#ifdef HAVE_BUILTIN_EXPECT
+#define LIKELY(x) __builtin_expect(!!(x), 1)
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
+// Type that can hold a page number
+typedef uintptr_t PageID;
+
+// Type that can hold the length of a run of pages
+typedef uintptr_t Length;
+
+//-------------------------------------------------------------------
+// Configuration
+//-------------------------------------------------------------------
+
+#if defined(TCMALLOC_ALIGN_8BYTES)
+// Unless we force 8-byte alignment, we use an alignment of at least
+// 16 bytes to satisfy requirements for some SSE types.
+// Keep in mind that with 16-byte alignment up to 25% of the space can
+// be wasted on alignment (e.g., a malloc of 24 bytes will get 32 bytes).
+static const size_t kMinAlign   = 8;
+// Number of size classes created until we reach size 128.
+static const size_t kBaseClasses = 16;
+#else
+static const size_t kMinAlign   = 16;
+static const size_t kBaseClasses = 9;
+#endif
+
+// Using large pages speeds up the execution at a cost of larger memory use.
+// Deallocation may speed up as well, since the page map gets 8x smaller, so
+// lookups in the page map result in fewer L2 cache misses, which translates
+// to speedup for application/platform combinations with high L2 cache
+// pressure.
+// As the number of size classes increases with large pages, we increase
+// the thread cache allowance to avoid passing more free ranges to and from
+// central lists.  Also, larger pages are less likely to get freed.
+// These two factors cause a bounded increase in memory use.
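+// For a sense of scale (illustrative numbers, not measurements): with the
+// default 8K pages a 1GB heap spans ~128K pages, while TCMALLOC_64K_PAGES
+// shrinks that to ~16K pages, with correspondingly fewer page-map entries.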
+#if defined(TCMALLOC_32K_PAGES) +static const size_t kPageShift = 15; +static const size_t kNumClasses = kBaseClasses + 69; +#elif defined(TCMALLOC_64K_PAGES) +static const size_t kPageShift = 16; +static const size_t kNumClasses = kBaseClasses + 73; +#else +static const size_t kPageShift = 13; +static const size_t kNumClasses = kBaseClasses + 79; +#endif + +static const size_t kMaxThreadCacheSize = 4 << 20; + +static const size_t kPageSize = 1 << kPageShift; +static const size_t kMaxSize = 256 * 1024; +static const size_t kAlignment = 8; +static const size_t kLargeSizeClass = 0; +// For all span-lengths < kMaxPages we keep an exact-size list. +static const size_t kMaxPages = 1 << (20 - kPageShift); + +// Default bound on the total amount of thread caches. +#ifdef TCMALLOC_SMALL_BUT_SLOW +// Make the overall thread cache no bigger than that of a single thread +// for the small memory footprint case. +static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; +#else +static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; +#endif + +// Lower bound on the per-thread cache sizes +static const size_t kMinThreadCacheSize = kMaxSize * 2; + +// The number of bytes one ThreadCache will steal from another when +// the first ThreadCache is forced to Scavenge(), delaying the +// next call to Scavenge for this thread. +static const size_t kStealAmount = 1 << 16; + +// The number of times that a deallocation can cause a freelist to +// go over its max_length() before shrinking max_length(). +static const int kMaxOverages = 3; + +// Maximum length we allow a per-thread free-list to have before we +// move objects from it into the corresponding central free-list. We +// want this big to avoid locking the central free-list too often. It +// should not hurt to make this list somewhat big because the +// scavenging code will shrink it down when its contents are not in use. +static const int kMaxDynamicFreeListLength = 8192; + +static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; + +#if defined __x86_64__ +// All current and planned x86_64 processors only look at the lower 48 bits +// in virtual to physical address translation. The top 16 are thus unused. +// TODO(rus): Under what operating systems can we increase it safely to 17? +// This lets us use smaller page maps. On first allocation, a 36-bit page map +// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map. +static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); +#else +static const int kAddressBits = 8 * sizeof(void*); +#endif + +namespace tcmalloc { + +// Convert byte size into pages. This won't overflow, but may return +// an unreasonably large value if bytes is huge enough. +inline Length pages(size_t bytes) { + return (bytes >> kPageShift) + + ((bytes & (kPageSize - 1)) > 0 ? 1 : 0); +} + +// For larger allocation sizes, we use larger memory alignments to +// reduce the number of size classes. +int AlignmentForSize(size_t size); + +// Size-class information + mapping +class SizeMap { + private: + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. 
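+  // Example, per NumMoveSize() in common.cc: a 256-byte class moves
+  // 64K/256 = 256 objects per batch, while an 8-byte class would compute
+  // 8192 and is clamped to FLAGS_tcmalloc_transfer_num_objects (512 by
+  // default).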
+ int num_objects_to_move_[kNumClasses]; + + //------------------------------------------------------------------- + // Mapping from size to size_class and vice versa + //------------------------------------------------------------------- + + // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an + // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. + // So for these larger sizes we have an array indexed by ceil(size/128). + // + // We flatten both logical arrays into one physical array and use + // arithmetic to compute an appropriate index. The constants used by + // ClassIndex() were selected to make the flattening work. + // + // Examples: + // Size Expression Index + // ------------------------------------------------------- + // 0 (0 + 7) / 8 0 + // 1 (1 + 7) / 8 1 + // ... + // 1024 (1024 + 7) / 8 128 + // 1025 (1025 + 127 + (120<<7)) / 128 129 + // ... + // 32768 (32768 + 127 + (120<<7)) / 128 376 + static const int kMaxSmallSize = 1024; + static const size_t kClassArraySize = + ((kMaxSize + 127 + (120 << 7)) >> 7) + 1; + unsigned char class_array_[kClassArraySize]; + + static inline size_t SmallSizeClass(size_t s) { + return (static_cast<uint32_t>(s) + 7) >> 3; + } + + static inline size_t LargeSizeClass(size_t s) { + return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7; + } + + // Compute index of the class_array[] entry for a given size + static inline size_t ClassIndex(size_t s) { + // Use unsigned arithmetic to avoid unnecessary sign extensions. + ASSERT(0 <= s); + ASSERT(s <= kMaxSize); + if (LIKELY(s <= kMaxSmallSize)) { + return SmallSizeClass(s); + } else { + return LargeSizeClass(s); + } + } + + int NumMoveSize(size_t size); + + // Mapping from size class to max size storable in that class + size_t class_to_size_[kNumClasses]; + + // Mapping from size class to number of pages to allocate at a time + size_t class_to_pages_[kNumClasses]; + + public: + // Constructor should do nothing since we rely on explicit Init() + // call, which may or may not be called before the constructor runs. + SizeMap() { } + + // Initialize the mapping arrays + void Init(); + + inline int SizeClass(size_t size) { + return class_array_[ClassIndex(size)]; + } + + inline bool MaybeSizeClass(size_t size, size_t *size_class) { + size_t class_idx; + if (LIKELY(size <= kMaxSmallSize)) { + class_idx = SmallSizeClass(size); + } else if (size <= kMaxSize) { + class_idx = LargeSizeClass(size); + } else { + return false; + } + *size_class = class_array_[class_idx]; + return true; + } + + // Get the byte-size for a specified class + inline size_t ByteSizeForClass(size_t cl) { + return class_to_size_[cl]; + } + + // Mapping from size class to max size storable in that class + inline size_t class_to_size(size_t cl) { + return class_to_size_[cl]; + } + + // Mapping from size class to number of pages to allocate at a time + inline size_t class_to_pages(size_t cl) { + return class_to_pages_[cl]; + } + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + inline int num_objects_to_move(size_t cl) { + return num_objects_to_move_[cl]; + } +}; + +// Allocates "bytes" worth of memory and returns it. Increments +// metadata_system_bytes appropriately. 
May return NULL if allocation +// fails. Requires pageheap_lock is held. +void* MetaDataAlloc(size_t bytes); + +// Returns the total number of bytes allocated from the system. +// Requires pageheap_lock is held. +uint64_t metadata_system_bytes(); + +// size/depth are made the same size as a pointer so that some generic +// code below can conveniently cast them back and forth to void*. +static const int kMaxStackDepth = 31; +struct StackTrace { + uintptr_t size; // Size of object + uintptr_t depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_COMMON_H_ diff --git a/src/third_party/gperftools-2.5/src/config.h.in b/src/third_party/gperftools-2.5/src/config.h.in new file mode 100644 index 00000000000..f7681dabfc4 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/config.h.in @@ -0,0 +1,307 @@ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + + +#ifndef GPERFTOOLS_CONFIG_H_ +#define GPERFTOOLS_CONFIG_H_ + + +/* Build runtime detection for sized delete */ +#undef ENABLE_DYNAMIC_SIZED_DELETE + +/* Build sized deletion operators */ +#undef ENABLE_SIZED_DELETE + +/* Define to 1 if compiler supports __builtin_expect */ +#undef HAVE_BUILTIN_EXPECT + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +#undef HAVE_BUILTIN_STACK_POINTER + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +#undef HAVE_CONFLICT_SIGNAL_H + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +#undef HAVE_CYGWIN_SIGNAL_H + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +#undef HAVE_DECL_BACKTRACE + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#undef HAVE_DECL_CFREE + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#undef HAVE_DECL_MEMALIGN + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. */ +#undef HAVE_DECL_NANOSLEEP + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#undef HAVE_DECL_POSIX_MEMALIGN + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#undef HAVE_DECL_PVALLOC + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +#undef HAVE_DECL_SLEEP + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#undef HAVE_DECL_UNAME + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#undef HAVE_DECL_VALLOC + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if the system has the type `Elf32_Versym'. */ +#undef HAVE_ELF32_VERSYM + +/* Define to 1 if you have the <execinfo.h> header file. */ +#undef HAVE_EXECINFO_H + +/* Define to 1 if you have the <fcntl.h> header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the <features.h> header file. */ +#undef HAVE_FEATURES_H + +/* Define to 1 if you have the `fork' function. */ +#undef HAVE_FORK + +/* Define to 1 if you have the `geteuid' function. */ +#undef HAVE_GETEUID + +/* Define to 1 if you have the `getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* Define to 1 if you have the <glob.h> header file. */ +#undef HAVE_GLOB_H + +/* Define to 1 if you have the <grp.h> header file. */ +#undef HAVE_GRP_H + +/* Define to 1 if you have the <inttypes.h> header file. 
*/ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <libunwind.h> header file. */ +#undef HAVE_LIBUNWIND_H + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#undef HAVE_LINUX_PTRACE_H + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +#undef HAVE_LINUX_SIGEV_THREAD_ID + +/* Define to 1 if you have the <malloc.h> header file. */ +#undef HAVE_MALLOC_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. */ +#undef HAVE_MMAP + +/* define if the compiler implements namespaces */ +#undef HAVE_NAMESPACES + +/* Define to 1 if you have the <poll.h> header file. */ +#undef HAVE_POLL_H + +/* define if libc has program_invocation_name */ +#undef HAVE_PROGRAM_INVOCATION_NAME + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +#undef HAVE_PTHREAD_DESPITE_ASKING_FOR + +/* Define to 1 if you have the <pwd.h> header file. */ +#undef HAVE_PWD_H + +/* Define to 1 if you have the `sbrk' function. */ +#undef HAVE_SBRK + +/* Define to 1 if you have the <sched.h> header file. */ +#undef HAVE_SCHED_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#undef HAVE_STRUCT_MALLINFO + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#undef HAVE_SYS_CDEFS_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#undef HAVE_SYS_PRCTL_H + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#undef HAVE_SYS_RESOURCE_H + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#undef HAVE_SYS_SYSCALL_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#undef HAVE_SYS_UCONTEXT_H + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if compiler supports __thread */ +#undef HAVE_TLS + +/* Define to 1 if you have the <ucontext.h> header file. */ +#undef HAVE_UCONTEXT_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +#undef HAVE_UNWIND_BACKTRACE + +/* Define to 1 if you have the <unwind.h> header file. */ +#undef HAVE_UNWIND_H + +/* Define to 1 if you have the <valgrind.h> header file. */ +#undef HAVE_VALGRIND_H + +/* define if your compiler has __attribute__ */ +#undef HAVE___ATTRIBUTE__ + +/* Define to 1 if compiler supports __environ */ +#undef HAVE___ENVIRON + +/* Define to 1 if the system has the type `__int64'. */ +#undef HAVE___INT64 + +/* prefix where we look for installed files */ +#undef INSTALL_PREFIX + +/* Define to 1 if int32_t is equivalent to intptr_t */ +#undef INT32_EQUALS_INTPTR + +/* Define to the sub-directory where libtool stores uninstalled libraries. 
*/ +#undef LT_OBJDIR + +/* Define to 'volatile' if __malloc_hook is declared volatile */ +#undef MALLOC_HOOK_MAYBE_VOLATILE + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* How to access the PC from a struct ucontext */ +#undef PC_FROM_UCONTEXT + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. */ +#undef PERFTOOLS_DLL_DECL + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIdS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIuS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIxS + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* the namespace where STL code like vector<> is defined */ +#undef STL_NAMESPACE + +/* Define 32K of internal pages size for tcmalloc */ +#undef TCMALLOC_32K_PAGES + +/* Define 64K of internal pages size for tcmalloc */ +#undef TCMALLOC_64K_PAGES + +/* Define 8 bytes of allocation alignment for tcmalloc */ +#undef TCMALLOC_ALIGN_8BYTES + +/* Version number of package */ +#undef VERSION + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + + +#ifdef __MINGW32__ +#include "windows/mingw.h" +#endif + +#endif /* #ifndef GPERFTOOLS_CONFIG_H_ */ + diff --git a/src/third_party/gperftools-2.5/src/config_for_unittests.h b/src/third_party/gperftools-2.5/src/config_for_unittests.h new file mode 100644 index 00000000000..66592a70071 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/config_for_unittests.h @@ -0,0 +1,65 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Craig Silverstein +// +// This file is needed for windows -- unittests are not part of the +// perftools dll, but still want to include config.h just like the +// dll does, so they can use internal tools and APIs for testing. +// +// The problem is that config.h declares PERFTOOLS_DLL_DECL to be +// for exporting symbols, but the unittest needs to *import* symbols +// (since it's not the dll). +// +// The solution is to have this file, which is just like config.h but +// sets PERFTOOLS_DLL_DECL to do a dllimport instead of a dllexport. +// +// The reason we need this extra PERFTOOLS_DLL_DECL_FOR_UNITTESTS +// variable is in case people want to set PERFTOOLS_DLL_DECL explicitly +// to something other than __declspec(dllexport). In that case, they +// may want to use something other than __declspec(dllimport) for the +// unittest case. For that, we allow folks to define both +// PERFTOOLS_DLL_DECL and PERFTOOLS_DLL_DECL_FOR_UNITTESTS explicitly. +// +// NOTE: This file is equivalent to config.h on non-windows systems, +// which never defined PERFTOOLS_DLL_DECL_FOR_UNITTESTS and always +// define PERFTOOLS_DLL_DECL to the empty string. + +#include "config.h" + +#undef PERFTOOLS_DLL_DECL +#ifdef PERFTOOLS_DLL_DECL_FOR_UNITTESTS +# define PERFTOOLS_DLL_DECL PERFTOOLS_DLL_DECL_FOR_UNITTESTS +#else +# define PERFTOOLS_DLL_DECL // if DLL_DECL_FOR_UNITTESTS isn't defined, use "" +#endif diff --git a/src/third_party/gperftools-2.5/src/debugallocation.cc b/src/third_party/gperftools-2.5/src/debugallocation.cc new file mode 100644 index 00000000000..0e650b6483d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/debugallocation.cc @@ -0,0 +1,1490 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2000, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Urs Holzle <opensource@google.com> + +#include "config.h" +#include <errno.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +// We only need malloc.h for struct mallinfo. +#ifdef HAVE_STRUCT_MALLINFO +// Malloc can be in several places on older versions of OS X. +# if defined(HAVE_MALLOC_H) +# include <malloc.h> +# elif defined(HAVE_MALLOC_MALLOC_H) +# include <malloc/malloc.h> +# elif defined(HAVE_SYS_MALLOC_H) +# include <sys/malloc.h> +# endif +#endif +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <sys/stat.h> +#include <sys/types.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include <gperftools/malloc_extension.h> +#include <gperftools/malloc_hook.h> +#include <gperftools/stacktrace.h> +#include "addressmap-inl.h" +#include "base/commandlineflags.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "malloc_hook-inl.h" +#include "symbolize.h" + +// NOTE: due to #define below, tcmalloc.cc will omit tc_XXX +// definitions. So that debug implementations can be defined +// instead. We're going to use do_malloc, do_free and other do_XXX +// functions that are defined in tcmalloc.cc for actual memory +// management +#define TCMALLOC_USING_DEBUGALLOCATION +#include "tcmalloc.cc" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. 
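+// (BSD-derived systems historically spell it MAP_ANON; the alias below
+// lets the mmap() calls in this file stay uniform.)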
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// ========================================================================= //
+
+DEFINE_bool(malloctrace,
+            EnvToBool("TCMALLOC_TRACE", false),
+            "Enables memory (de)allocation tracing to /tmp/google.alloc.");
+#ifdef HAVE_MMAP
+DEFINE_bool(malloc_page_fence,
+            EnvToBool("TCMALLOC_PAGE_FENCE", false),
+            "Enables placing memory allocations at page boundaries "
+            "with a guard page following the allocation (to catch buffer "
+            "overruns right when they happen).");
+DEFINE_bool(malloc_page_fence_never_reclaim,
+            EnvToBool("TCMALLOC_PAGE_FENCE_NEVER_RECLAIM", false),
+            "Enables making the virtual address space inaccessible "
+            "upon a deallocation instead of returning it and reusing later.");
+#else
+DEFINE_bool(malloc_page_fence, false, "Not usable (requires mmap)");
+DEFINE_bool(malloc_page_fence_never_reclaim, false, "Not usable (requires mmap)");
+#endif
+DEFINE_bool(malloc_reclaim_memory,
+            EnvToBool("TCMALLOC_RECLAIM_MEMORY", true),
+            "If set to false, we never return memory to malloc "
+            "when an object is deallocated.  This ensures that all "
+            "heap object addresses are unique.");
+DEFINE_int32(max_free_queue_size,
+             EnvToInt("TCMALLOC_MAX_FREE_QUEUE_SIZE", 10*1024*1024),
+             "If greater than 0, keep freed blocks in a queue instead of "
+             "releasing them to the allocator immediately.  Release them when "
+             "the total size of all blocks in the queue would otherwise exceed "
+             "this limit.");
+
+DEFINE_bool(symbolize_stacktrace,
+            EnvToBool("TCMALLOC_SYMBOLIZE_STACKTRACE", true),
+            "Symbolize the stack trace when provided (on some error exits)");
+
+// If we are LD_PRELOAD-ed against a non-pthreads app, then
+// pthread_once won't be defined.  We declare it here, for that case,
+// with weak linkage, so that the non-definition resolves to NULL.
+// We can then check for NULL or not in Instance.
+extern "C" int pthread_once(pthread_once_t *, void (*)(void))
+    ATTRIBUTE_WEAK;
+
+// ========================================================================= //
+
+// A safe version of printf() that does not do any allocation and
+// uses very little stack space.
+static void TracePrintf(int fd, const char *fmt, ...)
+  __attribute__ ((__format__ (__printf__, 2, 3)));
+
+// Round "value" up to next "alignment" boundary.
+// Requires that "alignment" be a power of two.
+static intptr_t RoundUp(intptr_t value, intptr_t alignment) {
+  return (value + alignment - 1) & ~(alignment - 1);
+}
+
+// ========================================================================= //
+
+class MallocBlock;
+
+// A circular buffer to hold freed blocks of memory.  MallocBlock::Deallocate
+// (below) pushes blocks into this queue instead of returning them to the
+// underlying allocator immediately.  See MallocBlock::Deallocate for more
+// information.
+//
+// We can't use an STL class for this because we need to be careful not to
+// perform any heap de-allocations in any of the code in this class, since the
+// code in MallocBlock::Deallocate is not re-entrant.
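+// The queue below uses the classic one-slot-gap ring-buffer convention,
+// so it holds at most kFreeQueueSize - 1 entries.  With a hypothetical
+// kFreeQueueSize of 4:
+//   q_back_ == q_front_             -> queue is empty
+//   (q_front_ + 1) % 4 == q_back_   -> Full() is true (3 usable slots)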
+template <typename QueueEntry> +class FreeQueue { + public: + FreeQueue() : q_front_(0), q_back_(0) {} + + bool Full() { + return (q_front_ + 1) % kFreeQueueSize == q_back_; + } + + void Push(const QueueEntry& block) { + q_[q_front_] = block; + q_front_ = (q_front_ + 1) % kFreeQueueSize; + } + + QueueEntry Pop() { + RAW_CHECK(q_back_ != q_front_, "Queue is empty"); + const QueueEntry& ret = q_[q_back_]; + q_back_ = (q_back_ + 1) % kFreeQueueSize; + return ret; + } + + size_t size() const { + return (q_front_ - q_back_ + kFreeQueueSize) % kFreeQueueSize; + } + + private: + // Maximum number of blocks kept in the free queue before being freed. + static const int kFreeQueueSize = 1024; + + QueueEntry q_[kFreeQueueSize]; + int q_front_; + int q_back_; +}; + +struct MallocBlockQueueEntry { + MallocBlockQueueEntry() : block(NULL), size(0), + num_deleter_pcs(0), deleter_threadid(0) {} + MallocBlockQueueEntry(MallocBlock* b, size_t s) : block(b), size(s) { + if (FLAGS_max_free_queue_size != 0 && b != NULL) { + // Adjust the number of frames to skip (4) if you change the + // location of this call. + num_deleter_pcs = + GetStackTrace(deleter_pcs, + sizeof(deleter_pcs) / sizeof(deleter_pcs[0]), + 4); + deleter_threadid = pthread_self(); + } else { + num_deleter_pcs = 0; + // Zero is an illegal pthread id by my reading of the pthread + // implementation: + deleter_threadid = 0; + } + } + + MallocBlock* block; + size_t size; + + // When deleted and put in the free queue, we (flag-controlled) + // record the stack so that if corruption is later found, we can + // print the deleter's stack. (These three vars add 144 bytes of + // overhead under the LP64 data model.) + void* deleter_pcs[16]; + int num_deleter_pcs; + pthread_t deleter_threadid; +}; + +class MallocBlock { + public: // allocation type constants + + // Different allocation types we distinguish. + // Note: The lower 4 bits are not random: we index kAllocName array + // by these values masked with kAllocTypeMask; + // the rest are "random" magic bits to help catch memory corruption. + static const int kMallocType = 0xEFCDAB90; + static const int kNewType = 0xFEBADC81; + static const int kArrayNewType = 0xBCEADF72; + + private: // constants + + // A mask used on alloc types above to get to 0, 1, 2 + static const int kAllocTypeMask = 0x3; + // An additional bit to set in AllocType constants + // to mark now deallocated regions. + static const int kDeallocatedTypeBit = 0x4; + + // For better memory debugging, we initialize all storage to known + // values, and overwrite the storage when it's deallocated: + // Byte that fills uninitialized storage. + static const int kMagicUninitializedByte = 0xAB; + // Byte that fills deallocated storage. + // NOTE: tcmalloc.cc depends on the value of kMagicDeletedByte + // to work around a bug in the pthread library. + static const int kMagicDeletedByte = 0xCD; + // A size_t (type of alloc_type_ below) in a deallocated storage + // filled with kMagicDeletedByte. + static const size_t kMagicDeletedSizeT = + 0xCDCDCDCD | (((size_t)0xCDCDCDCD << 16) << 16); + // Initializer works for 32 and 64 bit size_ts; + // "<< 16 << 16" is to fool gcc from issuing a warning + // when size_ts are 32 bits. + + // NOTE: on Linux, you can enable malloc debugging support in libc by + // setting the environment variable MALLOC_CHECK_ to 1 before you + // start the program (see man malloc). + + // We use either do_malloc or mmap to make the actual allocation. 
In + // order to remember which one of the two was used for any block, we store an + // appropriate magic word next to the block. + static const int kMagicMalloc = 0xDEADBEEF; + static const int kMagicMMap = 0xABCDEFAB; + + // This array will be filled with 0xCD, for use with memcmp. + static unsigned char kMagicDeletedBuffer[1024]; + static pthread_once_t deleted_buffer_initialized_; + static bool deleted_buffer_initialized_no_pthreads_; + + private: // data layout + + // The four fields size1_,offset_,magic1_,alloc_type_ + // should together occupy a multiple of 16 bytes. (At the + // moment, sizeof(size_t) == 4 or 8 depending on piii vs + // k8, and 4 of those sum to 16 or 32 bytes). + // This, combined with do_malloc's alignment guarantees, + // ensures that SSE types can be stored into the returned + // block, at &size2_. + size_t size1_; + size_t offset_; // normally 0 unless memaligned memory + // see comments in memalign() and FromRawPointer(). + size_t magic1_; + size_t alloc_type_; + // here comes the actual data (variable length) + // ... + // then come the size2_ and magic2_, or a full page of mprotect-ed memory + // if the malloc_page_fence feature is enabled. + size_t size2_; + int magic2_; + + private: // static data and helpers + + // Allocation map: stores the allocation type for each allocated object, + // or the type or'ed with kDeallocatedTypeBit + // for each formerly allocated object. + typedef AddressMap<int> AllocMap; + static AllocMap* alloc_map_; + // This protects alloc_map_ and consistent state of metadata + // for each still-allocated object in it. + // We use spin locks instead of pthread_mutex_t locks + // to prevent crashes via calls to pthread_mutex_(un)lock + // for the (de)allocations coming from pthreads initialization itself. + static SpinLock alloc_map_lock_; + + // A queue of freed blocks. Instead of releasing blocks to the allocator + // immediately, we put them in a queue, freeing them only when necessary + // to keep the total size of all the freed blocks below the limit set by + // FLAGS_max_free_queue_size. + static FreeQueue<MallocBlockQueueEntry>* free_queue_; + + static size_t free_queue_size_; // total size of blocks in free_queue_ + // protects free_queue_ and free_queue_size_ + static SpinLock free_queue_lock_; + + // Names of allocation types (kMallocType, kNewType, kArrayNewType) + static const char* const kAllocName[]; + // Names of corresponding deallocation types + static const char* const kDeallocName[]; + + static const char* AllocName(int type) { + return kAllocName[type & kAllocTypeMask]; + } + + static const char* DeallocName(int type) { + return kDeallocName[type & kAllocTypeMask]; + } + + private: // helper accessors + + bool IsMMapped() const { return kMagicMMap == magic1_; } + + bool IsValidMagicValue(int value) const { + return kMagicMMap == value || kMagicMalloc == value; + } + + static size_t real_malloced_size(size_t size) { + return size + sizeof(MallocBlock); + } + + /* + * Here we assume size of page is kMinAlign aligned, + * so if size is MALLOC_ALIGNMENT aligned too, then we could + * guarantee return address is also kMinAlign aligned, because + * mmap return address at nearby page boundary on Linux. + */ + static size_t real_mmapped_size(size_t size) { + size_t tmp = size + MallocBlock::data_offset(); + tmp = RoundUp(tmp, kMinAlign); + return tmp; + } + + size_t real_size() { + return IsMMapped() ? 
real_mmapped_size(size1_) : real_malloced_size(size1_); + } + + // NOTE: if the block is mmapped (that is, we're using the + // malloc_page_fence option) then there's no size2 or magic2 + // (instead, the guard page begins where size2 would be). + + size_t* size2_addr() { return (size_t*)((char*)&size2_ + size1_); } + const size_t* size2_addr() const { + return (const size_t*)((char*)&size2_ + size1_); + } + + int* magic2_addr() { return (int*)(size2_addr() + 1); } + const int* magic2_addr() const { return (const int*)(size2_addr() + 1); } + + private: // other helpers + + void Initialize(size_t size, int type) { + RAW_CHECK(IsValidMagicValue(magic1_), ""); + // record us as allocated in the map + alloc_map_lock_.Lock(); + if (!alloc_map_) { + void* p = do_malloc(sizeof(AllocMap)); + alloc_map_ = new(p) AllocMap(do_malloc, do_free); + } + alloc_map_->Insert(data_addr(), type); + // initialize us + size1_ = size; + offset_ = 0; + alloc_type_ = type; + if (!IsMMapped()) { + *magic2_addr() = magic1_; + *size2_addr() = size; + } + alloc_map_lock_.Unlock(); + memset(data_addr(), kMagicUninitializedByte, size); + if (!IsMMapped()) { + RAW_CHECK(size1_ == *size2_addr(), "should hold"); + RAW_CHECK(magic1_ == *magic2_addr(), "should hold"); + } + } + + size_t CheckAndClear(int type, size_t given_size) { + alloc_map_lock_.Lock(); + CheckLocked(type); + if (!IsMMapped()) { + RAW_CHECK(size1_ == *size2_addr(), "should hold"); + } + // record us as deallocated in the map + alloc_map_->Insert(data_addr(), type | kDeallocatedTypeBit); + alloc_map_lock_.Unlock(); + // clear us + const size_t size = real_size(); + RAW_CHECK(!given_size || given_size == size1_, + "right size must be passed to sized delete"); + memset(this, kMagicDeletedByte, size); + return size; + } + + void CheckLocked(int type) const { + int map_type = 0; + const int* found_type = + alloc_map_ != NULL ? 
alloc_map_->Find(data_addr()) : NULL;
+    if (found_type == NULL) {
+      RAW_LOG(FATAL, "memory allocation bug: object at %p "
+                     "has never been allocated", data_addr());
+    } else {
+      map_type = *found_type;
+    }
+    if ((map_type & kDeallocatedTypeBit) != 0) {
+      RAW_LOG(FATAL, "memory allocation bug: object at %p "
+                     "has been already deallocated (it was allocated with %s)",
+                     data_addr(), AllocName(map_type & ~kDeallocatedTypeBit));
+    }
+    if (alloc_type_ == kMagicDeletedSizeT) {
+      RAW_LOG(FATAL, "memory stomping bug: a word before object at %p "
+                     "has been corrupted; or else the object has been already "
+                     "deallocated and our memory map has been corrupted",
+                     data_addr());
+    }
+    if (!IsValidMagicValue(magic1_)) {
+      RAW_LOG(FATAL, "memory stomping bug: a word before object at %p "
+                     "has been corrupted; "
+                     "or else our memory map has been corrupted and this is a "
+                     "deallocation of an object that is not (currently) heap-allocated",
+                     data_addr());
+    }
+    if (!IsMMapped()) {
+      if (size1_ != *size2_addr()) {
+        RAW_LOG(FATAL, "memory stomping bug: a word after object at %p "
+                       "has been corrupted", data_addr());
+      }
+      if (!IsValidMagicValue(*magic2_addr())) {
+        RAW_LOG(FATAL, "memory stomping bug: a word after object at %p "
+                       "has been corrupted", data_addr());
+      }
+    }
+    if (alloc_type_ != type) {
+      if ((alloc_type_ != MallocBlock::kMallocType) &&
+          (alloc_type_ != MallocBlock::kNewType) &&
+          (alloc_type_ != MallocBlock::kArrayNewType)) {
+        RAW_LOG(FATAL, "memory stomping bug: a word before object at %p "
+                       "has been corrupted", data_addr());
+      }
+      RAW_LOG(FATAL, "memory allocation/deallocation mismatch at %p: "
+                     "allocated with %s being deallocated with %s",
+                     data_addr(), AllocName(alloc_type_), DeallocName(type));
+    }
+    if (alloc_type_ != map_type) {
+      RAW_LOG(FATAL, "memory stomping bug: our memory map has been corrupted : "
+                     "allocation at %p made with %s "
+                     "is recorded in the map to be made with %s",
+                     data_addr(), AllocName(alloc_type_), AllocName(map_type));
+    }
+  }
+
+ public:  // public accessors
+
+  void* data_addr() { return (void*)&size2_; }
+  const void* data_addr() const { return (const void*)&size2_; }
+
+  static size_t data_offset() { return OFFSETOF_MEMBER(MallocBlock, size2_); }
+
+  size_t data_size() const { return size1_; }
+
+  void set_offset(int offset) { this->offset_ = offset; }
+
+ public:  // our main interface
+
+  static MallocBlock* Allocate(size_t size, int type) {
+    // Prevent an integer overflow / crash with large allocation sizes.
+    // TODO - Note that for e.g. a 64-bit size_t, max_size_t may not actually
+    // be the maximum value, depending on how the compiler treats ~0. The worst
+    // practical effect is that allocations are limited to 4Gb or so, even if
+    // the address space could take more.
+    static size_t max_size_t = ~0;
+    if (size > max_size_t - sizeof(MallocBlock)) {
+      RAW_LOG(ERROR, "Massive size passed to malloc: %" PRIuS "", size);
+      return NULL;
+    }
+    MallocBlock* b = NULL;
+    const bool use_malloc_page_fence = FLAGS_malloc_page_fence;
+#ifdef HAVE_MMAP
+    if (use_malloc_page_fence) {
+      // Put the block towards the end of the page and make the next page
+      // inaccessible. This will catch buffer overruns right when they happen.
+      size_t sz = real_mmapped_size(size);
+      int pagesize = getpagesize();
+      int num_pages = (sz + pagesize - 1) / pagesize + 1;
+      char* p = (char*) mmap(NULL, num_pages * pagesize, PROT_READ|PROT_WRITE,
+                             MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+      if (p == MAP_FAILED) {
+        // If the allocation fails, abort rather than returning NULL to
+        // malloc. This is because in most cases, the program will run out
+        // of memory in this mode due to the tremendous amount of waste. There
+        // is no point in propagating the error elsewhere.
+        RAW_LOG(FATAL, "Out of memory: possibly due to page fence overhead: %s",
+                strerror(errno));
+      }
+      // Mark the page after the block inaccessible
+      if (mprotect(p + (num_pages - 1) * pagesize, pagesize, PROT_NONE)) {
+        RAW_LOG(FATAL, "Guard page setup failed: %s", strerror(errno));
+      }
+      b = (MallocBlock*) (p + (num_pages - 1) * pagesize - sz);
+    } else {
+      b = (MallocBlock*) do_malloc(real_malloced_size(size));
+    }
+#else
+    b = (MallocBlock*) do_malloc(real_malloced_size(size));
+#endif
+
+    // It would be nice to output a diagnostic on allocation failure
+    // here, but logging (other than FATAL) requires allocating
+    // memory, which could trigger a nasty recursion. Instead, preserve
+    // malloc semantics and return NULL on failure.
+    if (b != NULL) {
+      b->magic1_ = use_malloc_page_fence ? kMagicMMap : kMagicMalloc;
+      b->Initialize(size, type);
+    }
+    return b;
+  }
+
+  void Deallocate(int type, size_t given_size) {
+    if (IsMMapped()) {  // have to do this before CheckAndClear
+#ifdef HAVE_MMAP
+      int size = CheckAndClear(type, given_size);
+      int pagesize = getpagesize();
+      int num_pages = (size + pagesize - 1) / pagesize + 1;
+      char* p = (char*) this;
+      if (FLAGS_malloc_page_fence_never_reclaim ||
+          !FLAGS_malloc_reclaim_memory) {
+        mprotect(p - (num_pages - 1) * pagesize + size,
+                 num_pages * pagesize, PROT_NONE);
+      } else {
+        munmap(p - (num_pages - 1) * pagesize + size, num_pages * pagesize);
+      }
+#endif
+    } else {
+      const size_t size = CheckAndClear(type, given_size);
+      if (FLAGS_malloc_reclaim_memory) {
+        // Instead of freeing the block immediately, push it onto a queue of
+        // recently freed blocks. Free only enough blocks to keep from
+        // exceeding the capacity of the queue, and to keep the total amount
+        // of un-released memory in the queue from exceeding
+        // FLAGS_max_free_queue_size.
+        ProcessFreeQueue(this, size, FLAGS_max_free_queue_size);
+      }
+    }
+  }
+
+  static size_t FreeQueueSize() {
+    SpinLockHolder l(&free_queue_lock_);
+    return free_queue_size_;
+  }
+
+  static void ProcessFreeQueue(MallocBlock* b, size_t size,
+                               int max_free_queue_size) {
+    // A MallocBlockQueueEntry is about 144 bytes in size, so we can only
+    // use a small array of them on the stack.
+    MallocBlockQueueEntry entries[4];
+    int num_entries = 0;
+    MallocBlockQueueEntry new_entry(b, size);
+    free_queue_lock_.Lock();
+    if (free_queue_ == NULL)
+      free_queue_ = new FreeQueue<MallocBlockQueueEntry>;
+    RAW_CHECK(!free_queue_->Full(), "Free queue mustn't be full!");
+
+    if (b != NULL) {
+      free_queue_size_ += size + sizeof(MallocBlockQueueEntry);
+      free_queue_->Push(new_entry);
+    }
+
+    // Free blocks until the total size of unfreed blocks no longer exceeds
+    // max_free_queue_size, and the free queue has at least one free
+    // space in it.
+    while (free_queue_size_ > max_free_queue_size || free_queue_->Full()) {
+      RAW_CHECK(num_entries < arraysize(entries), "entries array overflow");
+      entries[num_entries] = free_queue_->Pop();
+      free_queue_size_ -=
+          entries[num_entries].size + sizeof(MallocBlockQueueEntry);
+      num_entries++;
+      if (num_entries == arraysize(entries)) {
+        // The queue will not be full at this point, so it is ok to
+        // release the lock. The queue may still contain more than
+        // max_free_queue_size, but this is not a strict invariant.
+ free_queue_lock_.Unlock(); + for (int i = 0; i < num_entries; i++) { + CheckForDanglingWrites(entries[i]); + do_free(entries[i].block); + } + num_entries = 0; + free_queue_lock_.Lock(); + } + } + RAW_CHECK(free_queue_size_ >= 0, "Free queue size went negative!"); + free_queue_lock_.Unlock(); + for (int i = 0; i < num_entries; i++) { + CheckForDanglingWrites(entries[i]); + do_free(entries[i].block); + } + } + + static void InitDeletedBuffer() { + memset(kMagicDeletedBuffer, kMagicDeletedByte, sizeof(kMagicDeletedBuffer)); + deleted_buffer_initialized_no_pthreads_ = true; + } + + static void CheckForDanglingWrites(const MallocBlockQueueEntry& queue_entry) { + // Initialize the buffer if necessary. + if (pthread_once) + pthread_once(&deleted_buffer_initialized_, &InitDeletedBuffer); + if (!deleted_buffer_initialized_no_pthreads_) { + // This will be the case on systems that don't link in pthreads, + // including on FreeBSD where pthread_once has a non-zero address + // (but doesn't do anything) even when pthreads isn't linked in. + InitDeletedBuffer(); + } + + const unsigned char* p = + reinterpret_cast<unsigned char*>(queue_entry.block); + + static const size_t size_of_buffer = sizeof(kMagicDeletedBuffer); + const size_t size = queue_entry.size; + const size_t buffers = size / size_of_buffer; + const size_t remainder = size % size_of_buffer; + size_t buffer_idx; + for (buffer_idx = 0; buffer_idx < buffers; ++buffer_idx) { + CheckForCorruptedBuffer(queue_entry, buffer_idx, p, size_of_buffer); + p += size_of_buffer; + } + CheckForCorruptedBuffer(queue_entry, buffer_idx, p, remainder); + } + + static void CheckForCorruptedBuffer(const MallocBlockQueueEntry& queue_entry, + size_t buffer_idx, + const unsigned char* buffer, + size_t size_of_buffer) { + if (memcmp(buffer, kMagicDeletedBuffer, size_of_buffer) == 0) { + return; + } + + RAW_LOG(ERROR, + "Found a corrupted memory buffer in MallocBlock (may be offset " + "from user ptr): buffer index: %zd, buffer ptr: %p, size of " + "buffer: %zd", buffer_idx, buffer, size_of_buffer); + + // The magic deleted buffer should only be 1024 bytes, but in case + // this changes, let's put an upper limit on the number of debug + // lines we'll output: + if (size_of_buffer <= 1024) { + for (int i = 0; i < size_of_buffer; ++i) { + if (buffer[i] != kMagicDeletedByte) { + RAW_LOG(ERROR, "Buffer byte %d is 0x%02x (should be 0x%02x).", + i, buffer[i], kMagicDeletedByte); + } + } + } else { + RAW_LOG(ERROR, "Buffer too large to print corruption."); + } + + const MallocBlock* b = queue_entry.block; + const size_t size = queue_entry.size; + if (queue_entry.num_deleter_pcs > 0) { + TracePrintf(STDERR_FILENO, "Deleted by thread %p\n", + reinterpret_cast<void*>( + PRINTABLE_PTHREAD(queue_entry.deleter_threadid))); + + // We don't want to allocate or deallocate memory here, so we use + // placement-new. It's ok that we don't destroy this, since we're + // just going to error-exit below anyway. Union is for alignment. + union { void* alignment; char buf[sizeof(SymbolTable)]; } tablebuf; + SymbolTable* symbolization_table = new (tablebuf.buf) SymbolTable; + for (int i = 0; i < queue_entry.num_deleter_pcs; i++) { + // Symbolizes the previous address of pc because pc may be in the + // next function. This may happen when the function ends with + // a call to a function annotated noreturn (e.g. CHECK). 
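+        // Editorial illustration (not in the original source) of the
+        // off-by-one above: for machine code like
+        //
+        //   call CheckFailure      ; noreturn call, last insn of Caller()
+        //   <first byte of NextFunction()>
+        //
+        // the recorded return address already points at NextFunction(),
+        // so symbolizing (pc - 1) attributes the frame to Caller().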
+        char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]);
+        symbolization_table->Add(pc - 1);
+      }
+      if (FLAGS_symbolize_stacktrace)
+        symbolization_table->Symbolize();
+      for (int i = 0; i < queue_entry.num_deleter_pcs; i++) {
+        char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]);
+        TracePrintf(STDERR_FILENO, " @ %p %s\n",
+                    pc, symbolization_table->GetSymbol(pc - 1));
+      }
+    } else {
+      RAW_LOG(ERROR,
+              "Skipping the printing of the deleter's stack! Its stack was "
+              "not found; either the corruption occurred too early in "
+              "execution to obtain a stack trace or --max_free_queue_size was "
+              "set to 0.");
+    }
+
+    RAW_LOG(FATAL,
+            "Memory was written to after being freed. MallocBlock: %p, user "
+            "ptr: %p, size: %zd. If you can't find the source of the error, "
+            "try using ASan (http://code.google.com/p/address-sanitizer/), "
+            "Valgrind, or Purify, or study the "
+            "output of the deleter's stack printed above.",
+            b, b->data_addr(), size);
+  }
+
+  static MallocBlock* FromRawPointer(void* p) {
+    const size_t data_offset = MallocBlock::data_offset();
+    // Find the header just before client's memory.
+    MallocBlock *mb = reinterpret_cast<MallocBlock *>(
+        reinterpret_cast<char *>(p) - data_offset);
+    // If mb->alloc_type_ is kMagicDeletedSizeT, we're not an ok pointer.
+    if (mb->alloc_type_ == kMagicDeletedSizeT) {
+      RAW_LOG(FATAL, "memory allocation bug: object at %p has been already"
+                     " deallocated; or else a word before the object has been"
+                     " corrupted (memory stomping bug)", p);
+    }
+    // If mb->offset_ is zero (common case), mb is the real header.
+    // If mb->offset_ is non-zero, this block was allocated by the debug
+    // memalign implementation, and mb->offset_ is the distance
+    // backwards to the real header from mb, which is a fake header.
+    if (mb->offset_ == 0) {
+      return mb;
+    }
+
+    MallocBlock *main_block = reinterpret_cast<MallocBlock *>(
+        reinterpret_cast<char *>(mb) - mb->offset_);
+
+    if (main_block->offset_ != 0) {
+      RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted."
+                     " Need 0 but got %x",
+              (unsigned)(main_block->offset_));
+    }
+    if (main_block >= p) {
+      RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted."
+                     " Detected main_block address overflow: %x",
+              (unsigned)(mb->offset_));
+    }
+    if (main_block->size2_addr() < p) {
+      RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted."
+                     " It points below its own main_block: %x",
+              (unsigned)(mb->offset_));
+    }
+
+    return main_block;
+  }
+
+  static const MallocBlock* FromRawPointer(const void* p) {
+    // const-safe version: we just cast about
+    return FromRawPointer(const_cast<void*>(p));
+  }
+
+  void Check(int type) const {
+    alloc_map_lock_.Lock();
+    CheckLocked(type);
+    alloc_map_lock_.Unlock();
+  }
+
+  static bool CheckEverything() {
+    alloc_map_lock_.Lock();
+    if (alloc_map_ != NULL) alloc_map_->Iterate(CheckCallback, 0);
+    alloc_map_lock_.Unlock();
+    return true;  // if we get here, we're okay
+  }
+
+  static bool MemoryStats(int* blocks, size_t* total,
+                          int histogram[kMallocHistogramSize]) {
+    memset(histogram, 0, kMallocHistogramSize * sizeof(int));
+    alloc_map_lock_.Lock();
+    stats_blocks_ = 0;
+    stats_total_ = 0;
+    stats_histogram_ = histogram;
+    if (alloc_map_ != NULL) alloc_map_->Iterate(StatsCallback, 0);
+    *blocks = stats_blocks_;
+    *total = stats_total_;
+    alloc_map_lock_.Unlock();
+    return true;
+  }
+
+ private:  // helpers for CheckEverything and MemoryStats
+
+  static void CheckCallback(const void* ptr, int* type, int dummy) {
+    if ((*type & kDeallocatedTypeBit) == 0) {
+      FromRawPointer(ptr)->CheckLocked(*type);
+    }
+  }
+
+  // Accumulation variables for StatsCallback protected by alloc_map_lock_
+  static int stats_blocks_;
+  static size_t stats_total_;
+  static int* stats_histogram_;
+
+  static void StatsCallback(const void* ptr, int* type, int dummy) {
+    if ((*type & kDeallocatedTypeBit) == 0) {
+      const MallocBlock* b = FromRawPointer(ptr);
+      b->CheckLocked(*type);
+      ++stats_blocks_;
+      size_t mysize = b->size1_;
+      int entry = 0;
+      stats_total_ += mysize;
+      while (mysize) {
+        ++entry;
+        mysize >>= 1;
+      }
+      RAW_CHECK(entry < kMallocHistogramSize,
+                "kMallocHistogramSize should be at least as large as log2 "
+                "of the maximum process memory size");
+      stats_histogram_[entry] += 1;
+    }
+  }
+};
+
+void DanglingWriteChecker() {
+  // Clear out the remaining free queue to check for dangling writes.
+  MallocBlock::ProcessFreeQueue(NULL, 0, 0);
+}
+
+// ========================================================================= //
+
+const int MallocBlock::kMagicMalloc;
+const int MallocBlock::kMagicMMap;
+
+MallocBlock::AllocMap* MallocBlock::alloc_map_ = NULL;
+SpinLock MallocBlock::alloc_map_lock_(SpinLock::LINKER_INITIALIZED);
+
+FreeQueue<MallocBlockQueueEntry>* MallocBlock::free_queue_ = NULL;
+size_t MallocBlock::free_queue_size_ = 0;
+SpinLock MallocBlock::free_queue_lock_(SpinLock::LINKER_INITIALIZED);
+
+unsigned char MallocBlock::kMagicDeletedBuffer[1024];
+pthread_once_t MallocBlock::deleted_buffer_initialized_ = PTHREAD_ONCE_INIT;
+bool MallocBlock::deleted_buffer_initialized_no_pthreads_ = false;
+
+const char* const MallocBlock::kAllocName[] = {
+  "malloc",
+  "new",
+  "new []",
+  NULL,
+};
+
+const char* const MallocBlock::kDeallocName[] = {
+  "free",
+  "delete",
+  "delete []",
+  NULL,
+};
+
+int MallocBlock::stats_blocks_;
+size_t MallocBlock::stats_total_;
+int* MallocBlock::stats_histogram_;
+
+// ========================================================================= //
+
+// The following cut-down version of printf() avoids
+// using stdio or ostreams.
+// This is to guarantee no recursive calls into
+// the allocator and to bound the stack space consumed. (The pthread
+// manager thread in linuxthreads has a very small stack,
+// so fprintf can't be called.)
+static void TracePrintf(int fd, const char *fmt, ...)
{ + char buf[64]; + int i = 0; + va_list ap; + va_start(ap, fmt); + const char *p = fmt; + char numbuf[25]; + if (fd < 0) { + return; + } + numbuf[sizeof(numbuf)-1] = 0; + while (*p != '\0') { // until end of format string + char *s = &numbuf[sizeof(numbuf)-1]; + if (p[0] == '%' && p[1] != 0) { // handle % formats + int64 l = 0; + unsigned long base = 0; + if (*++p == 's') { // %s + s = va_arg(ap, char *); + } else if (*p == 'l' && p[1] == 'd') { // %ld + l = va_arg(ap, long); + base = 10; + p++; + } else if (*p == 'l' && p[1] == 'u') { // %lu + l = va_arg(ap, unsigned long); + base = 10; + p++; + } else if (*p == 'z' && p[1] == 'u') { // %zu + l = va_arg(ap, size_t); + base = 10; + p++; + } else if (*p == 'u') { // %u + l = va_arg(ap, unsigned int); + base = 10; + } else if (*p == 'd') { // %d + l = va_arg(ap, int); + base = 10; + } else if (*p == 'p') { // %p + l = va_arg(ap, intptr_t); + base = 16; + } else { + write(STDERR_FILENO, "Unimplemented TracePrintf format\n", 33); + write(STDERR_FILENO, p, 2); + write(STDERR_FILENO, "\n", 1); + abort(); + } + p++; + if (base != 0) { + bool minus = (l < 0 && base == 10); + uint64 ul = minus? -l : l; + do { + *--s = "0123456789abcdef"[ul % base]; + ul /= base; + } while (ul != 0); + if (base == 16) { + *--s = 'x'; + *--s = '0'; + } else if (minus) { + *--s = '-'; + } + } + } else { // handle normal characters + *--s = *p++; + } + while (*s != 0) { + if (i == sizeof(buf)) { + write(fd, buf, i); + i = 0; + } + buf[i++] = *s++; + } + } + if (i != 0) { + write(fd, buf, i); + } + va_end(ap); +} + +// Return the file descriptor we're writing a log to +static int TraceFd() { + static int trace_fd = -1; + if (trace_fd == -1) { // Open the trace file on the first call + const char *val = getenv("TCMALLOC_TRACE_FILE"); + bool fallback_to_stderr = false; + if (!val) { + val = "/tmp/google.alloc"; + fallback_to_stderr = true; + } + trace_fd = open(val, O_CREAT|O_TRUNC|O_WRONLY, 0666); + if (trace_fd == -1) { + if (fallback_to_stderr) { + trace_fd = 2; + TracePrintf(trace_fd, "Can't open %s. Logging to stderr.\n", val); + } else { + TracePrintf(2, "Can't open %s. Logging disabled.\n", val); + } + } + // Add a header to the log. + TracePrintf(trace_fd, "Trace started: %lu\n", + static_cast<unsigned long>(time(NULL))); + TracePrintf(trace_fd, + "func\tsize\tptr\tthread_id\tstack pcs for tools/symbolize\n"); + } + return trace_fd; +} + +// Print the hex stack dump on a single line. PCs are separated by tabs. +static void TraceStack(void) { + void *pcs[16]; + int n = GetStackTrace(pcs, sizeof(pcs)/sizeof(pcs[0]), 0); + for (int i = 0; i != n; i++) { + TracePrintf(TraceFd(), "\t%p", pcs[i]); + } +} + +// This protects MALLOC_TRACE, to make sure its info is atomically written. +static SpinLock malloc_trace_lock(SpinLock::LINKER_INITIALIZED); + +#define MALLOC_TRACE(name, size, addr) \ + do { \ + if (FLAGS_malloctrace) { \ + SpinLockHolder l(&malloc_trace_lock); \ + TracePrintf(TraceFd(), "%s\t%" PRIuS "\t%p\t%" GPRIuPTHREAD, \ + name, size, addr, PRINTABLE_PTHREAD(pthread_self())); \ + TraceStack(); \ + TracePrintf(TraceFd(), "\n"); \ + } \ + } while (0) + +// ========================================================================= // + +// Write the characters buf[0, ..., size-1] to +// the malloc trace buffer. +// This function is intended for debugging, +// and is not declared in any header file. +// You must insert a declaration of it by hand when you need +// to use it. 
+void __malloctrace_write(const char *buf, size_t size) { + if (FLAGS_malloctrace) { + write(TraceFd(), buf, size); + } +} + +// ========================================================================= // + +// General debug allocation/deallocation + +static inline void* DebugAllocate(size_t size, int type) { + MallocBlock* ptr = MallocBlock::Allocate(size, type); + if (ptr == NULL) return NULL; + MALLOC_TRACE("malloc", size, ptr->data_addr()); + return ptr->data_addr(); +} + +static inline void DebugDeallocate(void* ptr, int type, size_t given_size) { + MALLOC_TRACE("free", + (ptr != 0 ? MallocBlock::FromRawPointer(ptr)->data_size() : 0), + ptr); + if (ptr) MallocBlock::FromRawPointer(ptr)->Deallocate(type, given_size); +} + +// ========================================================================= // + +// The following functions may be called via MallocExtension::instance() +// for memory verification and statistics. +class DebugMallocImplementation : public TCMallocImplementation { + public: + virtual bool GetNumericProperty(const char* name, size_t* value) { + bool result = TCMallocImplementation::GetNumericProperty(name, value); + if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) { + // Subtract bytes kept in the free queue + size_t qsize = MallocBlock::FreeQueueSize(); + if (*value >= qsize) { + *value -= qsize; + } + } + return result; + } + + virtual bool VerifyNewMemory(const void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType); + return true; + } + + virtual bool VerifyArrayNewMemory(const void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType); + return true; + } + + virtual bool VerifyMallocMemory(const void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType); + return true; + } + + virtual bool VerifyAllMemory() { + return MallocBlock::CheckEverything(); + } + + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + return MallocBlock::MemoryStats(blocks, total, histogram); + } + + virtual size_t GetEstimatedAllocatedSize(size_t size) { + return size; + } + + virtual size_t GetAllocatedSize(const void* p) { + if (p) { + RAW_CHECK(GetOwnership(p) != MallocExtension::kNotOwned, + "ptr not allocated by tcmalloc"); + return MallocBlock::FromRawPointer(p)->data_size(); + } + return 0; + } + + virtual MallocExtension::Ownership GetOwnership(const void* p) { + if (!p) { + // nobody owns NULL + return MallocExtension::kNotOwned; + } + + // FIXME: note that correct GetOwnership should not touch memory + // that is not owned by tcmalloc. Main implementation is using + // pagemap to discover if page in question is owned by us or + // not. But pagemap only has marks for first and last page of + // spans. Note that if p was returned out of our memalign with + // big alignment, then it will point outside of marked pages. Also + // note that FromRawPointer call below requires touching memory + // before pointer in order to handle memalign-ed chunks + // (offset_). 
This leaves us with two options:
+    //
+    // * do FromRawPointer first and accept the possibility of crashing if
+    //   we're given a pointer we don't own
+    //
+    // * return incorrect ownership for those large memalign chunks
+    //
+    // I've decided to choose the latter, which appears to happen more
+    // rarely and is therefore arguably the lesser evil.
+
+    MallocExtension::Ownership rv = TCMallocImplementation::GetOwnership(p);
+    if (rv != MallocExtension::kOwned) {
+      return rv;
+    }
+
+    const MallocBlock* mb = MallocBlock::FromRawPointer(p);
+    return TCMallocImplementation::GetOwnership(mb);
+  }
+
+  virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
+    static const char* kDebugFreeQueue = "debug.free_queue";
+
+    TCMallocImplementation::GetFreeListSizes(v);
+
+    MallocExtension::FreeListInfo i;
+    i.type = kDebugFreeQueue;
+    i.min_object_size = 0;
+    i.max_object_size = numeric_limits<size_t>::max();
+    i.total_bytes_free = MallocBlock::FreeQueueSize();
+    v->push_back(i);
+  }
+
+};
+
+static union {
+  char chars[sizeof(DebugMallocImplementation)];
+  void *ptr;
+} debug_malloc_implementation_space;
+
+REGISTER_MODULE_INITIALIZER(debugallocation, {
+#if (__cplusplus >= 201103L)
+  COMPILE_ASSERT(alignof(debug_malloc_implementation_space) >= alignof(DebugMallocImplementation),
+                 debug_malloc_implementation_space_is_not_properly_aligned);
+#endif
+  // Either we or valgrind will control memory management. We
+  // register our extension if we're the winner. Otherwise let
+  // Valgrind use its own malloc (so don't register our extension).
+  if (!RunningOnValgrind()) {
+    DebugMallocImplementation *impl = new (debug_malloc_implementation_space.chars) DebugMallocImplementation();
+    MallocExtension::Register(impl);
+  }
+});
+
+REGISTER_MODULE_DESTRUCTOR(debugallocation, {
+  if (!RunningOnValgrind()) {
+    // When the program exits, check all blocks still in the free
+    // queue for corruption.
+    DanglingWriteChecker();
+  }
+});
+
+// ========================================================================= //
+
+struct debug_alloc_retry_data {
+  size_t size;
+  int new_type;
+};
+
+static void *retry_debug_allocate(void *arg) {
+  debug_alloc_retry_data *data = static_cast<debug_alloc_retry_data *>(arg);
+  return DebugAllocate(data->size, data->new_type);
+}
+
+// This is mostly the same as cpp_alloc in tcmalloc.cc.
+// TODO(csilvers): change Allocate() above to call cpp_alloc, so we
+// don't have to reproduce the logic here. To make tc_new_mode work
+// properly, I think we'll need to separate out the logic of throwing
+// from the logic of calling the new-handler.
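+// Editorial aside (not in the original source): the raw-byte union plus
+// placement-new used above for debug_malloc_implementation_space avoids
+// running a static constructor at an unpredictable point during startup.
+// The same idiom in miniature, with a hypothetical type T:
+//
+//   static union { char buf[sizeof(T)]; void* align; } space;
+//   T* t = new (space.buf) T();   // constructed exactly when we choose
+//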
+inline void* debug_cpp_alloc(size_t size, int new_type, bool nothrow) { + void* p = DebugAllocate(size, new_type); + if (p != NULL) { + return p; + } + struct debug_alloc_retry_data data; + data.size = size; + data.new_type = new_type; + return handle_oom(retry_debug_allocate, &data, + true, nothrow); +} + +inline void* do_debug_malloc_or_debug_cpp_alloc(size_t size) { + void* p = DebugAllocate(size, MallocBlock::kMallocType); + if (p != NULL) { + return p; + } + struct debug_alloc_retry_data data; + data.size = size; + data.new_type = MallocBlock::kMallocType; + return handle_oom(retry_debug_allocate, &data, + false, true); +} + +// Exported routines + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW { + if (ThreadCache::IsUseEmergencyMalloc()) { + return tcmalloc::EmergencyMalloc(size); + } + void* ptr = do_debug_malloc_or_debug_cpp_alloc(size); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW { + if (tcmalloc::IsEmergencyPtr(ptr)) { + return tcmalloc::EmergencyFree(ptr); + } + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, size); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t count, size_t size) PERFTOOLS_THROW { + if (ThreadCache::IsUseEmergencyMalloc()) { + return tcmalloc::EmergencyCalloc(count, size); + } + // Overflow check + const size_t total_size = count * size; + if (size != 0 && total_size / size != count) return NULL; + + void* block = do_debug_malloc_or_debug_cpp_alloc(total_size); + MallocHook::InvokeNewHook(block, total_size); + if (block) memset(block, 0, total_size); + return block; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW { + if (tcmalloc::IsEmergencyPtr(ptr)) { + return tcmalloc::EmergencyFree(ptr); + } + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW { + if (tcmalloc::IsEmergencyPtr(ptr)) { + return tcmalloc::EmergencyRealloc(ptr, size); + } + if (ptr == NULL) { + ptr = do_debug_malloc_or_debug_cpp_alloc(size); + MallocHook::InvokeNewHook(ptr, size); + return ptr; + } + if (size == 0) { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); + return NULL; + } + MallocBlock* old = MallocBlock::FromRawPointer(ptr); + old->Check(MallocBlock::kMallocType); + MallocBlock* p = MallocBlock::Allocate(size, MallocBlock::kMallocType); + + // If realloc fails we are to leave the old block untouched and + // return null + if (p == NULL) return NULL; + + // if ptr was allocated via memalign, then old->data_size() is not + // start of user data. So we must be careful to copy only user-data + char *old_begin = (char *)old->data_addr(); + char *old_end = old_begin + old->data_size(); + + ssize_t old_ssize = old_end - (char *)ptr; + CHECK_CONDITION(old_ssize >= 0); + + size_t old_size = (size_t)old_ssize; + CHECK_CONDITION(old_size <= old->data_size()); + + memcpy(p->data_addr(), ptr, (old_size < size) ? 
old_size : size); + MallocHook::InvokeDeleteHook(ptr); + MallocHook::InvokeNewHook(p->data_addr(), size); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); + MALLOC_TRACE("realloc", p->data_size(), p->data_addr()); + return p->data_addr(); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { + void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new failed.", size); + } + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW { + void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType, 0); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw() { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType, size); +} + +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). +extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType, 0); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { + void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new[] failed.", size); + } + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + PERFTOOLS_THROW { + void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType, 0); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw() { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType, size); +} + +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType, 0); +} + +// This is mostly the same as do_memalign in tcmalloc.cc. +static void *do_debug_memalign(size_t alignment, size_t size) { + // Allocate >= size bytes aligned on "alignment" boundary + // "alignment" is a power of two. + void *p = 0; + RAW_CHECK((alignment & (alignment-1)) == 0, "must be power of two"); + const size_t data_offset = MallocBlock::data_offset(); + // Allocate "alignment-1" extra bytes to ensure alignment is possible, and + // a further data_offset bytes for an additional fake header. + size_t extra_bytes = data_offset + alignment - 1; + if (size + extra_bytes < size) return NULL; // Overflow + p = DebugAllocate(size + extra_bytes, MallocBlock::kMallocType); + if (p != 0) { + intptr_t orig_p = reinterpret_cast<intptr_t>(p); + // Leave data_offset bytes for fake header, and round up to meet + // alignment. 
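+    // Editorial sketch (not in the original source) of the layout that the
+    // arithmetic below produces; data_offset == MallocBlock::data_offset():
+    //
+    //   [ real header ][ padding ][ fake header ][ client data ... ]
+    //   ^real block    ^orig_p    ^p - data_offset^p (aligned)
+    //
+    // The fake header's offset_ stores p - orig_p, the distance back to the
+    // real header, which FromRawPointer() uses to recover the real block.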
+ p = reinterpret_cast<void *>(RoundUp(orig_p + data_offset, alignment)); + // Create a fake header block with an offset_ that points back to the + // real header. FromRawPointer uses this value. + MallocBlock *fake_hdr = reinterpret_cast<MallocBlock *>( + reinterpret_cast<char *>(p) - data_offset); + // offset_ is distance between real and fake headers. + // p is now end of fake header (beginning of client area), + // and orig_p is the end of the real header, so offset_ + // is their difference. + // + // Note that other fields of fake_hdr are initialized with + // kMagicUninitializedByte + fake_hdr->set_offset(reinterpret_cast<intptr_t>(p) - orig_p); + } + return p; +} + +struct memalign_retry_data { + size_t align; + size_t size; +}; + +static void *retry_debug_memalign(void *arg) { + memalign_retry_data *data = static_cast<memalign_retry_data *>(arg); + return do_debug_memalign(data->align, data->size); +} + +inline void* do_debug_memalign_or_debug_cpp_memalign(size_t align, + size_t size) { + void* p = do_debug_memalign(align, size); + if (p != NULL) { + return p; + } + + struct memalign_retry_data data; + data.align = align; + data.size = size; + return handle_oom(retry_debug_memalign, &data, + false, true); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, size_t size) PERFTOOLS_THROW { + void *p = do_debug_memalign_or_debug_cpp_memalign(align, size); + MallocHook::InvokeNewHook(p, size); + return p; +} + +// Implementation taken from tcmalloc/tcmalloc.cc +extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(void** result_ptr, size_t align, size_t size) + PERFTOOLS_THROW { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + + void* result = do_debug_memalign_or_debug_cpp_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_THROW { + // Allocate >= size bytes starting on a page boundary + void *p = do_debug_memalign_or_debug_cpp_memalign(getpagesize(), size); + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_THROW { + // Round size up to a multiple of pages + // then allocate memory on a page boundary + int pagesize = getpagesize(); + size = RoundUp(size, pagesize); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + void *p = do_debug_memalign_or_debug_cpp_memalign(pagesize, size); + MallocHook::InvokeNewHook(p, size); + return p; +} + +// malloc_stats just falls through to the base implementation. 
+extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW { + do_malloc_stats(); +} + +extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW { + return do_mallopt(cmd, value); +} + +#ifdef HAVE_STRUCT_MALLINFO +extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW { + return do_mallinfo(); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW { + return MallocExtension::instance()->GetAllocatedSize(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW { + void* result = DebugAllocate(size, MallocBlock::kMallocType); + MallocHook::InvokeNewHook(result, size); + return result; +} diff --git a/src/third_party/gperftools-2.5/src/emergency_malloc.cc b/src/third_party/gperftools-2.5/src/emergency_malloc.cc new file mode 100644 index 00000000000..81c55541ad2 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/emergency_malloc.cc @@ -0,0 +1,169 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// + +#include "config.h" + +#include "emergency_malloc.h" + +#include <errno.h> // for ENOMEM, errno +#include <string.h> // for memset + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/low_level_alloc.h" +#include "base/spinlock.h" +#include "internal_logging.h" + + +namespace tcmalloc { + __attribute__ ((visibility("internal"))) char *emergency_arena_start; + __attribute__ ((visibility("internal"))) uintptr_t emergency_arena_start_shifted; + + static CACHELINE_ALIGNED SpinLock emergency_malloc_lock(base::LINKER_INITIALIZED); + static char *emergency_arena_end; + static LowLevelAlloc::Arena *emergency_arena; + + class EmergencyArenaPagesAllocator : public LowLevelAlloc::PagesAllocator { + ~EmergencyArenaPagesAllocator() {} + void *MapPages(int32 flags, size_t size) { + char *new_end = emergency_arena_end + size; + if (new_end > emergency_arena_start + kEmergencyArenaSize) { + RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes in emergency zone.", size); + } + char *rv = emergency_arena_end; + emergency_arena_end = new_end; + return static_cast<void *>(rv); + } + void UnMapPages(int32 flags, void *addr, size_t size) { + RAW_LOG(FATAL, "UnMapPages is not implemented for emergency arena"); + } + }; + + static union { + char bytes[sizeof(EmergencyArenaPagesAllocator)]; + void *ptr; + } pages_allocator_place; + + static void InitEmergencyMalloc(void) { + const int32 flags = LowLevelAlloc::kAsyncSignalSafe; + + void *arena = LowLevelAlloc::GetDefaultPagesAllocator()->MapPages(flags, kEmergencyArenaSize * 2); + + uintptr_t arena_ptr = reinterpret_cast<uintptr_t>(arena); + uintptr_t ptr = (arena_ptr + kEmergencyArenaSize - 1) & ~(kEmergencyArenaSize-1); + + emergency_arena_end = emergency_arena_start = reinterpret_cast<char *>(ptr); + EmergencyArenaPagesAllocator *allocator = new (pages_allocator_place.bytes) EmergencyArenaPagesAllocator(); + emergency_arena = LowLevelAlloc::NewArenaWithCustomAlloc(0, LowLevelAlloc::DefaultArena(), allocator); + + emergency_arena_start_shifted = reinterpret_cast<uintptr_t>(emergency_arena_start) >> kEmergencyArenaShift; + + uintptr_t head_unmap_size = ptr - arena_ptr; + CHECK_CONDITION(head_unmap_size < kEmergencyArenaSize); + if (head_unmap_size != 0) { + LowLevelAlloc::GetDefaultPagesAllocator()->UnMapPages(flags, arena, ptr - arena_ptr); + } + + uintptr_t tail_unmap_size = kEmergencyArenaSize - head_unmap_size; + void *tail_start = reinterpret_cast<void *>(arena_ptr + head_unmap_size + kEmergencyArenaSize); + LowLevelAlloc::GetDefaultPagesAllocator()->UnMapPages(flags, tail_start, tail_unmap_size); + } + + PERFTOOLS_DLL_DECL void *EmergencyMalloc(size_t size) { + SpinLockHolder l(&emergency_malloc_lock); + + if (emergency_arena_start == NULL) { + InitEmergencyMalloc(); + CHECK_CONDITION(emergency_arena_start != NULL); + } + + void *rv = LowLevelAlloc::AllocWithArena(size, emergency_arena); + if (rv == NULL) { + errno = ENOMEM; + } + return rv; + } + + PERFTOOLS_DLL_DECL void EmergencyFree(void *p) { + SpinLockHolder l(&emergency_malloc_lock); + if (emergency_arena_start == NULL) { + InitEmergencyMalloc(); + CHECK_CONDITION(emergency_arena_start != NULL); + free(p); + return; + } + CHECK_CONDITION(emergency_arena_start); + LowLevelAlloc::Free(p); + } + + PERFTOOLS_DLL_DECL void *EmergencyRealloc(void *_old_ptr, size_t new_size) { + if (_old_ptr == NULL) { + return EmergencyMalloc(new_size); + } + if (new_size == 0) { + EmergencyFree(_old_ptr); + return NULL; + } + SpinLockHolder l(&emergency_malloc_lock); + 
CHECK_CONDITION(emergency_arena_start); + + char *old_ptr = static_cast<char *>(_old_ptr); + CHECK_CONDITION(old_ptr <= emergency_arena_end); + CHECK_CONDITION(emergency_arena_start <= old_ptr); + + // NOTE: we don't know previous size of old_ptr chunk. So instead + // of trying to figure out right size of copied memory, we just + // copy largest possible size. We don't care about being slow. + size_t old_ptr_size = emergency_arena_end - old_ptr; + size_t copy_size = (new_size < old_ptr_size) ? new_size : old_ptr_size; + + void *new_ptr = LowLevelAlloc::AllocWithArena(new_size, emergency_arena); + if (new_ptr == NULL) { + errno = ENOMEM; + return NULL; + } + memcpy(new_ptr, old_ptr, copy_size); + + LowLevelAlloc::Free(old_ptr); + return new_ptr; + } + + PERFTOOLS_DLL_DECL void *EmergencyCalloc(size_t n, size_t elem_size) { + // Overflow check + const size_t size = n * elem_size; + if (elem_size != 0 && size / elem_size != n) return NULL; + void *rv = EmergencyMalloc(size); + if (rv != NULL) { + memset(rv, 0, size); + } + return rv; + } +}; diff --git a/src/third_party/gperftools-2.5/src/emergency_malloc.h b/src/third_party/gperftools-2.5/src/emergency_malloc.h new file mode 100644 index 00000000000..8ec53d231c5 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/emergency_malloc.h @@ -0,0 +1,60 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+#ifndef EMERGENCY_MALLOC_H
+#define EMERGENCY_MALLOC_H
+#include "config.h"
+
+#include <stddef.h>
+
+#include "base/basictypes.h"
+#include "common.h"
+
+namespace tcmalloc {
+  static const uintptr_t kEmergencyArenaShift = 20+4; // 16 megs
+  static const uintptr_t kEmergencyArenaSize = 1 << kEmergencyArenaShift;
+
+  extern __attribute__ ((visibility("internal"))) char *emergency_arena_start;
+  extern __attribute__ ((visibility("internal"))) uintptr_t emergency_arena_start_shifted;
+
+  PERFTOOLS_DLL_DECL void *EmergencyMalloc(size_t size);
+  PERFTOOLS_DLL_DECL void EmergencyFree(void *p);
+  PERFTOOLS_DLL_DECL void *EmergencyCalloc(size_t n, size_t elem_size);
+  PERFTOOLS_DLL_DECL void *EmergencyRealloc(void *old_ptr, size_t new_size);
+
+  static inline bool IsEmergencyPtr(const void *_ptr) {
+    uintptr_t ptr = reinterpret_cast<uintptr_t>(_ptr);
+    return UNLIKELY((ptr >> kEmergencyArenaShift) == emergency_arena_start_shifted)
+      && emergency_arena_start_shifted;
+  }
+
+} // namespace tcmalloc
+
+#endif
diff --git a/src/third_party/gperftools-2.5/src/emergency_malloc_for_stacktrace.cc b/src/third_party/gperftools-2.5/src/emergency_malloc_for_stacktrace.cc
new file mode 100644
index 00000000000..f1dc35e76a4
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/emergency_malloc_for_stacktrace.cc
@@ -0,0 +1,48 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2014, gperftools Contributors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
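+// Editorial note (not in the original source): the definitions in this file
+// are strong symbols that take precedence at link time over the
+// ATTRIBUTE_WEAK fallbacks in fake_stacktrace_scope.cc, so stack capture is
+// routed through emergency malloc only when this translation unit is linked
+// in:
+//
+//   ATTRIBUTE_WEAK bool EnterStacktraceScope(void) { return true; }  // fallback
+//   bool EnterStacktraceScope(void);  // strong definition below wins
+//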
+#include "emergency_malloc.h" +#include "thread_cache.h" + +namespace tcmalloc { + bool EnterStacktraceScope(void); + void LeaveStacktraceScope(void); +} + +bool tcmalloc::EnterStacktraceScope(void) { + if (ThreadCache::IsUseEmergencyMalloc()) { + return false; + } + ThreadCache::SetUseEmergencyMalloc(); + return true; +} + +void tcmalloc::LeaveStacktraceScope(void) { + ThreadCache::ResetUseEmergencyMalloc(); +} diff --git a/src/third_party/gperftools-2.5/src/fake_stacktrace_scope.cc b/src/third_party/gperftools-2.5/src/fake_stacktrace_scope.cc new file mode 100644 index 00000000000..ee35a041252 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/fake_stacktrace_scope.cc @@ -0,0 +1,39 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/basictypes.h" + +namespace tcmalloc { + ATTRIBUTE_WEAK bool EnterStacktraceScope(void) { + return true; + } + ATTRIBUTE_WEAK void LeaveStacktraceScope(void) { + } +} diff --git a/src/third_party/gperftools-2.5/src/getenv_safe.h b/src/third_party/gperftools-2.5/src/getenv_safe.h new file mode 100644 index 00000000000..3b9f4dbbcb2 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/getenv_safe.h @@ -0,0 +1,63 @@ +/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- + * Copyright (c) 2014, gperftools Contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GETENV_SAFE_H
+#define GETENV_SAFE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This getenv function is safe to call before the C runtime is initialized.
+ * On Windows, it utilizes GetEnvironmentVariable() and on unix it uses
+ * /proc/self/environ instead of calling getenv(). It's intended to be used
+ * in routines that run before main(), when the state required for getenv()
+ * may not be set up yet. In particular, errno isn't set up until relatively
+ * late (after the pthreads library has a chance to make it threadsafe), and
+ * getenv() doesn't work until then.
+ * On some platforms, this call will utilize the same, static buffer for
+ * repeated GetenvBeforeMain() calls. Callers should not expect pointers from
+ * this routine to be long lived.
+ * Note that on unix, /proc only has the environment at the time the
+ * application was started, so this routine ignores setenv() calls/etc. Also
+ * note it only reads the first 16K of the environment.
+ *
+ * NOTE: this is a version of GetenvBeforeMain that's usable from
+ * C. The implementation is in sysinfo.cc.
+ */
+const char* TCMallocGetenvSafe(const char* name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/third_party/gperftools-2.5/src/getpc.h b/src/third_party/gperftools-2.5/src/getpc.h
new file mode 100644
index 00000000000..25fee3931c7
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/getpc.h
@@ -0,0 +1,187 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This is an internal header file used by profiler.cc. It defines +// the single (inline) function GetPC. GetPC is used in a signal +// handler to figure out the instruction that was being executed when +// the signal-handler was triggered. +// +// To get this, we use the ucontext_t argument to the signal-handler +// callback, which holds the full context of what was going on when +// the signal triggered. How to get from a ucontext_t to a Program +// Counter is OS-dependent. + +#ifndef BASE_GETPC_H_ +#define BASE_GETPC_H_ + +#include "config.h" + +// On many linux systems, we may need _GNU_SOURCE to get access to +// the defined constants that define the register we want to see (e.g. +// REG_EIP). Note this #define must come first! +#define _GNU_SOURCE 1 +// If #define _GNU_SOURCE causes problems, this might work instead. +// It will cause problems for FreeBSD, though, because it turns off +// the needed __BSD_VISIBLE. +//#define _XOPEN_SOURCE 500 + +#include <string.h> // for memcmp +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> // for ucontext_t (and also mcontext_t) +#elif defined(HAVE_CYGWIN_SIGNAL_H) +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#endif + + +// Take the example where function Foo() calls function Bar(). For +// many architectures, Bar() is responsible for setting up and tearing +// down its own stack frame. In that case, it's possible for the +// interrupt to happen when execution is in Bar(), but the stack frame +// is not properly set up (either before it's done being set up, or +// after it's been torn down but before Bar() returns). In those +// cases, the stack trace cannot see the caller function anymore. +// +// GetPC can try to identify this situation, on architectures where it +// might occur, and unwind the current function call in that case to +// avoid false edges in the profile graph (that is, edges that appear +// to show a call skipping over a function). To do this, we hard-code +// in the asm instructions we might see when setting up or tearing +// down a stack frame. +// +// This is difficult to get right: the instructions depend on the +// processor, the compiler ABI, and even the optimization level. This +// is a best-effort patch -- if we fail to detect such a situation, or +// mess up the PC, nothing happens; the returned PC is not used for +// any further processing. +struct CallUnrollInfo { + // Offset from (e)ip register where this instruction sequence + // should be matched. Interpreted as bytes. Offset 0 is the next + // instruction to execute. Be extra careful with negative offsets in + // architectures of variable instruction length (like x86) - it is + // not as easy as taking an offset to step one instruction back! + int pc_offset; + // The actual instruction bytes. Feel free to make it larger if you + // need a longer sequence.
+ unsigned char ins[16]; + // How many bytes to match from ins array? + int ins_size; + // The offset from the stack pointer (e)sp where to look for the + // call return address. Interpreted as bytes. + int return_sp_offset; +}; + + +// The dereferences needed to get the PC from a struct ucontext were +// determined at configure time, and stored in the macro +// PC_FROM_UCONTEXT in config.h. The only thing we need to do here, +// then, is to do the magic call-unrolling for systems that support it. + +// -- Special case 1: linux x86, for which we have CallUnrollInfo +#if defined(__linux) && defined(__i386) && defined(__GNUC__) +static const CallUnrollInfo callunrollinfo[] = { + // Entry to a function: push %ebp; mov %esp,%ebp + // Top-of-stack contains the caller IP. + { 0, + {0x55, 0x89, 0xe5}, 3, + 0 + }, + // Entry to a function, second instruction: push %ebp; mov %esp,%ebp + // Top-of-stack contains the old frame, caller IP is +4. + { -1, + {0x55, 0x89, 0xe5}, 3, + 4 + }, + // Return from a function: RET. + // Top-of-stack contains the caller IP. + { 0, + {0xc3}, 1, + 0 + } +}; + +inline void* GetPC(const ucontext_t& signal_ucontext) { + // See comment above struct CallUnrollInfo. Only try instruction + // flow matching if both eip and esp look reasonable. + const int eip = signal_ucontext.uc_mcontext.gregs[REG_EIP]; + const int esp = signal_ucontext.uc_mcontext.gregs[REG_ESP]; + if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 && + (esp & 0xffff0000) != 0) { + char* eip_char = reinterpret_cast<char*>(eip); + for (size_t i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) { + if (!memcmp(eip_char + callunrollinfo[i].pc_offset, + callunrollinfo[i].ins, callunrollinfo[i].ins_size)) { + // We have a match. + void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset); + return *retaddr; + } + } + } + return (void*)eip; +} + +// -- Special case 2: Windows, which has to do something totally different. +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +// If this is ever implemented, probably the way to do it is to have +// profiler.cc use a high-precision timer via timeSetEvent: +// http://msdn2.microsoft.com/en-us/library/ms712713.aspx +// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC. +// The callback function would be something like prof_handler, but +// alas the arguments are different: no ucontext_t! I don't know +// how we'd get the PC (using StackWalk64?) +// http://msdn2.microsoft.com/en-us/library/ms680650.aspx + +#include "base/logging.h" // for RAW_LOG +#ifndef HAVE_CYGWIN_SIGNAL_H +typedef int ucontext_t; +#endif + +inline void* GetPC(const struct ucontext_t& signal_ucontext) { + RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n"); + return NULL; +} + +// Normal cases. If this doesn't compile, it's probably because +// PC_FROM_UCONTEXT is the empty string. You need to figure out +// the right value for your system, and add it to the list in +// configure.ac (or set it manually in your config.h). +#else +inline void* GetPC(const ucontext_t& signal_ucontext) { + return (void*)signal_ucontext.PC_FROM_UCONTEXT; // defined in config.h +} + +#endif + +#endif // BASE_GETPC_H_ diff --git a/src/third_party/gperftools-2.5/src/google/heap-checker.h b/src/third_party/gperftools-2.5/src/google/heap-checker.h new file mode 100644 index 00000000000..7cacf1f0094 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/heap-checker.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc.
+// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/heap-checker.h is deprecated. Use gperftools/heap-checker.h instead" +#endif +#include <gperftools/heap-checker.h> diff --git a/src/third_party/gperftools-2.5/src/google/heap-profiler.h b/src/third_party/gperftools-2.5/src/google/heap-profiler.h new file mode 100644 index 00000000000..3fc26cf7378 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/heap-profiler.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/heap-profiler.h is deprecated. Use gperftools/heap-profiler.h instead" +#endif +#include <gperftools/heap-profiler.h> diff --git a/src/third_party/gperftools-2.5/src/google/malloc_extension.h b/src/third_party/gperftools-2.5/src/google/malloc_extension.h new file mode 100644 index 00000000000..7cacc348158 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/malloc_extension.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/malloc_extension.h is deprecated. Use gperftools/malloc_extension.h instead" +#endif +#include <gperftools/malloc_extension.h> diff --git a/src/third_party/gperftools-2.5/src/google/malloc_extension_c.h b/src/third_party/gperftools-2.5/src/google/malloc_extension_c.h new file mode 100644 index 00000000000..f34a835d823 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/malloc_extension_c.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/malloc_extension_c.h is deprecated. Use gperftools/malloc_extension_c.h instead" +#endif +#include <gperftools/malloc_extension_c.h> diff --git a/src/third_party/gperftools-2.5/src/google/malloc_hook.h b/src/third_party/gperftools-2.5/src/google/malloc_hook.h new file mode 100644 index 00000000000..371aba476f1 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/malloc_hook.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/malloc_hook.h is deprecated. Use gperftools/malloc_hook.h instead" +#endif +#include <gperftools/malloc_hook.h> diff --git a/src/third_party/gperftools-2.5/src/google/malloc_hook_c.h b/src/third_party/gperftools-2.5/src/google/malloc_hook_c.h new file mode 100644 index 00000000000..f882c16d64d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/malloc_hook_c.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/malloc_hook_c.h is deprecated. Use gperftools/malloc_hook_c.h instead" +#endif +#include <gperftools/malloc_hook_c.h> diff --git a/src/third_party/gperftools-2.5/src/google/profiler.h b/src/third_party/gperftools-2.5/src/google/profiler.h new file mode 100644 index 00000000000..3674c9e379d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/profiler.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/profiler.h is deprecated. Use gperftools/profiler.h instead" +#endif +#include <gperftools/profiler.h> diff --git a/src/third_party/gperftools-2.5/src/google/stacktrace.h b/src/third_party/gperftools-2.5/src/google/stacktrace.h new file mode 100644 index 00000000000..53d29472a1d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/stacktrace.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/stacktrace.h is deprecated. 
Use gperftools/stacktrace.h instead" +#endif +#include <gperftools/stacktrace.h> diff --git a/src/third_party/gperftools-2.5/src/google/tcmalloc.h b/src/third_party/gperftools-2.5/src/google/tcmalloc.h new file mode 100644 index 00000000000..a2db70e2f37 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/google/tcmalloc.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#ifdef __GNUC__ +#warning "google/tcmalloc.h is deprecated. Use gperftools/tcmalloc.h instead" +#endif +#include <gperftools/tcmalloc.h> diff --git a/src/third_party/gperftools-2.5/src/gperftools/heap-checker.h b/src/third_party/gperftools-2.5/src/gperftools/heap-checker.h new file mode 100644 index 00000000000..5a87d8da7f7 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/heap-checker.h @@ -0,0 +1,422 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat) +// +// +// Module for detecting heap (memory) leaks. +// +// For full(er) information, see doc/heap_checker.html +// +// This module can be linked into programs with +// no slowdown unless you activate the leak-checker: +// +// 1. Set the environment variable HEAPCHECK to _type_ before +// running the program. +// +// _type_ is usually "normal" but can also be "minimal", "strict", or +// "draconian". (See the html file for other options, like 'local'.) +// +// After that, just run your binary. If the heap-checker detects +// a memory leak at program-exit, it will print instructions on how +// to track down the leak. + +#ifndef BASE_HEAP_CHECKER_H_ +#define BASE_HEAP_CHECKER_H_ + +#include <sys/types.h> // for size_t +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. +#ifndef _MSC_VER +#include <stdint.h> // for uintptr_t +#endif +#include <stdarg.h> // for va_list +#include <vector> + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + + +// The class is thread-safe with respect to all the provided static methods, +// as well as HeapLeakChecker objects: they can be accessed by multiple threads. +class PERFTOOLS_DLL_DECL HeapLeakChecker { + public: + + // ----------------------------------------------------------------------- // + // Static functions for working with (whole-program) leak checking. + + // If heap leak checking is currently active in some mode, + // e.g. if leak checking was started (and is still active now) + // due to HEAPCHECK=... defined in the environment. + // The return value reflects whether HeapLeakChecker objects manually + // constructed right now will be doing leak checking or nothing. + // Note that we can go from active to inactive state during InitGoogle() + // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle(). + static bool IsActive(); + + // Return a pointer to the whole-program checker if it has been created + // and NULL otherwise. + // Once GlobalChecker() returns non-NULL that object will not disappear and + // will be returned by all later GlobalChecker calls. + // This is mainly to access BytesLeaked() and ObjectsLeaked() (see below) + // for the whole-program checker after one calls NoGlobalLeaks() + // or similar and gets false. + static HeapLeakChecker* GlobalChecker(); + + // Do whole-program leak check now (if it was activated for this binary); + // return false only if it was activated and has failed. + // The mode of the check is controlled by the command-line flags. + // This method can be called repeatedly.
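// Editorial aside (a minimal sketch, not part of this header or commit):
// one way to request an explicit check at shutdown, assuming the binary
// was started with HEAPCHECK set so that checking is active;
// RunApplication() is a hypothetical entry point, not a gperftools API.
//
//   int main(int argc, char** argv) {
//     RunApplication(argc, argv);
//     if (HeapLeakChecker::IsActive() && !HeapLeakChecker::NoGlobalLeaks()) {
//       fprintf(stderr, "heap leaks detected\n");
//       return 1;
//     }
//     return 0;
//   }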
+ // Things like GlobalChecker()->SameHeap() can also be called explicitly + // to do the desired flavor of the check. + static bool NoGlobalLeaks(); + + // If the whole-program checker is active, + // cancel its automatic execution after main() exits. + // This requires that some leak check (e.g. NoGlobalLeaks()) + // has been called at least once on the whole-program checker. + static void CancelGlobalCheck(); + + // ----------------------------------------------------------------------- // + // Non-static functions for starting and doing leak checking. + + // Start checking and name the leak check performed. + // The name is used in naming dumped profiles + // and needs to be unique only within your binary. + // It must also be a string that can be a part of a file name, + // in particular it must not contain path expressions. + explicit HeapLeakChecker(const char *name); + + // Destructor (verifies that some *NoLeaks or *SameHeap method + // has been called at least once). + ~HeapLeakChecker(); + + // These used to be different but are all the same now: they return + // true iff all memory allocated since this HeapLeakChecker object + // was constructed is still reachable from global state. + // + // Because we fork to convert addresses to symbol-names, and forking + // is not thread-safe, and we may be called in a threaded context, + // we do not try to symbolize addresses when called manually. + bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); } + + // These forms are obsolete; use NoLeaks() instead. + // TODO(csilvers): mark as DEPRECATED. + bool QuickNoLeaks() { return NoLeaks(); } + bool BriefNoLeaks() { return NoLeaks(); } + bool SameHeap() { return NoLeaks(); } + bool QuickSameHeap() { return NoLeaks(); } + bool BriefSameHeap() { return NoLeaks(); } + + // Detailed information about the number of leaked bytes and objects + // (both of these can be negative as well). + // These are available only after a *SameHeap or *NoLeaks + // method has been called. + // Note that it's possible for both of these to be zero + // while SameHeap() or NoLeaks() returned false in case + // of a heap state change that is significant + // but preserves the byte and object counts. + ssize_t BytesLeaked() const; + ssize_t ObjectsLeaked() const; + + // ----------------------------------------------------------------------- // + // Static helpers to make us ignore certain leaks. + + // Scoped helper class. Should be allocated on the stack inside a + // block of code. Any heap allocations done in the code block + // covered by the scoped object (including in nested function calls + // done by the code block) will not be reported as leaks. This is + // the recommended replacement for the GetDisableChecksStart() and + // DisableChecksToHereFrom() routines below. + // + // Example: + // void Foo() { + // HeapLeakChecker::Disabler disabler; + // ... code that allocates objects whose leaks should be ignored ... + // } + // + // REQUIRES: Destructor runs in same thread as constructor + class Disabler { + public: + Disabler(); + ~Disabler(); + private: + Disabler(const Disabler&); // disallow copy + void operator=(const Disabler&); // and assign + }; + + // Ignore an object located at 'ptr' (can go at the start or into the object) + // as well as all heap objects (transitively) referenced from it for the + // purposes of heap leak checking.
Returns 'ptr' so that one can write + // static T* obj = IgnoreObject(new T(...)); + // + // If 'ptr' does not point to an active allocated object at the time of this + // call, it is ignored; but if it does, the object must not get deleted from + // the heap later on. + // + // See also HiddenPointer, below, if you need to prevent a pointer from + // being traversed by the heap checker but do not wish to transitively + // whitelist objects referenced through it. + template <typename T> + static T* IgnoreObject(T* ptr) { + DoIgnoreObject(static_cast<const void*>(const_cast<const T*>(ptr))); + return ptr; + } + + // Undo what an earlier IgnoreObject() call promised and asked to do. + // At the time of this call 'ptr' must point at or inside of an active + // allocated object which was previously registered with IgnoreObject(). + static void UnIgnoreObject(const void* ptr); + + // ----------------------------------------------------------------------- // + // Internal types defined in .cc + + class Allocator; + struct RangeValue; + + private: + + // ----------------------------------------------------------------------- // + // Various helpers + + // Create the name of the heap profile file. + // Should be deleted via Allocator::Free(). + char* MakeProfileNameLocked(); + + // Helper for constructors + void Create(const char *name, bool make_start_snapshot); + + enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE }; + + // Helper for *NoLeaks and *SameHeap + bool DoNoLeaks(ShouldSymbolize should_symbolize); + + // Helper for NoGlobalLeaks, also called by the global destructor. + static bool NoGlobalLeaksMaybeSymbolize(ShouldSymbolize should_symbolize); + + // These used to be public, but they are now deprecated. + // Will remove entirely when all internal uses are fixed. + // In the meantime, use friendship so the unittest can still test them. + static void* GetDisableChecksStart(); + static void DisableChecksToHereFrom(const void* start_address); + static void DisableChecksIn(const char* pattern); + friend void RangeDisabledLeaks(); + friend void NamedTwoDisabledLeaks(); + friend void* RunNamedDisabledLeaks(void*); + friend void TestHeapLeakCheckerNamedDisabling(); + + // Actually implements IgnoreObject(). + static void DoIgnoreObject(const void* ptr); + + // Disable checks based on stack trace entry at a depth <= + // max_depth. Used to hide allocations done inside some special + // libraries. + static void DisableChecksFromToLocked(const void* start_address, + const void* end_address, + int max_depth); + + // Helper for DoNoLeaks to ignore all objects reachable from all live data + static void IgnoreAllLiveObjectsLocked(const void* self_stack_top); + + // Callback we pass to TCMalloc_ListAllProcessThreads (see thread_lister.h) + // that is invoked when all threads of our process are found and stopped. + // The callback does the things needed to ignore live data reachable from + // thread stacks and registers for all our threads + // as well as do other global-live-data ignoring + // (via IgnoreNonThreadLiveObjectsLocked) + // during the quiet state of all threads being stopped. + // For the argument meaning see the comment by TCMalloc_ListAllProcessThreads. + // Here we only use num_threads and thread_pids, which TCMalloc_ListAllProcessThreads + // fills in for us with the number and pids of all the threads of our process + // that it found and attached to.
+ static int IgnoreLiveThreadsLocked(void* parameter, + int num_threads, + pid_t* thread_pids, + va_list ap); + + // Helper for IgnoreAllLiveObjectsLocked and IgnoreLiveThreadsLocked + // that we prefer to execute from IgnoreLiveThreadsLocked + // while all threads are stopped. + // This helper does live object discovery and ignoring + // for all objects that are reachable from everything + // not related to thread stacks and registers. + static void IgnoreNonThreadLiveObjectsLocked(); + + // Helper for IgnoreNonThreadLiveObjectsLocked and IgnoreLiveThreadsLocked + // to discover and ignore all heap objects + // reachable from currently considered live objects + // (live_objects static global variable in our .cc file). + // "name", "name2" are two strings that we print one after another + // in a debug message to describe what kind of live object sources + // are being used. + static void IgnoreLiveObjectsLocked(const char* name, const char* name2); + + // Do the overall whole-program heap leak check if needed; + // returns true when it did the leak check. + static bool DoMainHeapCheck(); + + // Type of task for UseProcMapsLocked + enum ProcMapsTask { + RECORD_GLOBAL_DATA, + DISABLE_LIBRARY_ALLOCS + }; + + // Success/error return codes for UseProcMapsLocked. + enum ProcMapsResult { + PROC_MAPS_USED, + CANT_OPEN_PROC_MAPS, + NO_SHARED_LIBS_IN_PROC_MAPS + }; + + // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line. + static ProcMapsResult UseProcMapsLocked(ProcMapsTask proc_maps_task); + + // A ProcMapsTask to disable allocations from 'library' + // that is mapped to [start_address..end_address) + // (only if library is a certain system library). + static void DisableLibraryAllocsLocked(const char* library, + uintptr_t start_address, + uintptr_t end_address); + + // Return true iff "*ptr" points to a heap object + // ("*ptr" can point at the start or inside of a heap object + // so that this works e.g. for pointers to C++ arrays, C++ strings, + // multiple-inherited objects, or pointers to members). + // We also fill *object_size for this object then + // and we move "*ptr" to point to the very start of the heap object. + static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size); + + // Helper to shut down the heap leak checker when it's not needed + // or can't function properly. + static void TurnItselfOffLocked(); + + // Internally-used c-tor to start whole-executable checking. + HeapLeakChecker(); + + // ----------------------------------------------------------------------- // + // Friends and externally accessed helpers. + + // Helper for VerifyHeapProfileTableStackGet in the unittest + // to get the recorded allocation caller for ptr, + // which must be a heap object. + static const void* GetAllocCaller(void* ptr); + friend void VerifyHeapProfileTableStackGet(); + + // This gets to execute before constructors for all global objects + static void BeforeConstructorsLocked(); + friend void HeapLeakChecker_BeforeConstructors(); + + // This gets to execute after destructors for all global objects + friend void HeapLeakChecker_AfterDestructors(); + + // Full starting of recommended whole-program checking. + friend void HeapLeakChecker_InternalInitStart(); + + // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially + // calls DoMainHeapCheck + friend void HeapLeakChecker_RunHeapCleanups(); + + // ----------------------------------------------------------------------- // + // Member data.
+ + class SpinLock* lock_; // to make HeapLeakChecker objects thread-safe + const char* name_; // our remembered name (we own it) + // NULL means this leak checker is a noop + + // Snapshot taken when the checker was created. May be NULL + // for the global heap checker object. We use void* instead of + // HeapProfileTable::Snapshot* to avoid including heap-profile-table.h. + void* start_snapshot_; + + bool has_checked_; // if we have done the leak check, so these are ready: + ssize_t inuse_bytes_increase_; // bytes-in-use increase for this checker + ssize_t inuse_allocs_increase_; // allocations-in-use increase + // for this checker + bool keep_profiles_; // iff we should keep the heap profiles we've made + + // ----------------------------------------------------------------------- // + + // Disallow "evil" constructors. + HeapLeakChecker(const HeapLeakChecker&); + void operator=(const HeapLeakChecker&); +}; + + +// Holds a pointer that will not be traversed by the heap checker. +// Contrast with HeapLeakChecker::IgnoreObject(o), in which o and +// all objects reachable from o are ignored by the heap checker. +template <class T> +class HiddenPointer { + public: + explicit HiddenPointer(T* t) + : masked_t_(reinterpret_cast<uintptr_t>(t) ^ kHideMask) { + } + // Returns unhidden pointer. Be careful where you save the result. + T* get() const { return reinterpret_cast<T*>(masked_t_ ^ kHideMask); } + + private: + // Arbitrary value, but not such that xor'ing with it is likely + // to map one valid pointer to another valid pointer: + static const uintptr_t kHideMask = + static_cast<uintptr_t>(0xF03A5F7BF03A5F7Bll); + uintptr_t masked_t_; +}; + +// A class that exists solely to run its destructor. This class should not be +// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro below. +class PERFTOOLS_DLL_DECL HeapCleaner { + public: + typedef void (*void_function)(void); + HeapCleaner(void_function f); + static void RunHeapCleanups(); + private: + static std::vector<void_function>* heap_cleanups_; +}; + +// A macro to declare module heap check cleanup tasks +// (they run only if we are doing heap leak checking.) +// 'body' should be the cleanup code to run. 'name' doesn't matter, +// but must be unique amongst all REGISTER_HEAPCHECK_CLEANUP calls. +#define REGISTER_HEAPCHECK_CLEANUP(name, body) \ + namespace { \ + void heapcheck_cleanup_##name() { body; } \ + static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \ + } + +#endif // BASE_HEAP_CHECKER_H_ diff --git a/src/third_party/gperftools-2.5/src/gperftools/heap-profiler.h b/src/third_party/gperftools-2.5/src/gperftools/heap-profiler.h new file mode 100644 index 00000000000..9b673645747 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/heap-profiler.h @@ -0,0 +1,105 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + * + * Module for heap-profiling. + * + * For full(er) information, see doc/heapprofile.html + * + * This module can be linked into your program with + * no slowdown unless you activate the profiler + * using one of the following methods: + * + * 1. Before starting the program, set the environment variable + * "HEAPPROFILE" to be the name of the file to which the profile + * data should be written. + * + * 2. Programmatically, start and stop the profiler using the + * routines "HeapProfilerStart(filename)" and "HeapProfilerStop()". + * + */ + +#ifndef BASE_HEAP_PROFILER_H_ +#define BASE_HEAP_PROFILER_H_ + +#include <stddef.h> + +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Start profiling and arrange to write profile data to file names + * of the form: "prefix.0000", "prefix.0001", ... + */ +PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix); + +/* Returns non-zero if we are currently profiling the heap. (Returns + * an int rather than a bool so it's usable from C.) This is true + * between calls to HeapProfilerStart() and HeapProfilerStop(), and + * also if the program has been run with HEAPPROFILE, or some other + * way to turn on whole-program profiling. + */ +PERFTOOLS_DLL_DECL int IsHeapProfilerRunning(); + +/* Stop heap profiling. Can be restarted with HeapProfilerStart(), + * but the currently accumulated profiling information will be cleared. + */ +PERFTOOLS_DLL_DECL void HeapProfilerStop(); + +/* Dump a profile now - can be used for dumping at a hopefully + * quiescent state in your program, in order to more easily track down + * memory leaks. Will include the reason in the logged message. + */ +PERFTOOLS_DLL_DECL void HeapProfilerDump(const char *reason); + +/* Generate current heap profiling information. + * Returns an empty string when heap profiling is not active. + * The returned pointer is a '\0'-terminated string allocated using malloc() + * and should be free()-ed as soon as the caller does not need it anymore.
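 *
 * Editorial aside (an illustrative sketch, not part of this header or
 * commit): one possible dump-and-free pattern. The "/tmp/myprog" prefix,
 * RunWorkload(), and the use of stderr are assumptions, not prescribed
 * by this API:
 *
 *   HeapProfilerStart("/tmp/myprog");
 *   RunWorkload();
 *   HeapProfilerDump("end of workload");
 *   char* profile = GetHeapProfile();
 *   if (profile != NULL) {
 *     fputs(profile, stderr);
 *     free(profile);      (the buffer comes from malloc(), per the above)
 *   }
 *   HeapProfilerStop();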
+ */ +PERFTOOLS_DLL_DECL char* GetHeapProfile(); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* BASE_HEAP_PROFILER_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/malloc_extension.h b/src/third_party/gperftools-2.5/src/gperftools/malloc_extension.h new file mode 100644 index 00000000000..689b5f17cef --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/malloc_extension.h @@ -0,0 +1,434 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. +// +// NOTE FOR C USERS: If you wish to use this functionality from within +// a C program, see malloc_extension_c.h. + +#ifndef BASE_MALLOC_EXTENSION_H_ +#define BASE_MALLOC_EXTENSION_H_ + +#include <stddef.h> +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. +#ifndef _MSC_VER +#include <stdint.h> +#endif +#include <string> +#include <vector> + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +static const int kMallocHistogramSize = 64; + +// One day, we could support other types of writers (perhaps for C?) +typedef std::string MallocExtensionWriter; + +namespace base { +struct MallocRange; +} + +// Interface to a pluggable system allocator. 
+class PERFTOOLS_DLL_DECL SysAllocator { + public: + SysAllocator() { + } + virtual ~SysAllocator(); + + // Allocates "size" bytes of memory from the system, aligned to "alignment". + // Returns NULL on failure. Otherwise, the bytes from the returned pointer p + // up to and including (p + *actual_size - 1) have been allocated. + virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0; +}; + +// The default implementations of the following routines do nothing. +// All implementations should be thread-safe; the current one +// (TCMallocImplementation) is. +class PERFTOOLS_DLL_DECL MallocExtension { + public: + virtual ~MallocExtension(); + + // Call this very early in the program execution -- say, in a global + // constructor -- to set up parameters and state needed by all + // instrumented malloc implementations. One example: this routine + // sets environment variables to tell STL to use libc's malloc() + // instead of doing its own memory management. This is safe to call + // multiple times, as long as each time is before threads start up. + static void Initialize(); + + // See "verify_memory.h" to see what these routines do. + virtual bool VerifyAllMemory(); + virtual bool VerifyNewMemory(const void* p); + virtual bool VerifyArrayNewMemory(const void* p); + virtual bool VerifyMallocMemory(const void* p); + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]); + + // Get a human-readable description of the following malloc data structures. + // - Total inuse memory by application. + // - Free memory (thread, central, and page heap). + // - Freelist of central cache, each class. + // - Page heap freelist. + // The state is stored as a null-terminated string + // in a prefix of "buffer[0,buffer_length-1]". + // REQUIRES: buffer_length > 0. + virtual void GetStats(char* buffer, int buffer_length); + + // Outputs to "writer" a sample of live objects and the stack traces + // that allocated these objects. The format of the returned output + // is equivalent to the output of the heap profiler and can + // therefore be passed to "pprof". This function is equivalent to + // ReadStackTraces. The main difference is that this function returns + // serialized data appropriately formatted for use by the pprof tool. + // NOTE: by default, tcmalloc does not do any heap sampling, and this + // function will always return an empty sample. To get useful + // data from GetHeapSample, you must also set the environment + // variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288. + virtual void GetHeapSample(MallocExtensionWriter* writer); + + // Outputs to "writer" the stack traces that caused growth in the + // address space size. The format of the returned output is + // equivalent to the output of the heap profiler and can therefore + // be passed to "pprof". This function is equivalent to + // ReadHeapGrowthStackTraces. The main difference is that this function + // returns serialized data appropriately formatted for use by the + // pprof tool. (This does not depend on, or require, + // TCMALLOC_SAMPLE_PARAMETER.) + virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer); + + // Invokes func(arg, range) for every controlled memory + // range. *range is filled in with information about the range. + // + // This is a best-effort interface useful only for performance + // analysis. The implementation may not call func at all.
+ typedef void (RangeFunction)(void*, const base::MallocRange*); + virtual void Ranges(void* arg, RangeFunction func); + + // ------------------------------------------------------------------- + // Control operations for getting and setting malloc + // implementation-specific parameters. Some currently useful properties: + // + // generic + // ------- + // "generic.current_allocated_bytes" + // Number of bytes currently allocated by application + // This property is not writable. + // + // "generic.heap_size" + // Number of bytes in the heap == + // current_allocated_bytes + + // fragmentation + + // freed memory regions + // This property is not writable. + // + // tcmalloc + // -------- + // "tcmalloc.max_total_thread_cache_bytes" + // Upper limit on total number of bytes stored across all + // per-thread caches. Default: 16MB. + // + // "tcmalloc.current_total_thread_cache_bytes" + // Number of bytes used across all thread caches. + // This property is not writable. + // + // "tcmalloc.central_cache_free_bytes" + // Number of free bytes in the central cache that have been + // assigned to size classes. They always count towards virtual + // memory usage, and unless the underlying memory is swapped out + // by the OS, they also count towards physical memory usage. + // This property is not writable. + // + // "tcmalloc.transfer_cache_free_bytes" + // Number of free bytes that are waiting to be transferred between + // the central cache and a thread cache. They always count + // towards virtual memory usage, and unless the underlying memory + // is swapped out by the OS, they also count towards physical + // memory usage. This property is not writable. + // + // "tcmalloc.thread_cache_free_bytes" + // Number of free bytes in thread caches. They always count + // towards virtual memory usage, and unless the underlying memory + // is swapped out by the OS, they also count towards physical + // memory usage. This property is not writable. + // + // "tcmalloc.pageheap_free_bytes" + // Number of bytes in free, mapped pages in page heap. These + // bytes can be used to fulfill allocation requests. They + // always count towards virtual memory usage, and unless the + // underlying memory is swapped out by the OS, they also count + // towards physical memory usage. This property is not writable. + // + // "tcmalloc.pageheap_unmapped_bytes" + // Number of bytes in free, unmapped pages in page heap. + // These are bytes that have been released back to the OS, + // possibly by one of the MallocExtension "Release" calls. + // They can be used to fulfill allocation requests, but + // typically incur a page fault. They always count towards + // virtual memory usage, and depending on the OS, typically + // do not count towards physical memory usage. This property + // is not writable. + // ------------------------------------------------------------------- + + // Get the named "property"'s value. Returns true if the property + // is known. Returns false if the property is not a valid property + // name for the current malloc implementation. + // REQUIRES: property != NULL; value != NULL + virtual bool GetNumericProperty(const char* property, size_t* value); + + // Set the named "property"'s value. Returns true if the property + // is known and writable. Returns false if the property is not a + // valid property name for the current malloc implementation, or + // is not writable.
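// Editorial aside (an illustrative sketch, not part of this header or
// commit), using property names from the list documented above; the
// 32MB thread-cache cap is an arbitrary assumed value:
//
//   size_t allocated = 0;
//   if (MallocExtension::instance()->GetNumericProperty(
//           "generic.current_allocated_bytes", &allocated)) {
//     // 'allocated' now holds the bytes currently allocated by the app.
//   }
//   MallocExtension::instance()->SetNumericProperty(
//       "tcmalloc.max_total_thread_cache_bytes", 32 << 20);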
+  // REQUIRES: property != NULL
+  virtual bool SetNumericProperty(const char* property, size_t value);
+
+  // Mark the current thread as "idle".  This routine may optionally
+  // be called by threads as a hint to the malloc implementation that
+  // any thread-specific resources should be released.  Note: this may
+  // be an expensive routine, so it should not be called too often.
+  //
+  // Also, if the code that calls this routine will go to sleep for
+  // a while, it should take care to not allocate anything between
+  // the call to this routine and the beginning of the sleep.
+  //
+  // Most malloc implementations ignore this routine.
+  virtual void MarkThreadIdle();
+
+  // Mark the current thread as "busy".  This routine should be
+  // called after MarkThreadIdle() if the thread will now do more
+  // work.  If this method is not called, performance may suffer.
+  //
+  // Most malloc implementations ignore this routine.
+  virtual void MarkThreadBusy();
+
+  // Gets the system allocator used by the malloc extension instance.  Returns
+  // NULL for malloc implementations that do not support pluggable system
+  // allocators.
+  virtual SysAllocator* GetSystemAllocator();
+
+  // Sets the system allocator to the specified one.
+  //
+  // Users can register their own system allocator with a malloc
+  // implementation that supports pluggable system allocators, such as
+  // TCMalloc, by doing:
+  //   alloc = new MyOwnSysAllocator();
+  //   MallocExtension::instance()->SetSystemAllocator(alloc);
+  // It's up to users whether to fall back (recommended) to the default
+  // system allocator (use GetSystemAllocator() above) or not.  The caller is
+  // responsible for any necessary locking.
+  // See tcmalloc/system-alloc.h for the interface and
+  // tcmalloc/memfs_malloc.cc for examples.
+  //
+  // It's a no-op for malloc implementations that do not support pluggable
+  // system allocators.
+  virtual void SetSystemAllocator(SysAllocator *a);
+
+  // Try to release num_bytes of free memory back to the operating
+  // system for reuse.  Use this extension with caution -- to get this
+  // memory back may require faulting pages back in by the OS, and
+  // that may be slow.  (Currently only implemented in tcmalloc.)
+  virtual void ReleaseToSystem(size_t num_bytes);
+
+  // Same as ReleaseToSystem() but release as much memory as possible.
+  virtual void ReleaseFreeMemory();
+
+  // Sets the rate at which we release unused memory to the system.
+  // Zero means we never release memory back to the system.  Increase
+  // this flag to return memory faster; decrease it to return memory
+  // slower.  Reasonable rates are in the range [0,10].  (Currently
+  // only implemented in tcmalloc).
+  virtual void SetMemoryReleaseRate(double rate);
+
+  // Gets the release rate.  Returns a value < 0 if unknown.
+  virtual double GetMemoryReleaseRate();
+
+  // Returns the estimated number of bytes that will be allocated for
+  // a request of "size" bytes.  This is an estimate: an allocation of
+  // SIZE bytes may reserve more bytes, but will never reserve less.
+  // (Currently only implemented in tcmalloc, other implementations
+  // always return SIZE.)
+  // This is equivalent to malloc_good_size() in OS X.
+  virtual size_t GetEstimatedAllocatedSize(size_t size);
+
+  // Returns the actual number N of bytes reserved by tcmalloc for the
+  // pointer p.  The client is allowed to use the range of bytes
+  // [p, p+N) in any way it wishes (i.e. N is the "usable size" of this
+  // allocation).
+  // This number may be equal to or greater than the number
+  // of bytes requested when p was allocated.
+  // p must have been allocated by this malloc implementation,
+  // must not be an interior pointer -- that is, must be exactly
+  // the pointer returned by malloc() et al., not some offset
+  // from that -- and should not have been freed yet.  p may be NULL.
+  // (Currently only implemented in tcmalloc; other implementations
+  // will return 0.)
+  // This is equivalent to malloc_size() in OS X, malloc_usable_size()
+  // in glibc, and _msize() for windows.
+  virtual size_t GetAllocatedSize(const void* p);
+
+  // Returns kOwned if this malloc implementation allocated the memory
+  // pointed to by p, or kNotOwned if some other malloc implementation
+  // allocated it or p is NULL.  May also return kUnknownOwnership if
+  // the malloc implementation does not keep track of ownership.
+  // REQUIRES: p must be a value returned from a previous call to
+  // malloc(), calloc(), realloc(), memalign(), posix_memalign(),
+  // valloc(), pvalloc(), new, or new[], and must refer to memory that
+  // is currently allocated (so, for instance, you should not pass in
+  // a pointer after having called free() on it).
+  enum Ownership {
+    // NOTE: Enum values MUST be kept in sync with the version in
+    // malloc_extension_c.h
+    kUnknownOwnership = 0,
+    kOwned,
+    kNotOwned
+  };
+  virtual Ownership GetOwnership(const void* p);
+
+  // The current malloc implementation.  Always non-NULL.
+  static MallocExtension* instance();
+
+  // Change the malloc implementation.  Typically called by the
+  // malloc implementation during initialization.
+  static void Register(MallocExtension* implementation);
+
+  // Returns detailed information about malloc's freelists.  For each list,
+  // a FreeListInfo is returned:
+  struct FreeListInfo {
+    size_t min_object_size;
+    size_t max_object_size;
+    size_t total_bytes_free;
+    const char* type;
+  };
+  // Each item in the vector refers to a different freelist.  The lists
+  // are identified by the range of allocations that objects in the
+  // list can satisfy ([min_object_size, max_object_size]) and the
+  // type of freelist (see below).  The current size of the list is
+  // returned in total_bytes_free (which counts against a process's
+  // resident and virtual size).
+  //
+  // Currently supported types are:
+  //
+  // "tcmalloc.page{_unmapped}" - tcmalloc's page heap.  An entry for each size
+  //          class in the page heap is returned.  Bytes in "page_unmapped"
+  //          are no longer backed by physical memory and do not count against
+  //          the resident size of a process.
+  //
+  // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger
+  //          than the largest page heap size class.  Only one "large"
+  //          entry is returned.  There is no upper bound on the size
+  //          of objects in the large free list; this call returns
+  //          kint64max for max_object_size.  Bytes in
+  //          "large_unmapped" are no longer backed by physical memory
+  //          and do not count against the resident size of a process.
+  //
+  // "tcmalloc.central" - tcmalloc's central free-list.  One entry per
+  //          size-class is returned.  Never unmapped.
+  //
+  // "debug.free_queue" - free objects queued by the debug allocator
+  //          and not returned to tcmalloc.
+  //
+  // "tcmalloc.thread" - tcmalloc's per-thread caches.  Never unmapped.
+  virtual void GetFreeListSizes(std::vector<FreeListInfo>* v);
+
+  // Get a list of stack traces of sampled allocation points.
+  // Returns a pointer to a "new[]-ed" result array, and stores the
+  // sample period in "sample_period".
+  //
+  // The state is stored as a sequence of adjacent entries
+  // in the returned array.  Each entry has the following form:
+  //    uintptr_t count;        // Number of objects with following trace
+  //    uintptr_t size;         // Total size of objects with following trace
+  //    uintptr_t depth;        // Number of PC values in stack trace
+  //    void*     stack[depth]; // PC values that form the stack trace
+  //
+  // The list of entries is terminated by a "count" of 0.
+  //
+  // It is the responsibility of the caller to "delete[]" the returned array.
+  //
+  // May return NULL to indicate no results.
+  //
+  // This is an internal extension.  Callers should use the more
+  // convenient "GetHeapSample(string*)" method defined above.
+  virtual void** ReadStackTraces(int* sample_period);
+
+  // Like ReadStackTraces(), but returns stack traces that caused growth
+  // in the address space size.
+  virtual void** ReadHeapGrowthStackTraces();
+
+  // Returns the size in bytes of the calling thread's cache.
+  virtual size_t GetThreadCacheSize();
+
+  // Like MarkThreadIdle, but does not destroy the internal data
+  // structures of the thread cache.  When the thread resumes, it will
+  // have an empty cache but will not need to pay to reconstruct the
+  // cache data structures.
+  virtual void MarkThreadTemporarilyIdle();
+};
+
+namespace base {
+
+// Information passed per range.  More fields may be added later.
+struct MallocRange {
+  enum Type {
+    INUSE,                // Application is using this range
+    FREE,                 // Range is currently free
+    UNMAPPED,             // Backing physical memory has been returned to the OS
+    UNKNOWN
+    // More enum values may be added in the future
+  };
+
+  uintptr_t address;    // Address of range
+  size_t length;        // Byte length of range
+  Type type;            // Type of this range
+  double fraction;      // Fraction of range that is being used (0 if !INUSE)
+
+  // Perhaps add the following:
+  // - stack trace if this range was sampled
+  // - heap growth stack trace if applicable to this range
+  // - age when allocated (for inuse) or freed (if not in use)
+};
+
+} // namespace base
+
+#endif  // BASE_MALLOC_EXTENSION_H_
diff --git a/src/third_party/gperftools-2.5/src/gperftools/malloc_extension_c.h b/src/third_party/gperftools-2.5/src/gperftools/malloc_extension_c.h
new file mode 100644
index 00000000000..70ff6868ecf
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/gperftools/malloc_extension_c.h
@@ -0,0 +1,101 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * -- + * Author: Craig Silverstein + * + * C shims for the C++ malloc_extension.h. See malloc_extension.h for + * details. Note these C shims always work on + * MallocExtension::instance(); it is not possible to have more than + * one MallocExtension object in C applications. + */ + +#ifndef _MALLOC_EXTENSION_C_H_ +#define _MALLOC_EXTENSION_C_H_ + +#include <stddef.h> +#include <sys/types.h> + +/* Annoying stuff for windows -- makes sure clients can import these fns */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define kMallocExtensionHistogramSize 64 + +PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocExtensionHistogramSize]); +PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length); + +/* TODO(csilvers): write a C version of these routines, that perhaps + * takes a function ptr and a void *. 
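+ *
+ * One possible shape for such shims (a sketch only -- these functions do
+ * not exist yet, and the names are illustrative):
+ *   typedef void (*MallocExtension_WriteFn)(void* arg, const char* buf, int len);
+ *   void MallocExtension_GetHeapSample(MallocExtension_WriteFn fn, void* arg);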
+ */ +/* void MallocExtension_GetHeapSample(string* result); */ +/* void MallocExtension_GetHeapGrowthStacks(string* result); */ + +PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, size_t* value); +PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(const void* p); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetThreadCacheSize(void); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadTemporarilyIdle(void); + +/* + * NOTE: These enum values MUST be kept in sync with the version in + * malloc_extension.h + */ +typedef enum { + MallocExtension_kUnknownOwnership = 0, + MallocExtension_kOwned, + MallocExtension_kNotOwned +} MallocExtension_Ownership; + +PERFTOOLS_DLL_DECL MallocExtension_Ownership MallocExtension_GetOwnership(const void* p); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _MALLOC_EXTENSION_C_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/malloc_hook.h b/src/third_party/gperftools-2.5/src/gperftools/malloc_hook.h new file mode 100644 index 00000000000..b76411fb590 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/malloc_hook.h @@ -0,0 +1,359 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Some of our malloc implementations can invoke the following hooks whenever +// memory is allocated or deallocated. MallocHook is thread-safe, and things +// you do before calling AddFooHook(MyHook) are visible to any resulting calls +// to MyHook. 
Hooks must be thread-safe. If you write: +// +// CHECK(MallocHook::AddNewHook(&MyNewHook)); +// +// MyNewHook will be invoked in subsequent calls in the current thread, but +// there are no guarantees on when it might be invoked in other threads. +// +// There are a limited number of slots available for each hook type. Add*Hook +// will return false if there are no slots available. Remove*Hook will return +// false if the given hook was not already installed. +// +// The order in which individual hooks are called in Invoke*Hook is undefined. +// +// It is safe for a hook to remove itself within Invoke*Hook and add other +// hooks. Any hooks added inside a hook invocation (for the same hook type) +// will not be invoked for the current invocation. +// +// One important user of these hooks is the heap profiler. +// +// CAVEAT: If you add new MallocHook::Invoke* calls then those calls must be +// directly in the code of the (de)allocation function that is provided to the +// user and that function must have an ATTRIBUTE_SECTION(malloc_hook) attribute. +// +// Note: the Invoke*Hook() functions are defined in malloc_hook-inl.h. If you +// need to invoke a hook (which you shouldn't unless you're part of tcmalloc), +// be sure to #include malloc_hook-inl.h in addition to malloc_hook.h. +// +// NOTE FOR C USERS: If you want to use malloc_hook functionality from +// a C program, #include malloc_hook_c.h instead of this file. + +#ifndef _MALLOC_HOOK_H_ +#define _MALLOC_HOOK_H_ + +#include <stddef.h> +#include <sys/types.h> +extern "C" { +#include "malloc_hook_c.h" // a C version of the malloc_hook interface +} + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +// The C++ methods below call the C version (MallocHook_*), and thus +// convert between an int and a bool. Windows complains about this +// (a "performance warning") which we don't care about, so we suppress. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4800) +#endif + +// Note: malloc_hook_c.h defines MallocHook_*Hook and +// MallocHook_{Add,Remove}*Hook. The version of these inside the MallocHook +// class are defined in terms of the malloc_hook_c version. See malloc_hook_c.h +// for details of these types/functions. + +class PERFTOOLS_DLL_DECL MallocHook { + public: + // The NewHook is invoked whenever an object is allocated. + // It may be passed NULL if the allocator returned NULL. + typedef MallocHook_NewHook NewHook; + inline static bool AddNewHook(NewHook hook) { + return MallocHook_AddNewHook(hook); + } + inline static bool RemoveNewHook(NewHook hook) { + return MallocHook_RemoveNewHook(hook); + } + inline static void InvokeNewHook(const void* p, size_t s); + + // The DeleteHook is invoked whenever an object is deallocated. + // It may be passed NULL if the caller is trying to delete NULL. + typedef MallocHook_DeleteHook DeleteHook; + inline static bool AddDeleteHook(DeleteHook hook) { + return MallocHook_AddDeleteHook(hook); + } + inline static bool RemoveDeleteHook(DeleteHook hook) { + return MallocHook_RemoveDeleteHook(hook); + } + inline static void InvokeDeleteHook(const void* p); + + // The PreMmapHook is invoked with mmap or mmap64 arguments just + // before the call is actually made. 
Such a hook may be useful
+  // in memory limited contexts, to catch allocations that will exceed
+  // a memory limit, and take outside actions to increase that limit.
+  typedef MallocHook_PreMmapHook PreMmapHook;
+  inline static bool AddPreMmapHook(PreMmapHook hook) {
+    return MallocHook_AddPreMmapHook(hook);
+  }
+  inline static bool RemovePreMmapHook(PreMmapHook hook) {
+    return MallocHook_RemovePreMmapHook(hook);
+  }
+  inline static void InvokePreMmapHook(const void* start,
+                                       size_t size,
+                                       int protection,
+                                       int flags,
+                                       int fd,
+                                       off_t offset);
+
+  // The MmapReplacement is invoked after the PreMmapHook but before
+  // the call is actually made.  The MmapReplacement should return true
+  // if it handled the call, or false if it is still necessary to
+  // call mmap/mmap64.
+  // This should be used only by experts, and users must be
+  // extremely careful to avoid recursive calls to mmap.  The replacement
+  // should be async signal safe.
+  // Only one MmapReplacement is supported.  After setting an MmapReplacement
+  // you must call RemoveMmapReplacement before calling SetMmapReplacement
+  // again.
+  typedef MallocHook_MmapReplacement MmapReplacement;
+  inline static bool SetMmapReplacement(MmapReplacement hook) {
+    return MallocHook_SetMmapReplacement(hook);
+  }
+  inline static bool RemoveMmapReplacement(MmapReplacement hook) {
+    return MallocHook_RemoveMmapReplacement(hook);
+  }
+  inline static bool InvokeMmapReplacement(const void* start,
+                                           size_t size,
+                                           int protection,
+                                           int flags,
+                                           int fd,
+                                           off_t offset,
+                                           void** result);
+
+
+  // The MmapHook is invoked whenever a region of memory is mapped.
+  // It may be passed MAP_FAILED if the mmap failed.
+  typedef MallocHook_MmapHook MmapHook;
+  inline static bool AddMmapHook(MmapHook hook) {
+    return MallocHook_AddMmapHook(hook);
+  }
+  inline static bool RemoveMmapHook(MmapHook hook) {
+    return MallocHook_RemoveMmapHook(hook);
+  }
+  inline static void InvokeMmapHook(const void* result,
+                                    const void* start,
+                                    size_t size,
+                                    int protection,
+                                    int flags,
+                                    int fd,
+                                    off_t offset);
+
+  // The MunmapReplacement is invoked with munmap arguments just before
+  // the call is actually made.  The MunmapReplacement should return true
+  // if it handled the call, or false if it is still necessary to
+  // call munmap.
+  // This should be used only by experts.  The replacement should be
+  // async signal safe.
+  // Only one MunmapReplacement is supported.  After setting an
+  // MunmapReplacement you must call RemoveMunmapReplacement before
+  // calling SetMunmapReplacement again.
+  typedef MallocHook_MunmapReplacement MunmapReplacement;
+  inline static bool SetMunmapReplacement(MunmapReplacement hook) {
+    return MallocHook_SetMunmapReplacement(hook);
+  }
+  inline static bool RemoveMunmapReplacement(MunmapReplacement hook) {
+    return MallocHook_RemoveMunmapReplacement(hook);
+  }
+  inline static bool InvokeMunmapReplacement(const void* p,
+                                             size_t size,
+                                             int* result);
+
+  // The MunmapHook is invoked whenever a region of memory is unmapped.
+  typedef MallocHook_MunmapHook MunmapHook;
+  inline static bool AddMunmapHook(MunmapHook hook) {
+    return MallocHook_AddMunmapHook(hook);
+  }
+  inline static bool RemoveMunmapHook(MunmapHook hook) {
+    return MallocHook_RemoveMunmapHook(hook);
+  }
+  inline static void InvokeMunmapHook(const void* p, size_t size);
+
+  // The MremapHook is invoked whenever a region of memory is remapped.
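+  // For instance, a hook with the matching signature (a sketch; the name
+  // is illustrative) would look like:
+  //   void MyMremapHook(const void* result, const void* old_addr,
+  //                     size_t old_size, size_t new_size, int flags,
+  //                     const void* new_addr);
+  // and is registered with AddMremapHook(&MyMremapHook).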
+  typedef MallocHook_MremapHook MremapHook;
+  inline static bool AddMremapHook(MremapHook hook) {
+    return MallocHook_AddMremapHook(hook);
+  }
+  inline static bool RemoveMremapHook(MremapHook hook) {
+    return MallocHook_RemoveMremapHook(hook);
+  }
+  inline static void InvokeMremapHook(const void* result,
+                                      const void* old_addr,
+                                      size_t old_size,
+                                      size_t new_size,
+                                      int flags,
+                                      const void* new_addr);
+
+  // The PreSbrkHook is invoked just before sbrk is called -- except when
+  // the increment is 0.  This is because sbrk(0) is often called
+  // to get the top of the memory stack, and is not actually a
+  // memory-allocation call.  It may be useful in memory-limited contexts,
+  // to catch allocations that will exceed the limit and take outside
+  // actions to increase such a limit.
+  typedef MallocHook_PreSbrkHook PreSbrkHook;
+  inline static bool AddPreSbrkHook(PreSbrkHook hook) {
+    return MallocHook_AddPreSbrkHook(hook);
+  }
+  inline static bool RemovePreSbrkHook(PreSbrkHook hook) {
+    return MallocHook_RemovePreSbrkHook(hook);
+  }
+  inline static void InvokePreSbrkHook(ptrdiff_t increment);
+
+  // The SbrkHook is invoked whenever sbrk is called -- except when
+  // the increment is 0.  This is because sbrk(0) is often called
+  // to get the top of the memory stack, and is not actually a
+  // memory-allocation call.
+  typedef MallocHook_SbrkHook SbrkHook;
+  inline static bool AddSbrkHook(SbrkHook hook) {
+    return MallocHook_AddSbrkHook(hook);
+  }
+  inline static bool RemoveSbrkHook(SbrkHook hook) {
+    return MallocHook_RemoveSbrkHook(hook);
+  }
+  inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment);
+
+  // Get the current stack trace.  Try to skip all routines up to and
+  // including the caller of MallocHook::Invoke*.
+  // Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
+  // as a hint about how many routines to skip if better information
+  // is not available.
+  inline static int GetCallerStackTrace(void** result, int max_depth,
+                                        int skip_count) {
+    return MallocHook_GetCallerStackTrace(result, max_depth, skip_count);
+  }
+
+  // Unhooked versions of mmap() and munmap().  These should be used
+  // only by experts, since they bypass heapchecking, etc.
+  // Note: These do not run hooks, but they still use the MmapReplacement
+  // and MunmapReplacement.
+  static void* UnhookedMMap(void *start, size_t length, int prot, int flags,
+                            int fd, off_t offset);
+  static int UnhookedMUnmap(void *start, size_t length);
+
+  // The following are DEPRECATED.
+ inline static NewHook GetNewHook(); + inline static NewHook SetNewHook(NewHook hook) { + return MallocHook_SetNewHook(hook); + } + + inline static DeleteHook GetDeleteHook(); + inline static DeleteHook SetDeleteHook(DeleteHook hook) { + return MallocHook_SetDeleteHook(hook); + } + + inline static PreMmapHook GetPreMmapHook(); + inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) { + return MallocHook_SetPreMmapHook(hook); + } + + inline static MmapHook GetMmapHook(); + inline static MmapHook SetMmapHook(MmapHook hook) { + return MallocHook_SetMmapHook(hook); + } + + inline static MunmapHook GetMunmapHook(); + inline static MunmapHook SetMunmapHook(MunmapHook hook) { + return MallocHook_SetMunmapHook(hook); + } + + inline static MremapHook GetMremapHook(); + inline static MremapHook SetMremapHook(MremapHook hook) { + return MallocHook_SetMremapHook(hook); + } + + inline static PreSbrkHook GetPreSbrkHook(); + inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) { + return MallocHook_SetPreSbrkHook(hook); + } + + inline static SbrkHook GetSbrkHook(); + inline static SbrkHook SetSbrkHook(SbrkHook hook) { + return MallocHook_SetSbrkHook(hook); + } + // End of DEPRECATED methods. + + private: + // Slow path versions of Invoke*Hook. + static void InvokeNewHookSlow(const void* p, size_t s); + static void InvokeDeleteHookSlow(const void* p); + static void InvokePreMmapHookSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + static void InvokeMmapHookSlow(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + static bool InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); + static void InvokeMunmapHookSlow(const void* p, size_t size); + static bool InvokeMunmapReplacementSlow(const void* p, + size_t size, + int* result); + static void InvokeMremapHookSlow(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); + static void InvokePreSbrkHookSlow(ptrdiff_t increment); + static void InvokeSbrkHookSlow(const void* result, ptrdiff_t increment); +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + +#endif /* _MALLOC_HOOK_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/malloc_hook_c.h b/src/third_party/gperftools-2.5/src/gperftools/malloc_hook_c.h new file mode 100644 index 00000000000..56337e15e83 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/malloc_hook_c.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * --
+ * Author: Craig Silverstein
+ *
+ * C shims for the C++ malloc_hook.h.  See malloc_hook.h for details
+ * on how to use these.
+ */
+
+#ifndef _MALLOC_HOOK_C_H_
+#define _MALLOC_HOOK_C_H_
+
+#include <stddef.h>
+#include <sys/types.h>
+
+/* Annoying stuff for windows; makes sure clients can import these functions */
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Get the current stack trace.  Try to skip all routines up to and
+ * including the caller of MallocHook::Invoke*.
+ * Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
+ * as a hint about how many routines to skip if better information
+ * is not available.
+ */
+PERFTOOLS_DLL_DECL
+int MallocHook_GetCallerStackTrace(void** result, int max_depth,
+                                   int skip_count);
+
+/* The MallocHook_{Add,Remove}*Hook functions return 1 on success and 0 on
+ * failure.
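+ *
+ * For example (a sketch; MyNewHook is illustrative):
+ *   static void MyNewHook(const void* ptr, size_t size) { ... }
+ *   if (!MallocHook_AddNewHook(&MyNewHook)) { ... no free hook slots ... }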
+ */ + +typedef void (*MallocHook_NewHook)(const void* ptr, size_t size); +PERFTOOLS_DLL_DECL +int MallocHook_AddNewHook(MallocHook_NewHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveNewHook(MallocHook_NewHook hook); + +typedef void (*MallocHook_DeleteHook)(const void* ptr); +PERFTOOLS_DLL_DECL +int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook); + +typedef void (*MallocHook_PreMmapHook)(const void *start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); +PERFTOOLS_DLL_DECL +int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook); + +typedef void (*MallocHook_MmapHook)(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); +PERFTOOLS_DLL_DECL +int MallocHook_AddMmapHook(MallocHook_MmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook); + +typedef int (*MallocHook_MmapReplacement)(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); +int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook); +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook); + +typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size); +PERFTOOLS_DLL_DECL +int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook); + +typedef int (*MallocHook_MunmapReplacement)(const void* ptr, + size_t size, + int* result); +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook); +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook); + +typedef void (*MallocHook_MremapHook)(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); +PERFTOOLS_DLL_DECL +int MallocHook_AddMremapHook(MallocHook_MremapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook); + +typedef void (*MallocHook_PreSbrkHook)(ptrdiff_t increment); +PERFTOOLS_DLL_DECL +int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook); + +typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment); +PERFTOOLS_DLL_DECL +int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook); + +/* The following are DEPRECATED. */ +PERFTOOLS_DLL_DECL +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook); +PERFTOOLS_DLL_DECL +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook); +PERFTOOLS_DLL_DECL +MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook); +PERFTOOLS_DLL_DECL +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook); +/* End of DEPRECATED functions. 
*/ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* _MALLOC_HOOK_C_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/profiler.h b/src/third_party/gperftools-2.5/src/gperftools/profiler.h new file mode 100644 index 00000000000..2d272d616a9 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/profiler.h @@ -0,0 +1,169 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + * + * Module for CPU profiling based on periodic pc-sampling. + * + * For full(er) information, see doc/cpuprofile.html + * + * This module is linked into your program with + * no slowdown caused by this unless you activate the profiler + * using one of the following methods: + * + * 1. Before starting the program, set the environment variable + * "CPUPROFILE" to be the name of the file to which the profile + * data should be written. + * + * 2. Programmatically, start and stop the profiler using the + * routines "ProfilerStart(filename)" and "ProfilerStop()". + * + * + * (Note: if using linux 2.4 or earlier, only the main thread may be + * profiled.) + * + * Use pprof to view the resulting profile output. + * % pprof <path_to_executable> <profile_file_name> + * % pprof --gv <path_to_executable> <profile_file_name> + * + * These functions are thread-safe. + */ + +#ifndef BASE_PROFILER_H_ +#define BASE_PROFILER_H_ + +#include <time.h> /* For time_t */ + +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Profiler options, for use with ProfilerStartWithOptions. To use: + * + * struct ProfilerOptions options; + * memset(&options, 0, sizeof options); + * + * then fill in fields as needed. 
+ *
+ * This structure is intended to be usable from C code, so no constructor
+ * is provided to initialize it.  (Use memset as described above).
+ */
+struct ProfilerOptions {
+  /* Filter function and argument.
+   *
+   * If filter_in_thread is not NULL, when a profiling tick is delivered
+   * the profiler will call:
+   *
+   *   (*filter_in_thread)(filter_in_thread_arg)
+   *
+   * If it returns nonzero, the sample will be included in the profile.
+   * Note that filter_in_thread runs in a signal handler, so must be
+   * async-signal-safe.
+   *
+   * A typical use would be to set up filter results for each thread
+   * in the system before starting the profiler, then to make
+   * filter_in_thread be a very simple function which retrieves those
+   * results in an async-signal-safe way.  Retrieval could be done
+   * using thread-specific data, or using a shared data structure that
+   * supports async-signal-safe lookups.
+   */
+  int (*filter_in_thread)(void *arg);
+  void *filter_in_thread_arg;
+};
+
+/* Start profiling and write profile info into fname, discarding any
+ * existing profiling data in that file.
+ *
+ * This is equivalent to calling ProfilerStartWithOptions(fname, NULL).
+ */
+PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname);
+
+/* Start profiling and write profile into fname, discarding any
+ * existing profiling data in that file.
+ *
+ * The profiler is configured using the options given by 'options'.
+ * Options which are not specified are given default values.
+ *
+ * 'options' may be NULL, in which case all are given default values.
+ *
+ * Returns nonzero if profiling was started successfully, or zero otherwise.
+ */
+PERFTOOLS_DLL_DECL int ProfilerStartWithOptions(
+    const char *fname, const struct ProfilerOptions *options);
+
+/* Stop profiling.  Can be started again with ProfilerStart(), but
+ * the currently accumulated profiling data will be cleared.
+ */
+PERFTOOLS_DLL_DECL void ProfilerStop(void);
+
+/* Flush any currently buffered profiling state to the profile file.
+ * Has no effect if the profiler has not been started.
+ */
+PERFTOOLS_DLL_DECL void ProfilerFlush(void);
+
+
+/* DEPRECATED: these functions were used to enable/disable profiling
+ * in the current thread, but no longer do anything.
+ */
+PERFTOOLS_DLL_DECL void ProfilerEnable(void);
+PERFTOOLS_DLL_DECL void ProfilerDisable(void);
+
+/* Returns nonzero if profiling is currently enabled, zero if it's not. */
+PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(void);
+
+/* Routine for registering new threads with the profiler.
+ */
+PERFTOOLS_DLL_DECL void ProfilerRegisterThread(void);
+
+/* Stores state about profiler's current status into "*state". */
+struct ProfilerState {
+  int    enabled;             /* Is profiling currently enabled? */
+  time_t start_time;          /* If enabled, when was profiling started? */
+  char   profile_name[1024];  /* Name of profile file being written, or '\0' */
+  int    samples_gathered;    /* Number of samples gathered so far (or 0) */
+};
+PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  /* BASE_PROFILER_H_ */
diff --git a/src/third_party/gperftools-2.5/src/gperftools/stacktrace.h b/src/third_party/gperftools-2.5/src/gperftools/stacktrace.h
new file mode 100644
index 00000000000..2b9c5a13209
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/gperftools/stacktrace.h
@@ -0,0 +1,117 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routines to extract the current stack trace. These functions are +// thread-safe. + +#ifndef GOOGLE_STACKTRACE_H_ +#define GOOGLE_STACKTRACE_H_ + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + + +// Skips the most recent "skip_count" stack frames (also skips the +// frame generated for the "GetStackFrames" routine itself), and then +// records the pc values for up to the next "max_depth" frames in +// "result", and the corresponding stack frame sizes in "sizes". +// Returns the number of values recorded in "result"/"sizes". +// +// Example: +// main() { foo(); } +// foo() { bar(); } +// bar() { +// void* result[10]; +// int sizes[10]; +// int depth = GetStackFrames(result, sizes, 10, 1); +// } +// +// The GetStackFrames call will skip the frame for "bar". It will +// return 2 and will produce pc values that map to the following +// procedures: +// result[0] foo +// result[1] main +// (Actually, there may be a few more entries after "main" to account for +// startup procedures.) +// And corresponding stack frame sizes will also be recorded: +// sizes[0] 16 +// sizes[1] 16 +// (Stack frame sizes of 16 above are just for illustration purposes.) +// Stack frame sizes of 0 or less indicate that those frame sizes couldn't +// be identified. +// +// This routine may return fewer stack frame entries than are +// available. Also note that "result" and "sizes" must both be non-NULL. +extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, + int skip_count); + +// Same as above, but to be used from a signal handler. The "uc" parameter +// should be the pointer to ucontext_t which was passed as the 3rd parameter +// to sa_sigaction signal handler. It may help the unwinder to get a +// better stack trace under certain conditions. The "uc" may safely be NULL. 
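+// For example, from within a sa_sigaction handler (a sketch; the handler
+// name is illustrative):
+//   void Handler(int signo, siginfo_t* info, void* uc) {
+//     void* stack[64];
+//     int sizes[64];
+//     int depth = GetStackFramesWithContext(stack, sizes, 64, 0, uc);
+//   }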
+extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, + int skip_count, const void *uc); + +// This is similar to the GetStackFrames routine, except that it returns +// the stack trace only, and not the stack frame sizes as well. +// Example: +// main() { foo(); } +// foo() { bar(); } +// bar() { +// void* result[10]; +// int depth = GetStackTrace(result, 10, 1); +// } +// +// This produces: +// result[0] foo +// result[1] main +// .... ... +// +// "result" must not be NULL. +extern PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth, + int skip_count); + +// Same as above, but to be used from a signal handler. The "uc" parameter +// should be the pointer to ucontext_t which was passed as the 3rd parameter +// to sa_sigaction signal handler. It may help the unwinder to get a +// better stack trace under certain conditions. The "uc" may safely be NULL. +extern PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth, + int skip_count, const void *uc); + +#endif /* GOOGLE_STACKTRACE_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/t1.sh b/src/third_party/gperftools-2.5/src/gperftools/t1.sh new file mode 100644 index 00000000000..c66e7c95a4d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/t1.sh @@ -0,0 +1,8 @@ + +TCMALLOC_H=tcmalloc.h +TCMALLOC_H_TMP=tcmalloc.h.bak +for line_number in $(grep -n "@ac_cv_have_struct_mallinfo@" tcmalloc.h.in | cut -d: -f1) ; do + sed "${line_number}s/.*/#ifdef HAVE_STRUCT_MALLINFO/" < $TCMALLOC_H > $TCMALLOC_H_TMP + cp $TCMALLOC_H_TMP $TCMALLOC_H +done + diff --git a/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h new file mode 100644 index 00000000000..bb5dadb5a98 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h @@ -0,0 +1,147 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR 2 +#define TC_VERSION_MINOR 5 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "gperftools 2.5" + +/* For struct mallinfo, if it's defined. */ +#ifdef HAVE_STRUCT_MALLINFO +# include <malloc.h> +#endif + +#ifdef __cplusplus +#define PERFTOOLS_THROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_THROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_THROW +# endif +#endif + +#ifndef PERFTOOLS_DLL_DECL +#define PERFTOOLS_DLL_DECL_DEFINED +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +namespace std { +struct nothrow_t; +} + +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). + */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW; +#ifdef HAVE_STRUCT_MALLINFO + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW; +#endif + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
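+   * For instance (a sketch): if p = tc_malloc(100) succeeds, then
+   * tc_malloc_size(p) returns at least 100.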
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; +} +#endif + +/* We're only un-defining those for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_THROW + +#ifdef PERFTOOLS_DLL_DECL_DEFINED +#undef PERFTOOLS_DLL_DECL +#undef PERFTOOLS_DLL_DECL_DEFINED +#endif + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h.bak b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h.bak new file mode 100644 index 00000000000..bb5dadb5a98 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h.bak @@ -0,0 +1,147 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR 2 +#define TC_VERSION_MINOR 5 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "gperftools 2.5" + +/* For struct mallinfo, if it's defined. */ +#ifdef HAVE_STRUCT_MALLINFO +# include <malloc.h> +#endif + +#ifdef __cplusplus +#define PERFTOOLS_THROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_THROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_THROW +# endif +#endif + +#ifndef PERFTOOLS_DLL_DECL +#define PERFTOOLS_DLL_DECL_DEFINED +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +namespace std { +struct nothrow_t; +} + +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). + */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW; +#ifdef HAVE_STRUCT_MALLINFO + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW; +#endif + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; +} +#endif + +/* We're only un-defining those for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_THROW + +#ifdef PERFTOOLS_DLL_DECL_DEFINED +#undef PERFTOOLS_DLL_DECL +#undef PERFTOOLS_DLL_DECL_DEFINED +#endif + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h.in b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h.in new file mode 100644 index 00000000000..adf04b437ec --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.h.in @@ -0,0 +1,147 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR @TC_VERSION_MAJOR@ +#define TC_VERSION_MINOR @TC_VERSION_MINOR@ +#define TC_VERSION_PATCH "@TC_VERSION_PATCH@" +#define TC_VERSION_STRING "gperftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@" + +/* For struct mallinfo, if it's defined. */ +#if @ac_cv_have_struct_mallinfo@ +# include <malloc.h> +#endif + +#ifdef __cplusplus +#define PERFTOOLS_THROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_THROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_THROW +# endif +#endif + +#ifndef PERFTOOLS_DLL_DECL +#define PERFTOOLS_DLL_DECL_DEFINED +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +namespace std { +struct nothrow_t; +} + +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). + */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW; +#if @ac_cv_have_struct_mallinfo@ + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW; +#endif + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; +} +#endif + +/* We're only un-defining those for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_THROW + +#ifdef PERFTOOLS_DLL_DECL_DEFINED +#undef PERFTOOLS_DLL_DECL +#undef PERFTOOLS_DLL_DECL_DEFINED +#endif + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.hbak b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.hbak new file mode 100644 index 00000000000..ebb59af8472 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/gperftools/tcmalloc.hbak @@ -0,0 +1,147 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR 2 +#define TC_VERSION_MINOR 5 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "gperftools 2.5" + +/* For struct mallinfo, if it's defined. */ +#if HAVE_STRUCT_MALLINFO +# include <malloc.h> +#endif + +#ifdef __cplusplus +#define PERFTOOLS_THROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_THROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_THROW +# endif +#endif + +#ifndef PERFTOOLS_DLL_DECL +#define PERFTOOLS_DLL_DECL_DEFINED +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +namespace std { +struct nothrow_t; +} + +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). + */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW; +#if HAVE_STRUCT_MALLINFO + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW; +#endif + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; +} +#endif + +/* We're only un-defining those for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_THROW + +#ifdef PERFTOOLS_DLL_DECL_DEFINED +#undef PERFTOOLS_DLL_DECL +#undef PERFTOOLS_DLL_DECL_DEFINED +#endif + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.5/src/heap-checker-bcad.cc b/src/third_party/gperftools-2.5/src/heap-checker-bcad.cc new file mode 100644 index 00000000000..00efdb7cfd4 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/heap-checker-bcad.cc @@ -0,0 +1,93 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Maxim Lifantsev +// +// A file to ensure that components of heap leak checker run before +// all global object constructors and after all global object +// destructors. +// +// This file must be the last library any binary links against. 
+// Otherwise, the heap checker may not be able to run early enough to +// catalog all the global objects in your program. If this happens, +// and later in the program you allocate memory and have one of these +// "uncataloged" global objects point to it, the heap checker will +// consider that allocation to be a leak, even though it's not (since +// the allocated object is reachable from global data and hence "live"). + +#include <stdlib.h> // for abort() +#include <gperftools/malloc_extension.h> + +// A dummy variable to refer from heap-checker.cc. This is to make +// sure this file is not optimized out by the linker. +bool heap_leak_checker_bcad_variable; + +extern void HeapLeakChecker_AfterDestructors(); // in heap-checker.cc + +// A helper class to ensure that some components of heap leak checking +// can happen before construction and after destruction +// of all global/static objects. +class HeapLeakCheckerGlobalPrePost { + public: + HeapLeakCheckerGlobalPrePost() { + if (count_ == 0) { + // The 'new int' will ensure that we have run an initial malloc + // hook, which will set up the heap checker via + // MallocHook_InitAtFirstAllocation_HeapLeakChecker. See malloc_hook.cc. + // This is done in this roundabout fashion in order to avoid self-deadlock + // if we directly called HeapLeakChecker_BeforeConstructors here. + delete new int; + // This needs to be called before the first allocation of an STL + // object, but after libc is done setting up threads (because it + // calls setenv, which requires a thread-aware errno). By + // putting it here, we hope it's the first bit of code executed + // after the libc global-constructor code. + MallocExtension::Initialize(); + } + ++count_; + } + ~HeapLeakCheckerGlobalPrePost() { + if (count_ <= 0) abort(); + --count_; + if (count_ == 0) HeapLeakChecker_AfterDestructors(); + } + private: + // Counter of constructions/destructions of objects of this class + // (just in case there are more than one of them). + static int count_; +}; + +int HeapLeakCheckerGlobalPrePost::count_ = 0; + +// The early-construction/late-destruction global object. +static const HeapLeakCheckerGlobalPrePost heap_leak_checker_global_pre_post; diff --git a/src/third_party/gperftools-2.5/src/heap-checker.cc b/src/third_party/gperftools-2.5/src/heap-checker.cc new file mode 100755 index 00000000000..9c82dea08e4 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/heap-checker.cc @@ -0,0 +1,2388 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Maxim Lifantsev +// + +#include "config.h" + +#include <fcntl.h> // for O_RDONLY (we use syscall to do actual reads) +#include <string.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <assert.h> + +#if defined(HAVE_LINUX_PTRACE_H) +#include <linux/ptrace.h> +#endif +#ifdef HAVE_SYS_SYSCALL_H +#include <sys/syscall.h> +#endif +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +#include <wtypes.h> +#include <winbase.h> +#undef ERROR // windows defines these as macros, which can cause trouble +#undef max +#undef min +#endif + +#include <string> +#include <vector> +#include <map> +#include <set> +#include <algorithm> +#include <functional> + +#include <gperftools/heap-checker.h> + +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include <gperftools/stacktrace.h> +#include "base/commandlineflags.h" +#include "base/elfcore.h" // for i386_regs +#include "base/thread_lister.h" +#include "heap-profile-table.h" +#include "base/low_level_alloc.h" +#include "malloc_hook-inl.h" +#include <gperftools/malloc_hook.h> +#include <gperftools/malloc_extension.h> +#include "maybe_threads.h" +#include "memory_region_map.h" +#include "base/spinlock.h" +#include "base/sysinfo.h" +#include "base/stl_allocator.h" + +using std::string; +using std::basic_string; +using std::pair; +using std::map; +using std::set; +using std::vector; +using std::swap; +using std::make_pair; +using std::min; +using std::max; +using std::less; +using std::char_traits; + +// If current process is being ptrace()d, 'TracerPid' in /proc/self/status +// will be non-zero. +static bool IsDebuggerAttached(void) { // only works under linux, probably + char buf[256]; // TracerPid comes relatively earlier in status output + int fd = open("/proc/self/status", O_RDONLY); + if (fd == -1) { + return false; // Can't tell for sure. 
+ }
+ const int len = read(fd, buf, sizeof(buf));
+ bool rc = false;
+ if (len > 0) {
+ const char *const kTracerPid = "TracerPid:\t";
+ buf[len - 1] = '\0';
+ const char *p = strstr(buf, kTracerPid);
+ if (p != NULL) {
+ rc = (strncmp(p + strlen(kTracerPid), "0\n", 2) != 0);
+ }
+ }
+ close(fd);
+ return rc;
+}
+
+// This is the default if you don't link in -lprofiler
+extern "C" {
+ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL bool ProfilingIsEnabledForAllThreads();
+bool ProfilingIsEnabledForAllThreads() { return false; }
+}
+
+//----------------------------------------------------------------------
+// Flags that control heap-checking
+//----------------------------------------------------------------------
+
+DEFINE_string(heap_check,
+ EnvToString("HEAPCHECK", ""),
+ "The heap leak checking to be done over the whole executable: "
+ "\"minimal\", \"normal\", \"strict\", "
+ "\"draconian\", \"as-is\", and \"local\" "
+ " or the empty string are the supported choices. "
+ "(See HeapLeakChecker_InternalInitStart for details.)");
+
+DEFINE_bool(heap_check_report, true, "Obsolete");
+
+DEFINE_bool(heap_check_before_constructors,
+ true,
+ "deprecated; pretty much always true now");
+
+DEFINE_bool(heap_check_after_destructors,
+ EnvToBool("HEAP_CHECK_AFTER_DESTRUCTORS", false),
+ "If overall heap check is to end after global destructors "
+ "or right after all REGISTER_HEAPCHECK_CLEANUP's");
+
+DEFINE_bool(heap_check_strict_check, true, "Obsolete");
+
+DEFINE_bool(heap_check_ignore_global_live,
+ EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true),
+ "If overall heap check is to ignore heap objects reachable "
+ "from the global data");
+
+DEFINE_bool(heap_check_identify_leaks,
+ EnvToBool("HEAP_CHECK_IDENTIFY_LEAKS", false),
+ "If heap check should generate the addresses of the leaked "
+ "objects in the memory leak profiles. This may be useful "
+ "in tracking down leaks where only a small fraction of "
+ "objects allocated at the same stack trace are leaked.");
+
+DEFINE_bool(heap_check_ignore_thread_live,
+ EnvToBool("HEAP_CHECK_IGNORE_THREAD_LIVE", true),
+ "If set to true, objects reachable from thread stacks "
+ "and registers are not reported as leaks");
+
+DEFINE_bool(heap_check_test_pointer_alignment,
+ EnvToBool("HEAP_CHECK_TEST_POINTER_ALIGNMENT", false),
+ "Set to true to check if the found leak can be due to "
+ "use of unaligned pointers");
+
+// Alignment at which all pointers in memory are supposed to be located;
+// use 1 if any alignment is ok.
+// The heap_check_test_pointer_alignment flag governs whether we also try
+// the value of 1.
+// The larger this alignment can be, the smaller the chance of missing real leaks.
+static const size_t kPointerSourceAlignment = sizeof(void*);
+DEFINE_int32(heap_check_pointer_source_alignment,
+ EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT",
+ kPointerSourceAlignment),
+ "Alignment at which all pointers in memory are supposed to be "
+ "located. Use 1 if any alignment is ok.");
+
+// A reasonable default to handle pointers inside of typical class objects:
+// Too low and we won't be able to traverse pointers to normally-used
+// nested objects and base parts of multiple-inherited objects.
+// Too high and it will both slow down leak checking (FindInsideAlloc
+// in HaveOnHeapLocked will get slower when there are large on-heap objects)
+// and make it probabilistically more likely to miss leaks
+// of large-sized objects.
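+// As a concrete illustration of this trade-off (the numbers here are just
+// an example, not taken from the code): with the default of 1024, a heap
+// object whose only reference anywhere is a stored pointer to its interior
+// at offset 4096 will not be reached by the flood traversal and may be
+// falsely reported as a leak; -1 bounds the offset by the largest heap
+// object size instead.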
+static const int64 kHeapCheckMaxPointerOffset = 1024; +DEFINE_int64(heap_check_max_pointer_offset, + EnvToInt("HEAP_CHECK_MAX_POINTER_OFFSET", + kHeapCheckMaxPointerOffset), + "Largest pointer offset for which we traverse " + "pointers going inside of heap allocated objects. " + "Set to -1 to use the actual largest heap object size."); + +DEFINE_bool(heap_check_run_under_gdb, + EnvToBool("HEAP_CHECK_RUN_UNDER_GDB", false), + "If false, turns off heap-checking library when running under gdb " + "(normally, set to 'true' only when debugging the heap-checker)"); + +DEFINE_int32(heap_check_delay_seconds, 0, + "Number of seconds to delay on-exit heap checking." + " If you set this flag," + " you may also want to set exit_timeout_seconds in order to" + " avoid exit timeouts.\n" + "NOTE: This flag is to be used only to help diagnose issues" + " where it is suspected that the heap checker is reporting" + " false leaks that will disappear if the heap checker delays" + " its checks. Report any such issues to the heap-checker" + " maintainer(s)."); + +//---------------------------------------------------------------------- + +DEFINE_string(heap_profile_pprof, + EnvToString("PPROF_PATH", "pprof"), + "OBSOLETE; not used"); + +DEFINE_string(heap_check_dump_directory, + EnvToString("HEAP_CHECK_DUMP_DIRECTORY", "/tmp"), + "Directory to put heap-checker leak dump information"); + + +//---------------------------------------------------------------------- +// HeapLeakChecker global data +//---------------------------------------------------------------------- + +// Global lock for all the global data of this module. +static SpinLock heap_checker_lock(SpinLock::LINKER_INITIALIZED); + +//---------------------------------------------------------------------- + +// Heap profile prefix for leak checking profiles. +// Gets assigned once when leak checking is turned on, then never modified. +static const string* profile_name_prefix = NULL; + +// Whole-program heap leak checker. +// Gets assigned once when leak checking is turned on, +// then main_heap_checker is never deleted. +static HeapLeakChecker* main_heap_checker = NULL; + +// Whether we will use main_heap_checker to do a check at program exit +// automatically. In any case user can ask for more checks on main_heap_checker +// via GlobalChecker(). +static bool do_main_heap_check = false; + +// The heap profile we use to collect info about the heap. +// This is created in HeapLeakChecker::BeforeConstructorsLocked +// together with setting heap_checker_on (below) to true +// and registering our new/delete malloc hooks; +// similarly all are unset in HeapLeakChecker::TurnItselfOffLocked. +static HeapProfileTable* heap_profile = NULL; + +// If we are doing (or going to do) any kind of heap-checking. +static bool heap_checker_on = false; + +// pid of the process that does whole-program heap leak checking +static pid_t heap_checker_pid = 0; + +// If we did heap profiling during global constructors execution +static bool constructor_heap_profiling = false; + +// RAW_VLOG level we dump key INFO messages at. If you want to turn +// off these messages, set the environment variable PERFTOOLS_VERBOSE=-1. +static const int heap_checker_info_level = 0; + +//---------------------------------------------------------------------- +// HeapLeakChecker's own memory allocator that is +// independent of the normal program allocator. +//---------------------------------------------------------------------- + +// Wrapper of LowLevelAlloc for STL_Allocator and direct use. 
+// We always access this class under the held heap_checker_lock;
+// this allows us in particular to protect the period when threads are stopped
+// at random spots with TCMalloc_ListAllProcessThreads by heap_checker_lock,
+// w/o worrying about the lock in LowLevelAlloc::Arena.
+// We rely on the fact that we use our own arena with its own lock here.
+class HeapLeakChecker::Allocator {
+ public:
+ static void Init() {
+ RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+ RAW_DCHECK(arena_ == NULL, "");
+ arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
+ }
+ static void Shutdown() {
+ RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+ if (!LowLevelAlloc::DeleteArena(arena_) || alloc_count_ != 0) {
+ RAW_LOG(FATAL, "Internal heap checker leak of %d objects", alloc_count_);
+ }
+ }
+ static int alloc_count() {
+ RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+ return alloc_count_;
+ }
+ static void* Allocate(size_t n) {
+ RAW_DCHECK(arena_ && heap_checker_lock.IsHeld(), "");
+ void* p = LowLevelAlloc::AllocWithArena(n, arena_);
+ if (p) alloc_count_ += 1;
+ return p;
+ }
+ static void Free(void* p) {
+ RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+ if (p) alloc_count_ -= 1;
+ LowLevelAlloc::Free(p);
+ }
+ static void Free(void* p, size_t /* n */) {
+ Free(p);
+ }
+ // destruct, free, and set *p to NULL
+ template<typename T> static void DeleteAndNull(T** p) {
+ (*p)->~T();
+ Free(*p);
+ *p = NULL;
+ }
+ template<typename T> static void DeleteAndNullIfNot(T** p) {
+ if (*p != NULL) DeleteAndNull(p);
+ }
+ private:
+ static LowLevelAlloc::Arena* arena_;
+ static int alloc_count_;
+};
+
+LowLevelAlloc::Arena* HeapLeakChecker::Allocator::arena_ = NULL;
+int HeapLeakChecker::Allocator::alloc_count_ = 0;
+
+//----------------------------------------------------------------------
+// HeapLeakChecker live object tracking components
+//----------------------------------------------------------------------
+
+// Cases of live object placement we distinguish
+enum ObjectPlacement {
+ MUST_BE_ON_HEAP, // Must point to a live object of the matching size in the
+ // heap_profile map of the heap when we get to it
+ IGNORED_ON_HEAP, // Is a live (ignored) object on heap
+ MAYBE_LIVE, // Is a piece of writable memory from /proc/self/maps
+ IN_GLOBAL_DATA, // Is part of global data region of the executable
+ THREAD_DATA, // Part of a thread stack and a thread descriptor with TLS
+ THREAD_REGISTERS, // Values in registers of some thread
+};
+
+// Information about an allocated object
+struct AllocObject {
+ const void* ptr; // the object
+ uintptr_t size; // its size
+ ObjectPlacement place; // where ptr points to
+
+ AllocObject(const void* p, size_t s, ObjectPlacement l)
+ : ptr(p), size(s), place(l) { }
+};
+
+// All objects (memory ranges) ignored via HeapLeakChecker::IgnoreObject
+// Key is the object's address; value is its size.
+typedef map<uintptr_t, size_t, less<uintptr_t>,
+ STL_Allocator<pair<const uintptr_t, size_t>,
+ HeapLeakChecker::Allocator>
+ > IgnoredObjectsMap;
+static IgnoredObjectsMap* ignored_objects = NULL;
+
+// All objects (memory ranges) that we consider to be the sources of pointers
+// to live (not leaked) objects.
+// At different times this holds (what can be reached from) global data regions
+// and the objects we've been told to ignore.
+// For any AllocObject::ptr "live_objects" is supposed to contain at most one
+// record at any time. We maintain this by checking with the heap_profile map
+// of the heap and removing the live heap objects we've handled from it.
+// This vector is maintained as a stack and the frontier of reachable
+// live heap objects in our flood traversal of them.
+typedef vector<AllocObject,
+ STL_Allocator<AllocObject, HeapLeakChecker::Allocator>
+ > LiveObjectsStack;
+static LiveObjectsStack* live_objects = NULL;
+
+// A special string type that uses my allocator
+typedef basic_string<char, char_traits<char>,
+ STL_Allocator<char, HeapLeakChecker::Allocator>
+ > HCL_string;
+
+// A placeholder to fill in the starting values for live_objects
+// for each library so we can keep the library-name association for logging.
+typedef map<HCL_string, LiveObjectsStack, less<HCL_string>,
+ STL_Allocator<pair<const HCL_string, LiveObjectsStack>,
+ HeapLeakChecker::Allocator>
+ > LibraryLiveObjectsStacks;
+static LibraryLiveObjectsStacks* library_live_objects = NULL;
+
+// Value stored in the map of disabled address ranges;
+// its key is the end of the address range.
+// We'll ignore allocations with a return address in a disabled range
+// if the address occurs at 'max_depth' or less in the stack trace.
+struct HeapLeakChecker::RangeValue {
+ uintptr_t start_address; // the start of the range
+ int max_depth; // the maximal stack depth to disable at
+};
+typedef map<uintptr_t, HeapLeakChecker::RangeValue, less<uintptr_t>,
+ STL_Allocator<pair<const uintptr_t, HeapLeakChecker::RangeValue>,
+ HeapLeakChecker::Allocator>
+ > DisabledRangeMap;
+// The disabled program counter address ranges for profile dumping
+// that are registered with HeapLeakChecker::DisableChecksFromToLocked.
+static DisabledRangeMap* disabled_ranges = NULL;
+
+// Set of stack tops.
+// These are used to consider live only appropriate chunks of the memory areas
+// that are used for stacks (and maybe thread-specific data as well)
+// so that we do not treat pointers from outdated stack frames as live.
+typedef set<uintptr_t, less<uintptr_t>,
+ STL_Allocator<uintptr_t, HeapLeakChecker::Allocator>
+ > StackTopSet;
+static StackTopSet* stack_tops = NULL;
+
+// A map of ranges of code addresses for the system libraries
+// that can mmap/mremap/sbrk-allocate memory regions for stacks
+// and thread-local storage that we want to consider as live global data.
+// Maps from the end address to the start address.
+typedef map<uintptr_t, uintptr_t, less<uintptr_t>,
+ STL_Allocator<pair<const uintptr_t, uintptr_t>,
+ HeapLeakChecker::Allocator>
+ > GlobalRegionCallerRangeMap;
+static GlobalRegionCallerRangeMap* global_region_caller_ranges = NULL;
+
+// TODO(maxim): make our big data structs into their own modules
+
+// Disabler is implemented by keeping track of a per-thread count
+// of active Disabler objects. Any objects allocated while the
+// count > 0 are not reported.
+
+#ifdef HAVE_TLS
+
+static __thread int thread_disable_counter
+// The "initial exec" model is faster than the default TLS model, at
+// the cost that you can't dlopen this library. But dlopen on heap-checker
+// doesn't work anyway -- it must run before main -- so this is a good
+// trade-off.
+# ifdef HAVE___ATTRIBUTE__
+ __attribute__ ((tls_model ("initial-exec")))
+# endif
+ ;
+inline int get_thread_disable_counter() {
+ return thread_disable_counter;
+}
+inline void set_thread_disable_counter(int value) {
+ thread_disable_counter = value;
+}
+
+#else // #ifdef HAVE_TLS
+
+static pthread_key_t thread_disable_counter_key;
+static int main_thread_counter; // storage for use before main()
+static bool use_main_thread_counter = true;
+
+// TODO(csilvers): this is called from NewHook, in the middle of malloc().
+// If perftools_pthread_getspecific calls malloc, that will lead to an +// infinite loop. I don't know how to fix that, so I hope it never happens! +inline int get_thread_disable_counter() { + if (use_main_thread_counter) // means we're running really early + return main_thread_counter; + void* p = perftools_pthread_getspecific(thread_disable_counter_key); + return (intptr_t)p; // kinda evil: store the counter directly in the void* +} + +inline void set_thread_disable_counter(int value) { + if (use_main_thread_counter) { // means we're running really early + main_thread_counter = value; + return; + } + intptr_t pointer_sized_value = value; + // kinda evil: store the counter directly in the void* + void* p = (void*)pointer_sized_value; + // NOTE: this may call malloc, which will call NewHook which will call + // get_thread_disable_counter() which will call pthread_getspecific(). I + // don't know if anything bad can happen if we call getspecific() in the + // middle of a setspecific() call. It seems to work ok in practice... + perftools_pthread_setspecific(thread_disable_counter_key, p); +} + +// The idea here is that this initializer will run pretty late: after +// pthreads have been totally set up. At this point we can call +// pthreads routines, so we set those up. +class InitThreadDisableCounter { + public: + InitThreadDisableCounter() { + perftools_pthread_key_create(&thread_disable_counter_key, NULL); + // Set up the main thread's value, which we have a special variable for. + void* p = (void*)main_thread_counter; // store the counter directly + perftools_pthread_setspecific(thread_disable_counter_key, p); + use_main_thread_counter = false; + } +}; +InitThreadDisableCounter init_thread_disable_counter; + +#endif // #ifdef HAVE_TLS + +HeapLeakChecker::Disabler::Disabler() { + // It is faster to unconditionally increment the thread-local + // counter than to check whether or not heap-checking is on + // in a thread-safe manner. + int counter = get_thread_disable_counter(); + set_thread_disable_counter(counter + 1); + RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1); +} + +HeapLeakChecker::Disabler::~Disabler() { + int counter = get_thread_disable_counter(); + RAW_DCHECK(counter > 0, ""); + if (counter > 0) { + set_thread_disable_counter(counter - 1); + RAW_VLOG(10, "Decreasing thread disable counter to %d", counter); + } else { + RAW_VLOG(0, "Thread disable counter underflow : %d", counter); + } +} + +//---------------------------------------------------------------------- + +// The size of the largest heap object allocated so far. +static size_t max_heap_object_size = 0; +// The possible range of addresses that can point +// into one of the elements of heap_objects. +static uintptr_t min_heap_address = uintptr_t(-1LL); +static uintptr_t max_heap_address = 0; + +//---------------------------------------------------------------------- + +// Simple casting helpers for uintptr_t and void*: +template<typename T> +inline static const void* AsPtr(T addr) { + return reinterpret_cast<void*>(addr); +} +inline static uintptr_t AsInt(const void* ptr) { + return reinterpret_cast<uintptr_t>(ptr); +} + +//---------------------------------------------------------------------- + +// We've seen reports that strstr causes heap-checker crashes in some +// libc's (?): +// http://code.google.com/p/gperftools/issues/detail?id=263 +// It's simple enough to use our own. This is not in time-critical code. 
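+// For a non-empty s2 the behavior is meant to match strstr(); e.g.
+// (illustrative inputs only):
+//   hc_strstr("ld-linux-x86-64.so.2", "ld-linux") -> points at the match
+//   hc_strstr("libfoo.so", "libbar")              -> NULL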
+static const char* hc_strstr(const char* s1, const char* s2) { + const size_t len = strlen(s2); + RAW_CHECK(len > 0, "Unexpected empty string passed to strstr()"); + for (const char* p = strchr(s1, *s2); p != NULL; p = strchr(p+1, *s2)) { + if (strncmp(p, s2, len) == 0) { + return p; + } + } + return NULL; +} + +//---------------------------------------------------------------------- + +// Our hooks for MallocHook +static void NewHook(const void* ptr, size_t size) { + if (ptr != NULL) { + const int counter = get_thread_disable_counter(); + const bool ignore = (counter > 0); + RAW_VLOG(16, "Recording Alloc: %p of %" PRIuS "; %d", ptr, size, + int(counter)); + + // Fetch the caller's stack trace before acquiring heap_checker_lock. + void* stack[HeapProfileTable::kMaxStackDepth]; + int depth = HeapProfileTable::GetCallerStackTrace(0, stack); + + { SpinLockHolder l(&heap_checker_lock); + if (size > max_heap_object_size) max_heap_object_size = size; + uintptr_t addr = AsInt(ptr); + if (addr < min_heap_address) min_heap_address = addr; + addr += size; + if (addr > max_heap_address) max_heap_address = addr; + if (heap_checker_on) { + heap_profile->RecordAlloc(ptr, size, depth, stack); + if (ignore) { + heap_profile->MarkAsIgnored(ptr); + } + } + } + RAW_VLOG(17, "Alloc Recorded: %p of %" PRIuS "", ptr, size); + } +} + +static void DeleteHook(const void* ptr) { + if (ptr != NULL) { + RAW_VLOG(16, "Recording Free %p", ptr); + { SpinLockHolder l(&heap_checker_lock); + if (heap_checker_on) heap_profile->RecordFree(ptr); + } + RAW_VLOG(17, "Free Recorded: %p", ptr); + } +} + +//---------------------------------------------------------------------- + +enum StackDirection { + GROWS_TOWARDS_HIGH_ADDRESSES, + GROWS_TOWARDS_LOW_ADDRESSES, + UNKNOWN_DIRECTION +}; + +// Determine which way the stack grows: + +static StackDirection ATTRIBUTE_NOINLINE GetStackDirection( + const uintptr_t *const ptr) { + uintptr_t x; + if (&x < ptr) + return GROWS_TOWARDS_LOW_ADDRESSES; + if (ptr < &x) + return GROWS_TOWARDS_HIGH_ADDRESSES; + + RAW_CHECK(0, ""); // Couldn't determine the stack direction. + + return UNKNOWN_DIRECTION; +} + +// Direction of stack growth (will initialize via GetStackDirection()) +static StackDirection stack_direction = UNKNOWN_DIRECTION; + +// This routine is called for every thread stack we know about to register it. 
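+// In outline (a summary of the code below): record 'top' in stack_tops,
+// then mark the in-use part of the stack live -- [top, region end) when
+// the stack grows towards low addresses, [region start, top) when it grows
+// towards high addresses; if MemoryRegionMap does not know the region,
+// carve the stack out of the enclosing /proc/self/maps chunk kept in
+// library_live_objects instead.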
+static void RegisterStackLocked(const void* top_ptr) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + RAW_VLOG(10, "Thread stack at %p", top_ptr); + uintptr_t top = AsInt(top_ptr); + stack_tops->insert(top); // add for later use + + // make sure stack_direction is initialized + if (stack_direction == UNKNOWN_DIRECTION) { + stack_direction = GetStackDirection(&top); + } + + // Find memory region with this stack + MemoryRegionMap::Region region; + if (MemoryRegionMap::FindAndMarkStackRegion(top, ®ion)) { + // Make the proper portion of the stack live: + if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { + RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes", + top_ptr, region.end_addr - top); + live_objects->push_back(AllocObject(top_ptr, region.end_addr - top, + THREAD_DATA)); + } else { // GROWS_TOWARDS_HIGH_ADDRESSES + RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes", + AsPtr(region.start_addr), + top - region.start_addr); + live_objects->push_back(AllocObject(AsPtr(region.start_addr), + top - region.start_addr, + THREAD_DATA)); + } + // not in MemoryRegionMap, look in library_live_objects: + } else if (FLAGS_heap_check_ignore_global_live) { + for (LibraryLiveObjectsStacks::iterator lib = library_live_objects->begin(); + lib != library_live_objects->end(); ++lib) { + for (LiveObjectsStack::iterator span = lib->second.begin(); + span != lib->second.end(); ++span) { + uintptr_t start = AsInt(span->ptr); + uintptr_t end = start + span->size; + if (start <= top && top < end) { + RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p", + top_ptr, AsPtr(start), AsPtr(end)); + // Shrink start..end region by chopping away the memory regions in + // MemoryRegionMap that land in it to undo merging of regions + // in /proc/self/maps, so that we correctly identify what portion + // of start..end is actually the stack region. + uintptr_t stack_start = start; + uintptr_t stack_end = end; + // can optimize-away this loop, but it does not run often + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + for (MemoryRegionMap::RegionIterator r = + MemoryRegionMap::BeginRegionLocked(); + r != MemoryRegionMap::EndRegionLocked(); ++r) { + if (top < r->start_addr && r->start_addr < stack_end) { + stack_end = r->start_addr; + } + if (stack_start < r->end_addr && r->end_addr <= top) { + stack_start = r->end_addr; + } + } + if (stack_start != start || stack_end != end) { + RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p", + top_ptr, AsPtr(stack_start), AsPtr(stack_end)); + } + // Make the proper portion of the stack live: + if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { + RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes", + top_ptr, stack_end - top); + live_objects->push_back( + AllocObject(top_ptr, stack_end - top, THREAD_DATA)); + } else { // GROWS_TOWARDS_HIGH_ADDRESSES + RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes", + AsPtr(stack_start), top - stack_start); + live_objects->push_back( + AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA)); + } + lib->second.erase(span); // kill the rest of the region + // Put the non-stack part(s) of the region back: + if (stack_start != start) { + lib->second.push_back(AllocObject(AsPtr(start), stack_start - start, + MAYBE_LIVE)); + } + if (stack_end != end) { + lib->second.push_back(AllocObject(AsPtr(stack_end), end - stack_end, + MAYBE_LIVE)); + } + return; + } + } + } + RAW_LOG(ERROR, "Memory region for stack at %p not found. 
" + "Will likely report false leak positives.", top_ptr); + } +} + +// Iterator for heap allocation map data to make ignored objects "live" +// (i.e., treated as roots for the mark-and-sweep phase) +static void MakeIgnoredObjectsLiveCallbackLocked( + const void* ptr, const HeapProfileTable::AllocInfo& info) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + if (info.ignored) { + live_objects->push_back(AllocObject(ptr, info.object_size, + MUST_BE_ON_HEAP)); + } +} + +// Iterator for heap allocation map data to make objects allocated from +// disabled regions of code to be live. +static void MakeDisabledLiveCallbackLocked( + const void* ptr, const HeapProfileTable::AllocInfo& info) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + bool stack_disable = false; + bool range_disable = false; + for (int depth = 0; depth < info.stack_depth; depth++) { + uintptr_t addr = AsInt(info.call_stack[depth]); + if (disabled_ranges) { + DisabledRangeMap::const_iterator iter + = disabled_ranges->upper_bound(addr); + if (iter != disabled_ranges->end()) { + RAW_DCHECK(iter->first > addr, ""); + if (iter->second.start_address < addr && + iter->second.max_depth > depth) { + range_disable = true; // in range; dropping + break; + } + } + } + } + if (stack_disable || range_disable) { + uintptr_t start_address = AsInt(ptr); + uintptr_t end_address = start_address + info.object_size; + StackTopSet::const_iterator iter + = stack_tops->lower_bound(start_address); + if (iter != stack_tops->end()) { + RAW_DCHECK(*iter >= start_address, ""); + if (*iter < end_address) { + // We do not disable (treat as live) whole allocated regions + // if they are used to hold thread call stacks + // (i.e. when we find a stack inside). + // The reason is that we'll treat as live the currently used + // stack portions anyway (see RegisterStackLocked), + // and the rest of the region where the stack lives can well + // contain outdated stack variables which are not live anymore, + // hence should not be treated as such. + RAW_VLOG(11, "Not %s-disabling %" PRIuS " bytes at %p" + ": have stack inside: %p", + (stack_disable ? "stack" : "range"), + info.object_size, ptr, AsPtr(*iter)); + return; + } + } + RAW_VLOG(11, "%s-disabling %" PRIuS " bytes at %p", + (stack_disable ? "Stack" : "Range"), info.object_size, ptr); + live_objects->push_back(AllocObject(ptr, info.object_size, + MUST_BE_ON_HEAP)); + } +} + +static const char kUnnamedProcSelfMapEntry[] = "UNNAMED"; + +// This function takes some fields from a /proc/self/maps line: +// +// start_address start address of a memory region. +// end_address end address of a memory region +// permissions rwx + private/shared bit +// filename filename of the mapped file +// +// If the region is not writeable, then it cannot have any heap +// pointers in it, otherwise we record it as a candidate live region +// to get filtered later. +static void RecordGlobalDataLocked(uintptr_t start_address, + uintptr_t end_address, + const char* permissions, + const char* filename) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + // Ignore non-writeable regions. + if (strchr(permissions, 'w') == NULL) return; + if (filename == NULL || *filename == '\0') { + filename = kUnnamedProcSelfMapEntry; + } + RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, + filename, start_address, end_address); + (*library_live_objects)[filename]. + push_back(AllocObject(AsPtr(start_address), + end_address - start_address, + MAYBE_LIVE)); +} + +// See if 'library' from /proc/self/maps has base name 'library_base' +// i.e. 
contains it and has '.' or '-' after it. +static bool IsLibraryNamed(const char* library, const char* library_base) { + const char* p = hc_strstr(library, library_base); + size_t sz = strlen(library_base); + return p != NULL && (p[sz] == '.' || p[sz] == '-'); +} + +// static +void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, + uintptr_t start_address, + uintptr_t end_address) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + int depth = 0; + // TODO(maxim): maybe this should be extended to also use objdump + // and pick the text portion of the library more precisely. + if (IsLibraryNamed(library, "/libpthread") || + // libpthread has a lot of small "system" leaks we don't care about. + // In particular it allocates memory to store data supplied via + // pthread_setspecific (which can be the only pointer to a heap object). + IsLibraryNamed(library, "/libdl") || + // library loaders leak some "system" heap that we don't care about + IsLibraryNamed(library, "/libcrypto") || + // Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer + // (any library can be, of course, but this one often is because speed + // is so important for making crypto usable). We ignore all its + // allocations because we can't see the call stacks. We'd prefer + // to ignore allocations done in files/symbols that match + // "default_malloc_ex|default_realloc_ex" + // but that doesn't work when the end-result binary is stripped. + IsLibraryNamed(library, "/libjvm") || + // JVM has a lot of leaks we don't care about. + IsLibraryNamed(library, "/libzip") + // The JVM leaks java.util.zip.Inflater after loading classes. + ) { + depth = 1; // only disable allocation calls directly from the library code + } else if (IsLibraryNamed(library, "/ld") + // library loader leaks some "system" heap + // (e.g. thread-local storage) that we don't care about + ) { + depth = 2; // disable allocation calls directly from the library code + // and at depth 2 from it. + // We need depth 2 here solely because of a libc bug that + // forces us to jump through __memalign_hook and MemalignOverride hoops + // in tcmalloc.cc. + // Those buggy __libc_memalign() calls are in ld-linux.so and happen for + // thread-local storage allocations that we want to ignore here. + // We go with the depth-2 hack as a workaround for this libc bug: + // otherwise we'd need to extend MallocHook interface + // so that correct stack depth adjustment can be propagated from + // the exceptional case of MemalignOverride. + // Using depth 2 here should not mask real leaks because ld-linux.so + // does not call user code. 
+ }
+ if (depth) {
+ RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth);
+ DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth);
+ if (IsLibraryNamed(library, "/libpthread") ||
+ IsLibraryNamed(library, "/libdl") ||
+ IsLibraryNamed(library, "/ld")) {
+ RAW_VLOG(10, "Global memory regions made by %s will be live data",
+ library);
+ if (global_region_caller_ranges == NULL) {
+ global_region_caller_ranges =
+ new(Allocator::Allocate(sizeof(GlobalRegionCallerRangeMap)))
+ GlobalRegionCallerRangeMap;
+ }
+ global_region_caller_ranges
+ ->insert(make_pair(end_address, start_address));
+ }
+ }
+}
+
+// static
+HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked(
+ ProcMapsTask proc_maps_task) {
+ RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+ // Need to provide own scratch memory to ProcMapsIterator:
+ ProcMapsIterator::Buffer buffer;
+ ProcMapsIterator it(0, &buffer);
+ if (!it.Valid()) {
+ int errsv = errno;
+ RAW_LOG(ERROR, "Could not open /proc/self/maps: errno=%d. "
+ "Libraries will not be handled correctly.", errsv);
+ return CANT_OPEN_PROC_MAPS;
+ }
+ uint64 start_address, end_address, file_offset;
+ int64 inode;
+ char *permissions, *filename;
+ bool saw_shared_lib = false;
+ bool saw_nonzero_inode = false;
+ bool saw_shared_lib_with_nonzero_inode = false;
+ while (it.Next(&start_address, &end_address, &permissions,
+ &file_offset, &inode, &filename)) {
+ if (start_address >= end_address) {
+ // Warn if a line we may be interested in is ill-formed:
+ if (inode != 0) {
+ RAW_LOG(ERROR, "Errors reading /proc/self/maps. "
+ "Some global memory regions will not "
+ "be handled correctly.");
+ }
+ // Silently skip other ill-formed lines: some are possible,
+ // probably due to the interplay of how /proc/self/maps is updated
+ // while we read it in chunks in ProcMapsIterator and
+ // do things in this loop.
+ continue;
+ }
+ // Determine if any shared libraries are present (this is the same
+ // list of extensions as is found in pprof). We want to ignore
+ // 'fake' libraries with inode 0 when determining this. However, some
+ // systems don't share inodes via /proc, so we turn off this check
+ // if we don't see any evidence that we're getting inode info.
+ if (inode != 0) {
+ saw_nonzero_inode = true;
+ }
+ if ((hc_strstr(filename, "lib") && hc_strstr(filename, ".so")) ||
+ hc_strstr(filename, ".dll") ||
+ // not all .dylib filenames start with lib. .dylib is big enough
+ // that we are unlikely to get false matches just checking that.
+ hc_strstr(filename, ".dylib") || hc_strstr(filename, ".bundle")) {
+ saw_shared_lib = true;
+ if (inode != 0) {
+ saw_shared_lib_with_nonzero_inode = true;
+ }
+ }
+
+ switch (proc_maps_task) {
+ case DISABLE_LIBRARY_ALLOCS:
+ // All lines starting like
+ // "401dc000-4030f000 r??p 00132000 03:01 13991972 lib/bin"
+ // identify the data and code sections of a shared library or our binary
+ if (inode != 0 && strncmp(permissions, "r-xp", 4) == 0) {
+ DisableLibraryAllocsLocked(filename, start_address, end_address);
+ }
+ break;
+ case RECORD_GLOBAL_DATA:
+ RecordGlobalDataLocked(start_address, end_address,
+ permissions, filename);
+ break;
+ default:
+ RAW_CHECK(0, "");
+ }
+ }
+ // If /proc/self/maps is reporting inodes properly (we saw a
+ // non-zero inode), then we only say we saw a shared lib if we saw a
+ // 'real' one, with a non-zero inode.
+ if (saw_nonzero_inode) {
+ saw_shared_lib = saw_shared_lib_with_nonzero_inode;
+ }
+ if (!saw_shared_lib) {
+ RAW_LOG(ERROR, "No shared libs detected. 
Will likely report false leak "
+ "positives for statically linked executables.");
+ return NO_SHARED_LIBS_IN_PROC_MAPS;
+ }
+ return PROC_MAPS_USED;
+}
+
+// Total number and size of live objects dropped from the profile;
+// (re)initialized in IgnoreAllLiveObjectsLocked.
+static int64 live_objects_total;
+static int64 live_bytes_total;
+
+// pid of the thread that is doing the current leak check
+// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
+static pid_t self_thread_pid = 0;
+
+// Status of our thread listing callback execution
+// (protected by our lock; used from within IgnoreAllLiveObjectsLocked)
+static enum {
+ CALLBACK_NOT_STARTED,
+ CALLBACK_STARTED,
+ CALLBACK_COMPLETED,
+} thread_listing_status = CALLBACK_NOT_STARTED;
+
+// Ideally, to avoid deadlocks, this function should not result in any libc
+// or other function calls that might need to lock a mutex:
+// It is called when all threads of a process are stopped
+// at arbitrary points, thus potentially holding those locks.
+//
+// In practice we are calling some simple i/o and sprintf-type library functions
+// for logging messages, but use only our own LowLevelAlloc::Arena allocator.
+//
+// This is known to be buggy: the library i/o function calls are able to cause
+// deadlocks when they request a lock that a stopped thread happens to hold.
+// As far as we know, this issue has so far not resulted in any deadlocks
+// in practice, so for now we are taking our chances that the deadlocks
+// have insignificant frequency.
+//
+// If such deadlocks become a problem, we should make the i/o calls
+// into appropriately direct system calls (or eliminate them);
+// in particular, write() is not safe and vsnprintf() is potentially dangerous
+// due to reliance on locale functions (these are called through RAW_LOG
+// and in other ways).
+//
+
+#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER)
+# if (defined(__i386__) || defined(__x86_64))
+# define THREAD_REGS i386_regs
+# elif defined(__PPC__)
+# define THREAD_REGS ppc_regs
+# endif
+#endif
+
+/*static*/ int HeapLeakChecker::IgnoreLiveThreadsLocked(void* parameter,
+ int num_threads,
+ pid_t* thread_pids,
+ va_list /*ap*/) {
+ RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+ thread_listing_status = CALLBACK_STARTED;
+ RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid());
+
+ if (FLAGS_heap_check_ignore_global_live) {
+ UseProcMapsLocked(RECORD_GLOBAL_DATA);
+ }
+
+ // We put the registers from other threads here
+ // to make pointers stored in them live.
+ vector<void*, STL_Allocator<void*, Allocator> > thread_registers;
+
+ int failures = 0;
+ for (int i = 0; i < num_threads; ++i) {
+ // the leak checking thread itself is handled
+ // specially via self_thread_stack, not here:
+ if (thread_pids[i] == self_thread_pid) continue;
+ RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]);
+#ifdef THREAD_REGS
+ THREAD_REGS thread_regs;
+#define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d))
+ // We use sys_ptrace to avoid thread locking
+ // because this is called from TCMalloc_ListAllProcessThreads
+ // when all but this thread are suspended.
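+ // (PTRACE_GETREGS fills THREAD_REGS -- i386_regs or ppc_regs per the
+ // #define above -- with the stopped thread's registers; below we only
+ // rely on its SP field and on scanning the raw struct for pointer-sized
+ // values.)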
+ if (sys_ptrace(PTRACE_GETREGS, thread_pids[i], NULL, &thread_regs) == 0) { + // Need to use SP to get all the data from the very last stack frame: + COMPILE_ASSERT(sizeof(thread_regs.SP) == sizeof(void*), + SP_register_does_not_look_like_a_pointer); + RegisterStackLocked(reinterpret_cast<void*>(thread_regs.SP)); + // Make registers live (just in case PTRACE_ATTACH resulted in some + // register pointers still being in the registers and not on the stack): + for (void** p = reinterpret_cast<void**>(&thread_regs); + p < reinterpret_cast<void**>(&thread_regs + 1); ++p) { + RAW_VLOG(12, "Thread register %p", *p); + thread_registers.push_back(*p); + } + } else { + failures += 1; + } +#else + failures += 1; +#endif + } + // Use all the collected thread (stack) liveness sources: + IgnoreLiveObjectsLocked("threads stack data", ""); + if (thread_registers.size()) { + // Make thread registers be live heap data sources. + // we rely here on the fact that vector is in one memory chunk: + RAW_VLOG(11, "Live registers at %p of %" PRIuS " bytes", + &thread_registers[0], thread_registers.size() * sizeof(void*)); + live_objects->push_back(AllocObject(&thread_registers[0], + thread_registers.size() * sizeof(void*), + THREAD_REGISTERS)); + IgnoreLiveObjectsLocked("threads register data", ""); + } + // Do all other liveness walking while all threads are stopped: + IgnoreNonThreadLiveObjectsLocked(); + // Can now resume the threads: + TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids); + thread_listing_status = CALLBACK_COMPLETED; + return failures; +} + +// Stack top of the thread that is doing the current leak check +// (protected by our lock; IgnoreAllLiveObjectsLocked sets it) +static const void* self_thread_stack_top; + +// static +void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid); + // Register our own stack: + + // Important that all stack ranges (including the one here) + // are known before we start looking at them + // in MakeDisabledLiveCallbackLocked: + RegisterStackLocked(self_thread_stack_top); + IgnoreLiveObjectsLocked("stack data", ""); + + // Make objects we were told to ignore live: + if (ignored_objects) { + for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin(); + object != ignored_objects->end(); ++object) { + const void* ptr = AsPtr(object->first); + RAW_VLOG(11, "Ignored live object at %p of %" PRIuS " bytes", + ptr, object->second); + live_objects-> + push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP)); + // we do this liveness check for ignored_objects before doing any + // live heap walking to make sure it does not fail needlessly: + size_t object_size; + if (!(heap_profile->FindAlloc(ptr, &object_size) && + object->second == object_size)) { + RAW_LOG(FATAL, "Object at %p of %" PRIuS " bytes from an" + " IgnoreObject() has disappeared", ptr, object->second); + } + } + IgnoreLiveObjectsLocked("ignored objects", ""); + } + + // Treat objects that were allocated when a Disabler was live as + // roots. I.e., if X was allocated while a Disabler was active, + // and Y is reachable from X, arrange that neither X nor Y are + // treated as leaks. 
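+ // A typical client-side pattern this enables (an illustrative sketch;
+ // the function name is hypothetical, see gperftools/heap-checker.h):
+ //   {
+ //     HeapLeakChecker::Disabler disabler; // scoped: bumps the counter
+ //     InitIntentionallyLeakyCache();      // allocations here are ignored
+ //   } // counter drops back; later allocations are checked again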
+ heap_profile->IterateAllocs(MakeIgnoredObjectsLiveCallbackLocked); + IgnoreLiveObjectsLocked("disabled objects", ""); + + // Make code-address-disabled objects live and ignored: + // This in particular makes all thread-specific data live + // because the basic data structure to hold pointers to thread-specific data + // is allocated from libpthreads and we have range-disabled that + // library code with UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS); + // so now we declare all thread-specific data reachable from there as live. + heap_profile->IterateAllocs(MakeDisabledLiveCallbackLocked); + IgnoreLiveObjectsLocked("disabled code", ""); + + // Actually make global data live: + if (FLAGS_heap_check_ignore_global_live) { + bool have_null_region_callers = false; + for (LibraryLiveObjectsStacks::iterator l = library_live_objects->begin(); + l != library_live_objects->end(); ++l) { + RAW_CHECK(live_objects->empty(), ""); + // Process library_live_objects in l->second + // filtering them by MemoryRegionMap: + // It's safe to iterate over MemoryRegionMap + // w/o locks here as we are inside MemoryRegionMap::Lock(): + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + // The only change to MemoryRegionMap possible in this loop + // is region addition as a result of allocating more memory + // for live_objects. This won't invalidate the RegionIterator + // or the intent of the loop. + // --see the comment by MemoryRegionMap::BeginRegionLocked(). + for (MemoryRegionMap::RegionIterator region = + MemoryRegionMap::BeginRegionLocked(); + region != MemoryRegionMap::EndRegionLocked(); ++region) { + // "region" from MemoryRegionMap is to be subtracted from + // (tentatively live) regions in l->second + // if it has a stack inside or it was allocated by + // a non-special caller (not one covered by a range + // in global_region_caller_ranges). + // This will in particular exclude all memory chunks used + // by the heap itself as well as what's been allocated with + // any allocator on top of mmap. 
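+        // The four possible overlaps handled below, with R = *region and
+        // O = *i from the inner loop (addresses growing to the right):
+        //   R covers O entirely        -> O is dropped
+        //   R strictly inside O        -> O is split into a head and a tail
+        //   R overlaps O's start only  -> O's tail survives
+        //   R overlaps O's end only    -> O's head survives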
+ bool subtract = true; + if (!region->is_stack && global_region_caller_ranges) { + if (region->caller() == static_cast<uintptr_t>(NULL)) { + have_null_region_callers = true; + } else { + GlobalRegionCallerRangeMap::const_iterator iter + = global_region_caller_ranges->upper_bound(region->caller()); + if (iter != global_region_caller_ranges->end()) { + RAW_DCHECK(iter->first > region->caller(), ""); + if (iter->second < region->caller()) { // in special region + subtract = false; + } + } + } + } + if (subtract) { + // The loop puts the result of filtering l->second into live_objects: + for (LiveObjectsStack::const_iterator i = l->second.begin(); + i != l->second.end(); ++i) { + // subtract *region from *i + uintptr_t start = AsInt(i->ptr); + uintptr_t end = start + i->size; + if (region->start_addr <= start && end <= region->end_addr) { + // full deletion due to subsumption + } else if (start < region->start_addr && + region->end_addr < end) { // cutting-out split + live_objects->push_back(AllocObject(i->ptr, + region->start_addr - start, + IN_GLOBAL_DATA)); + live_objects->push_back(AllocObject(AsPtr(region->end_addr), + end - region->end_addr, + IN_GLOBAL_DATA)); + } else if (region->end_addr > start && + region->start_addr <= start) { // cut from start + live_objects->push_back(AllocObject(AsPtr(region->end_addr), + end - region->end_addr, + IN_GLOBAL_DATA)); + } else if (region->start_addr > start && + region->start_addr < end) { // cut from end + live_objects->push_back(AllocObject(i->ptr, + region->start_addr - start, + IN_GLOBAL_DATA)); + } else { // pass: no intersection + live_objects->push_back(AllocObject(i->ptr, i->size, + IN_GLOBAL_DATA)); + } + } + // Move live_objects back into l->second + // for filtering by the next region. + live_objects->swap(l->second); + live_objects->clear(); + } + } + // Now get and use live_objects from the final version of l->second: + if (VLOG_IS_ON(11)) { + for (LiveObjectsStack::const_iterator i = l->second.begin(); + i != l->second.end(); ++i) { + RAW_VLOG(11, "Library live region at %p of %" PRIuPTR " bytes", + i->ptr, i->size); + } + } + live_objects->swap(l->second); + IgnoreLiveObjectsLocked("in globals of\n ", l->first.c_str()); + } + if (have_null_region_callers) { + RAW_LOG(ERROR, "Have memory regions w/o callers: " + "might report false leaks"); + } + Allocator::DeleteAndNull(&library_live_objects); + } +} + +// Callback for TCMalloc_ListAllProcessThreads in IgnoreAllLiveObjectsLocked below +// to test/verify that we have just the one main thread, in which case +// we can do everything in that main thread, +// so that CPU profiler can collect all its samples. +// Returns the number of threads in the process. +static int IsOneThread(void* parameter, int num_threads, + pid_t* thread_pids, va_list ap) { + if (num_threads != 1) { + RAW_LOG(WARNING, "Have threads: Won't CPU-profile the bulk of leak " + "checking work happening in IgnoreLiveThreadsLocked!"); + } + TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids); + return num_threads; +} + +// Dummy for IgnoreAllLiveObjectsLocked below. +// Making it global helps with compiler warnings. 
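+// (The va_list parameter exists only so that IgnoreLiveThreadsLocked matches
+// the callback signature expected by TCMalloc_ListAllProcessThreads; when we
+// invoke it directly for the single-thread case below, this placeholder is
+// passed and never read.)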
+static va_list dummy_ap; + +// static +void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_CHECK(live_objects == NULL, ""); + live_objects = new(Allocator::Allocate(sizeof(LiveObjectsStack))) + LiveObjectsStack; + stack_tops = new(Allocator::Allocate(sizeof(StackTopSet))) StackTopSet; + // reset the counts + live_objects_total = 0; + live_bytes_total = 0; + // Reduce max_heap_object_size to FLAGS_heap_check_max_pointer_offset + // for the time of leak check. + // FLAGS_heap_check_max_pointer_offset caps max_heap_object_size + // to manage reasonably low chances of random bytes + // appearing to be pointing into large actually leaked heap objects. + const size_t old_max_heap_object_size = max_heap_object_size; + max_heap_object_size = ( + FLAGS_heap_check_max_pointer_offset != -1 + ? min(size_t(FLAGS_heap_check_max_pointer_offset), max_heap_object_size) + : max_heap_object_size); + // Record global data as live: + if (FLAGS_heap_check_ignore_global_live) { + library_live_objects = + new(Allocator::Allocate(sizeof(LibraryLiveObjectsStacks))) + LibraryLiveObjectsStacks; + } + // Ignore all thread stacks: + thread_listing_status = CALLBACK_NOT_STARTED; + bool need_to_ignore_non_thread_objects = true; + self_thread_pid = getpid(); + self_thread_stack_top = self_stack_top; + if (FLAGS_heap_check_ignore_thread_live) { + // In case we are doing CPU profiling we'd like to do all the work + // in the main thread, not in the special thread created by + // TCMalloc_ListAllProcessThreads, so that CPU profiler can + // collect all its samples. The machinery of + // TCMalloc_ListAllProcessThreads conflicts with the CPU profiler + // by also relying on signals and ::sigaction. We can do this + // (run everything in the main thread) safely only if there's just + // the main thread itself in our process. This variable reflects + // these two conditions: + bool want_and_can_run_in_main_thread = + ProfilingIsEnabledForAllThreads() && + TCMalloc_ListAllProcessThreads(NULL, IsOneThread) == 1; + // When the normal path of TCMalloc_ListAllProcessThreads below is taken, + // we fully suspend the threads right here before any liveness checking + // and keep them suspended for the whole time of liveness checking + // inside of the IgnoreLiveThreadsLocked callback. + // (The threads can't (de)allocate due to lock on the delete hook but + // if not suspended they could still mess with the pointer + // graph while we walk it). + int r = want_and_can_run_in_main_thread + ? IgnoreLiveThreadsLocked(NULL, 1, &self_thread_pid, dummy_ap) + : TCMalloc_ListAllProcessThreads(NULL, IgnoreLiveThreadsLocked); + need_to_ignore_non_thread_objects = r < 0; + if (r < 0) { + RAW_LOG(WARNING, "Thread finding failed with %d errno=%d", r, errno); + if (thread_listing_status == CALLBACK_COMPLETED) { + RAW_LOG(INFO, "Thread finding callback " + "finished ok; hopefully everything is fine"); + need_to_ignore_non_thread_objects = false; + } else if (thread_listing_status == CALLBACK_STARTED) { + RAW_LOG(FATAL, "Thread finding callback was " + "interrupted or crashed; can't fix this"); + } else { // CALLBACK_NOT_STARTED + RAW_LOG(ERROR, "Could not find thread stacks. " + "Will likely report false leak positives."); + } + } else if (r != 0) { + RAW_LOG(ERROR, "Thread stacks not found for %d threads. 
" + "Will likely report false leak positives.", r); + } else { + RAW_VLOG(11, "Thread stacks appear to be found for all threads"); + } + } else { + RAW_LOG(WARNING, "Not looking for thread stacks; " + "objects reachable only from there " + "will be reported as leaks"); + } + // Do all other live data ignoring here if we did not do it + // within thread listing callback with all threads stopped. + if (need_to_ignore_non_thread_objects) { + if (FLAGS_heap_check_ignore_global_live) { + UseProcMapsLocked(RECORD_GLOBAL_DATA); + } + IgnoreNonThreadLiveObjectsLocked(); + } + if (live_objects_total) { + RAW_VLOG(10, "Ignoring %" PRId64 " reachable objects of %" PRId64 " bytes", + live_objects_total, live_bytes_total); + } + // Free these: we made them here and heap_profile never saw them + Allocator::DeleteAndNull(&live_objects); + Allocator::DeleteAndNull(&stack_tops); + max_heap_object_size = old_max_heap_object_size; // reset this var +} + +// Alignment at which we should consider pointer positions +// in IgnoreLiveObjectsLocked. Will normally use the value of +// FLAGS_heap_check_pointer_source_alignment. +static size_t pointer_source_alignment = kPointerSourceAlignment; +// Global lock for HeapLeakChecker::DoNoLeaks +// to protect pointer_source_alignment. +static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); + +// This function changes the live bits in the heap_profile-table's state: +// we only record the live objects to be skipped. +// +// When checking if a byte sequence points to a heap object we use +// HeapProfileTable::FindInsideAlloc to handle both pointers to +// the start and inside of heap-allocated objects. +// The "inside" case needs to be checked to support +// at least the following relatively common cases: +// - C++ arrays allocated with new FooClass[size] for classes +// with destructors have their size recorded in a sizeof(int) field +// before the place normal pointers point to. +// - basic_string<>-s for e.g. the C++ library of gcc 3.4 +// have the meta-info in basic_string<...>::_Rep recorded +// before the place normal pointers point to. +// - Multiple-inherited objects have their pointers when cast to +// different base classes pointing inside of the actually +// allocated object. +// - Sometimes reachability pointers point to member objects of heap objects, +// and then those member objects point to the full heap object. +// - Third party UnicodeString: it stores a 32-bit refcount +// (in both 32-bit and 64-bit binaries) as the first uint32 +// in the allocated memory and a normal pointer points at +// the second uint32 behind the refcount. +// By finding these additional objects here +// we slightly increase the chance to mistake random memory bytes +// for a pointer and miss a leak in a particular run of a binary. 
+//
+/*static*/ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
+                                                         const char* name2) {
+  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+  int64 live_object_count = 0;
+  int64 live_byte_count = 0;
+  while (!live_objects->empty()) {
+    const char* object =
+      reinterpret_cast<const char*>(live_objects->back().ptr);
+    size_t size = live_objects->back().size;
+    const ObjectPlacement place = live_objects->back().place;
+    live_objects->pop_back();
+    if (place == MUST_BE_ON_HEAP && heap_profile->MarkAsLive(object)) {
+      live_object_count += 1;
+      live_byte_count += size;
+    }
+    RAW_VLOG(13, "Looking for heap pointers in %p of %" PRIuS " bytes",
+             object, size);
+    const char* const whole_object = object;
+    size_t const whole_size = size;
+    // Try interpreting any byte sequence in object,size as a heap pointer:
+    const size_t remainder = AsInt(object) % pointer_source_alignment;
+    if (remainder) {
+      object += pointer_source_alignment - remainder;
+      if (size >= pointer_source_alignment - remainder) {
+        size -= pointer_source_alignment - remainder;
+      } else {
+        size = 0;
+      }
+    }
+    if (size < sizeof(void*)) continue;
+
+#ifdef NO_FRAME_POINTER
+    // Frame pointer omission requires us to use libunwind, which uses direct
+    // mmap and munmap system calls, and that needs special handling.
+    if (name2 == kUnnamedProcSelfMapEntry) {
+      static const uintptr_t page_mask = ~(getpagesize() - 1);
+      const uintptr_t addr = reinterpret_cast<uintptr_t>(object);
+      if ((addr & page_mask) == 0 && (size & page_mask) == 0) {
+        // This is an object we slurped from /proc/self/maps.
+        // It may or may not be readable at this point.
+        //
+        // In case all the above conditions made a mistake, and the object is
+        // not related to libunwind, we also verify that it's not readable
+        // before ignoring it.
+        if (msync(const_cast<char*>(object), size, MS_ASYNC) != 0) {
+          // Skip unreadable object, so we don't crash trying to sweep it.
+          RAW_VLOG(0, "Ignoring inaccessible object [%p, %p) "
+                   "(msync error %d (%s))",
+                   object, object + size, errno, strerror(errno));
+          continue;
+        }
+      }
+    }
+#endif
+
+    const char* const max_object = object + size - sizeof(void*);
+    while (object <= max_object) {
+      // potentially unaligned load:
+      const uintptr_t addr = *reinterpret_cast<const uintptr_t*>(object);
+      // Do fast check before the more expensive HaveOnHeapLocked lookup:
+      // this code runs for all memory words that are potentially pointers:
+      const bool can_be_on_heap =
+        // Order tests by the likelihood of the test failing in 64/32 bit
+        // modes. Yes, this matters: we either lose 5..6% speed in 32 bit
+        // mode (which is already slower) or slow down by a factor of
+        // 1.5..1.91 in 64 bit mode.
+        // After the alignment test got dropped the above performance figures
+        // must have changed; might need to revisit this.
+#if defined(__x86_64__)
+        addr <= max_heap_address &&  // <= is for 0-sized object with max addr
+        min_heap_address <= addr;
+#else
+        min_heap_address <= addr &&
+        addr <= max_heap_address;  // <= is for 0-sized object with max addr
+#endif
+      if (can_be_on_heap) {
+        const void* ptr = reinterpret_cast<const void*>(addr);
+        // Too expensive (inner loop): manually uncomment when debugging:
+        // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object);
+        size_t object_size;
+        if (HaveOnHeapLocked(&ptr, &object_size) &&
+            heap_profile->MarkAsLive(ptr)) {
+          // We take the (hopefully low) risk here of encountering by accident
+          // a byte sequence in memory that matches an address of
+          // a heap object which is in fact leaked.
+          // I.e.
in very rare and probably not repeatable/lasting cases + // we might miss some real heap memory leaks. + RAW_VLOG(14, "Found pointer to %p of %" PRIuS " bytes at %p " + "inside %p of size %" PRIuS "", + ptr, object_size, object, whole_object, whole_size); + if (VLOG_IS_ON(15)) { + // log call stacks to help debug how come something is not a leak + HeapProfileTable::AllocInfo alloc; + if (!heap_profile->FindAllocDetails(ptr, &alloc)) { + RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr); + } + RAW_LOG(INFO, "New live %p object's alloc stack:", ptr); + for (int i = 0; i < alloc.stack_depth; ++i) { + RAW_LOG(INFO, " @ %p", alloc.call_stack[i]); + } + } + live_object_count += 1; + live_byte_count += object_size; + live_objects->push_back(AllocObject(ptr, object_size, + IGNORED_ON_HEAP)); + } + } + object += pointer_source_alignment; + } + } + live_objects_total += live_object_count; + live_bytes_total += live_byte_count; + if (live_object_count) { + RAW_VLOG(10, "Removed %" PRId64 " live heap objects of %" PRId64 " bytes: %s%s", + live_object_count, live_byte_count, name, name2); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker leak check disabling components +//---------------------------------------------------------------------- + +// static +void HeapLeakChecker::DisableChecksIn(const char* pattern) { + RAW_LOG(WARNING, "DisableChecksIn(%s) is ignored", pattern); +} + +// static +void HeapLeakChecker::DoIgnoreObject(const void* ptr) { + SpinLockHolder l(&heap_checker_lock); + if (!heap_checker_on) return; + size_t object_size; + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); + } else { + RAW_VLOG(10, "Going to ignore live object at %p of %" PRIuS " bytes", + ptr, object_size); + if (ignored_objects == NULL) { + ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap))) + IgnoredObjectsMap; + } + if (!ignored_objects->insert(make_pair(AsInt(ptr), object_size)).second) { + RAW_LOG(WARNING, "Object at %p is already being ignored", ptr); + } + } +} + +// static +void HeapLeakChecker::UnIgnoreObject(const void* ptr) { + SpinLockHolder l(&heap_checker_lock); + if (!heap_checker_on) return; + size_t object_size; + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(FATAL, "No live heap object at %p to un-ignore", ptr); + } else { + bool found = false; + if (ignored_objects) { + IgnoredObjectsMap::iterator object = ignored_objects->find(AsInt(ptr)); + if (object != ignored_objects->end() && object_size == object->second) { + ignored_objects->erase(object); + found = true; + RAW_VLOG(10, "Now not going to ignore live object " + "at %p of %" PRIuS " bytes", ptr, object_size); + } + } + if (!found) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker non-static functions +//---------------------------------------------------------------------- + +char* HeapLeakChecker::MakeProfileNameLocked() { + RAW_DCHECK(lock_->IsHeld(), ""); + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + const int len = profile_name_prefix->size() + strlen(name_) + 5 + + strlen(HeapProfileTable::kFileExt) + 1; + char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len)); + snprintf(file_name, len, "%s.%s-end%s", + profile_name_prefix->c_str(), name_, + HeapProfileTable::kFileExt); + return file_name; +} + +void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) { + 
SpinLockHolder l(lock_); + name_ = NULL; // checker is inactive + start_snapshot_ = NULL; + has_checked_ = false; + inuse_bytes_increase_ = 0; + inuse_allocs_increase_ = 0; + keep_profiles_ = false; + char* n = new char[strlen(name) + 1]; // do this before we lock + IgnoreObject(n); // otherwise it might be treated as live due to our stack + { // Heap activity in other threads is paused for this whole scope. + SpinLockHolder al(&alignment_checker_lock); + SpinLockHolder hl(&heap_checker_lock); + MemoryRegionMap::LockHolder ml; + if (heap_checker_on && profile_name_prefix != NULL) { + RAW_DCHECK(strchr(name, '/') == NULL, "must be a simple name"); + memcpy(n, name, strlen(name) + 1); + name_ = n; // checker is active + if (make_start_snapshot) { + start_snapshot_ = heap_profile->TakeSnapshot(); + } + + const HeapProfileTable::Stats& t = heap_profile->total(); + const size_t start_inuse_bytes = t.alloc_size - t.free_size; + const size_t start_inuse_allocs = t.allocs - t.frees; + RAW_VLOG(10, "Start check \"%s\" profile: %" PRIuS " bytes " + "in %" PRIuS " objects", + name_, start_inuse_bytes, start_inuse_allocs); + } else { + RAW_LOG(WARNING, "Heap checker is not active, " + "hence checker \"%s\" will do nothing!", name); + RAW_LOG(WARNING, "To activate set the HEAPCHECK environment variable.\n"); + } + } + if (name_ == NULL) { + UnIgnoreObject(n); + delete[] n; // must be done after we unlock + } +} + +HeapLeakChecker::HeapLeakChecker(const char *name) : lock_(new SpinLock) { + RAW_DCHECK(strcmp(name, "_main_") != 0, "_main_ is reserved"); + Create(name, true/*create start_snapshot_*/); +} + +HeapLeakChecker::HeapLeakChecker() : lock_(new SpinLock) { + if (FLAGS_heap_check_before_constructors) { + // We want to check for leaks of objects allocated during global + // constructors (i.e., objects allocated already). So we do not + // create a baseline snapshot and hence check for leaks of objects + // that may have already been created. + Create("_main_", false); + } else { + // We want to ignore leaks of objects allocated during global + // constructors (i.e., objects allocated already). So we snapshot + // the current heap contents and use them as a baseline that is + // not reported by the leak checker. + Create("_main_", true); + } +} + +ssize_t HeapLeakChecker::BytesLeaked() const { + SpinLockHolder l(lock_); + if (!has_checked_) { + RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); + } + return inuse_bytes_increase_; +} + +ssize_t HeapLeakChecker::ObjectsLeaked() const { + SpinLockHolder l(lock_); + if (!has_checked_) { + RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); + } + return inuse_allocs_increase_; +} + +// Save pid of main thread for using in naming dump files +static int32 main_thread_pid = getpid(); +#ifdef HAVE_PROGRAM_INVOCATION_NAME +#ifdef __UCLIBC__ +extern const char* program_invocation_name; +extern const char* program_invocation_short_name; +#else +extern char* program_invocation_name; +extern char* program_invocation_short_name; +#endif +static const char* invocation_name() { return program_invocation_short_name; } +static string invocation_path() { return program_invocation_name; } +#else +static const char* invocation_name() { return "<your binary>"; } +static string invocation_path() { return "<your binary>"; } +#endif + +// Prints commands that users can run to get more information +// about the reported leaks. 
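+// The suggested command that gets printed typically takes the form
+// (binary and profile paths vary per run):
+//   pprof <this_binary> "<prefix>.<name>-end.heap" --inuse_objects --lines \
+//     --heapcheck --edgefraction=1e-10 --nodefraction=1e-10 --gv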
+static void SuggestPprofCommand(const char* pprof_file_arg) { + // Extra help information to print for the user when the test is + // being run in a way where the straightforward pprof command will + // not suffice. + string extra_help; + + // Common header info to print for remote runs + const string remote_header = + "This program is being executed remotely and therefore the pprof\n" + "command printed above will not work. Either run this program\n" + "locally, or adjust the pprof command as follows to allow it to\n" + "work on your local machine:\n"; + + // Extra command for fetching remote data + string fetch_cmd; + + RAW_LOG(WARNING, + "\n\n" + "If the preceding stack traces are not enough to find " + "the leaks, try running THIS shell command:\n\n" + "%s%s %s \"%s\" --inuse_objects --lines --heapcheck " + " --edgefraction=1e-10 --nodefraction=1e-10 --gv\n" + "\n" + "%s" + "If you are still puzzled about why the leaks are " + "there, try rerunning this program with " + "HEAP_CHECK_TEST_POINTER_ALIGNMENT=1 and/or with " + "HEAP_CHECK_MAX_POINTER_OFFSET=-1\n" + "If the leak report occurs in a small fraction of runs, " + "try running with TCMALLOC_MAX_FREE_QUEUE_SIZE of few hundred MB " + "or with TCMALLOC_RECLAIM_MEMORY=false, " // only works for debugalloc + "it might help find leaks more repeatably\n", + fetch_cmd.c_str(), + "pprof", // works as long as pprof is on your path + invocation_path().c_str(), + pprof_file_arg, + extra_help.c_str() + ); +} + +bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { + SpinLockHolder l(lock_); + // The locking also helps us keep the messages + // for the two checks close together. + SpinLockHolder al(&alignment_checker_lock); + + // thread-safe: protected by alignment_checker_lock + static bool have_disabled_hooks_for_symbolize = false; + // Once we've checked for leaks and symbolized the results once, it's + // not safe to do it again. This is because in order to symbolize + // safely, we had to disable all the malloc hooks here, so we no + // longer can be confident we've collected all the data we need. + if (have_disabled_hooks_for_symbolize) { + RAW_LOG(FATAL, "Must not call heap leak checker manually after " + " program-exit's automatic check."); + } + + HeapProfileTable::Snapshot* leaks = NULL; + char* pprof_file = NULL; + + { + // Heap activity in other threads is paused during this function + // (i.e. until we got all profile difference info). + SpinLockHolder hl(&heap_checker_lock); + if (heap_checker_on == false) { + if (name_ != NULL) { // leak checking enabled when created the checker + RAW_LOG(WARNING, "Heap leak checker got turned off after checker " + "\"%s\" has been created, no leak check is being done for it!", + name_); + } + return true; + } + + // Update global_region_caller_ranges. They may need to change since + // e.g. initialization because shared libraries might have been loaded or + // unloaded. 
+    Allocator::DeleteAndNullIfNot(&global_region_caller_ranges);
+    ProcMapsResult pm_result = UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS);
+    RAW_CHECK(pm_result == PROC_MAPS_USED, "");
+
+    // Keep track of the number of internally allocated objects so we
+    // can detect leaks in the heap-leak-checker itself
+    const int initial_allocs = Allocator::alloc_count();
+
+    if (name_ == NULL) {
+      RAW_LOG(FATAL, "Heap leak checker must not be turned on "
+                     "after construction of a HeapLeakChecker");
+    }
+
+    MemoryRegionMap::LockHolder ml;
+    int a_local_var;  // Use our stack ptr to make stack data live:
+
+    // Make the heap profile, other threads are locked out.
+    HeapProfileTable::Snapshot* base =
+        reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_);
+    RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, "");
+    pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
+    IgnoreAllLiveObjectsLocked(&a_local_var);
+    leaks = heap_profile->NonLiveSnapshot(base);
+
+    inuse_bytes_increase_ = static_cast<ssize_t>(leaks->total().alloc_size);
+    inuse_allocs_increase_ = static_cast<ssize_t>(leaks->total().allocs);
+    if (leaks->Empty()) {
+      heap_profile->ReleaseSnapshot(leaks);
+      leaks = NULL;
+
+      // We can only check for internal leaks along the no-user-leak
+      // path since in the leak path we temporarily release
+      // heap_checker_lock and another thread can come in and disturb
+      // allocation counts.
+      if (Allocator::alloc_count() != initial_allocs) {
+        RAW_LOG(FATAL, "Internal HeapChecker leak of %d objects; %d -> %d",
+                Allocator::alloc_count() - initial_allocs,
+                initial_allocs, Allocator::alloc_count());
+      }
+    } else if (FLAGS_heap_check_test_pointer_alignment) {
+      if (pointer_source_alignment == 1) {
+        RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: "
+                "--heap_check_pointer_source_alignment was already set to 1");
+      } else {
+        // Try with reduced pointer alignment
+        pointer_source_alignment = 1;
+        IgnoreAllLiveObjectsLocked(&a_local_var);
+        HeapProfileTable::Snapshot* leaks_wo_align =
+            heap_profile->NonLiveSnapshot(base);
+        pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
+        if (leaks_wo_align->Empty()) {
+          RAW_LOG(WARNING, "Found no leaks without pointer alignment: "
+                  "something might be placing pointers at "
+                  "unaligned addresses! This needs to be fixed.");
+        } else {
+          RAW_LOG(INFO, "Found leaks without pointer alignment as well: "
+                  "unaligned pointers must not be the cause of leaks.");
+          RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help "
+                  "to diagnose the leaks.");
+        }
+        heap_profile->ReleaseSnapshot(leaks_wo_align);
+      }
+    }
+
+    if (leaks != NULL) {
+      pprof_file = MakeProfileNameLocked();
+    }
+  }
+
+  has_checked_ = true;
+  if (leaks == NULL) {
+    if (FLAGS_heap_check_max_pointer_offset == -1) {
+      RAW_LOG(WARNING,
+              "Found no leaks without max_pointer_offset restriction: "
+              "it's possible that the default value of "
+              "heap_check_max_pointer_offset flag is too low. "
+              "Do you use pointers with offsets larger than that, "
+              "pointing into the middle of heap-allocated objects?");
+    }
+    const HeapProfileTable::Stats& stats = heap_profile->total();
+    RAW_VLOG(heap_checker_info_level,
+             "No leaks found for check \"%s\" "
+             "(but no 100%% guarantee that there aren't any): "
+             "found %" PRId64 " reachable heap objects of %" PRId64 " bytes",
+             name_,
+             int64(stats.allocs - stats.frees),
+             int64(stats.alloc_size - stats.free_size));
+  } else {
+    if (should_symbolize == SYMBOLIZE) {
+      // To turn addresses into symbols, we need to fork, which is a
+      // problem if both parent and child end up trying to call the
+      // same malloc-hooks we've set up, at the same time. To avoid
+      // trouble, we turn off the hooks before symbolizing. Note that
+      // this makes it unsafe to ever leak-report again! Luckily, we
+      // typically only want to report once in a program's run, at the
+      // very end.
+      if (MallocHook::GetNewHook() == NewHook)
+        MallocHook::SetNewHook(NULL);
+      if (MallocHook::GetDeleteHook() == DeleteHook)
+        MallocHook::SetDeleteHook(NULL);
+      MemoryRegionMap::Shutdown();
+      // Make sure all the hooks really got unset:
+      RAW_CHECK(MallocHook::GetNewHook() == NULL, "");
+      RAW_CHECK(MallocHook::GetDeleteHook() == NULL, "");
+      RAW_CHECK(MallocHook::GetMmapHook() == NULL, "");
+      RAW_CHECK(MallocHook::GetSbrkHook() == NULL, "");
+      have_disabled_hooks_for_symbolize = true;
+      leaks->ReportLeaks(name_, pprof_file, true);  // true = should_symbolize
+    } else {
+      leaks->ReportLeaks(name_, pprof_file, false);
+    }
+    if (FLAGS_heap_check_identify_leaks) {
+      leaks->ReportIndividualObjects();
+    }
+
+    SuggestPprofCommand(pprof_file);
+
+    {
+      SpinLockHolder hl(&heap_checker_lock);
+      heap_profile->ReleaseSnapshot(leaks);
+      Allocator::Free(pprof_file);
+    }
+  }
+
+  return (leaks == NULL);
+}
+
+HeapLeakChecker::~HeapLeakChecker() {
+  if (name_ != NULL) {  // had leak checking enabled when the checker was created
+    if (!has_checked_) {
+      RAW_LOG(FATAL, "Some *NoLeaks|SameHeap method"
+                     " must be called on any created HeapLeakChecker");
+    }
+
+    // Deallocate any snapshot taken at start
+    if (start_snapshot_ != NULL) {
+      SpinLockHolder l(&heap_checker_lock);
+      heap_profile->ReleaseSnapshot(
+          reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_));
+    }
+
+    UnIgnoreObject(name_);
+    delete[] name_;
+    name_ = NULL;
+  }
+  delete lock_;
+}
+
+//----------------------------------------------------------------------
+// HeapLeakChecker overall heap check components
+//----------------------------------------------------------------------
+
+// static
+bool HeapLeakChecker::IsActive() {
+  SpinLockHolder l(&heap_checker_lock);
+  return heap_checker_on;
+}
+
+vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL;
+
+// When a HeapCleaner object is initialized, add its function to the static
+// list of cleaners to be run before leak checking.
+HeapCleaner::HeapCleaner(void_function f) {
+  if (heap_cleanups_ == NULL)
+    heap_cleanups_ = new vector<HeapCleaner::void_function>;
+  heap_cleanups_->push_back(f);
+}
+
+// Run all of the cleanup functions and delete the vector.
+void HeapCleaner::RunHeapCleanups() {
+  if (!heap_cleanups_)
+    return;
+  for (int i = 0; i < heap_cleanups_->size(); i++) {
+    void (*f)(void) = (*heap_cleanups_)[i];
+    f();
+  }
+  delete heap_cleanups_;
+  heap_cleanups_ = NULL;
+}
+
+// Program exit heap cleanup registered as a module object destructor.
+// Will not get executed when we crash on a signal.
+// +void HeapLeakChecker_RunHeapCleanups() { + if (FLAGS_heap_check == "local") // don't check heap in this mode + return; + { SpinLockHolder l(&heap_checker_lock); + // can get here (via forks?) with other pids + if (heap_checker_pid != getpid()) return; + } + HeapCleaner::RunHeapCleanups(); + if (!FLAGS_heap_check_after_destructors) HeapLeakChecker::DoMainHeapCheck(); +} + +static bool internal_init_start_has_run = false; + +// Called exactly once, before main() (but hopefully just before). +// This picks a good unique name for the dumped leak checking heap profiles. +// +// Because we crash when InternalInitStart is called more than once, +// it's fine that we hold heap_checker_lock only around pieces of +// this function: this is still enough for thread-safety w.r.t. other functions +// of this module. +// We can't hold heap_checker_lock throughout because it would deadlock +// on a memory allocation since our new/delete hooks can be on. +// +void HeapLeakChecker_InternalInitStart() { + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(!internal_init_start_has_run, + "Heap-check constructor called twice. Perhaps you both linked" + " in the heap checker, and also used LD_PRELOAD to load it?"); + internal_init_start_has_run = true; + +#ifdef ADDRESS_SANITIZER + // AddressSanitizer's custom malloc conflicts with HeapChecker. + FLAGS_heap_check = ""; +#endif + + if (FLAGS_heap_check.empty()) { + // turns out we do not need checking in the end; can stop profiling + HeapLeakChecker::TurnItselfOffLocked(); + return; + } else if (RunningOnValgrind()) { + // There is no point in trying -- we'll just fail. + RAW_LOG(WARNING, "Can't run under Valgrind; will turn itself off"); + HeapLeakChecker::TurnItselfOffLocked(); + return; + } + } + + // Changing this to false can be useful when debugging heap-checker itself: + if (!FLAGS_heap_check_run_under_gdb && IsDebuggerAttached()) { + RAW_LOG(WARNING, "Someone is ptrace()ing us; will turn itself off"); + SpinLockHolder l(&heap_checker_lock); + HeapLeakChecker::TurnItselfOffLocked(); + return; + } + + { SpinLockHolder l(&heap_checker_lock); + if (!constructor_heap_profiling) { + RAW_LOG(FATAL, "Can not start so late. You have to enable heap checking " + "with HEAPCHECK=<mode>."); + } + } + + // Set all flags + RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); + if (FLAGS_heap_check == "minimal") { + // The least we can check. + FLAGS_heap_check_before_constructors = false; // from after main + // (ignore more) + FLAGS_heap_check_after_destructors = false; // to after cleanup + // (most data is live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "normal") { + // Faster than 'minimal' and not much stricter. + FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = false; // to after cleanup + // (most data is live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "strict") { + // A bit stricter than 'normal': global destructors must fully clean up + // after themselves if they are present. 
+    FLAGS_heap_check_before_constructors = true;   // from no profile (fast)
+    FLAGS_heap_check_after_destructors = true;     // to after destructors
+                                                   // (less data live)
+    FLAGS_heap_check_ignore_thread_live = true;    // ignore all live
+    FLAGS_heap_check_ignore_global_live = true;    // ignore all live
+  } else if (FLAGS_heap_check == "draconian") {
+    // Drop not very portable and not very exact live heap flooding.
+    FLAGS_heap_check_before_constructors = true;   // from no profile (fast)
+    FLAGS_heap_check_after_destructors = true;     // to after destructors
+                                                   // (need them)
+    FLAGS_heap_check_ignore_thread_live = false;   // no live flood (stricter)
+    FLAGS_heap_check_ignore_global_live = false;   // no live flood (stricter)
+  } else if (FLAGS_heap_check == "as-is") {
+    // do nothing: use other flags as is
+  } else if (FLAGS_heap_check == "local") {
+    // do nothing
+  } else {
+    RAW_LOG(FATAL, "Unsupported heap_check flag: %s",
+            FLAGS_heap_check.c_str());
+  }
+  // FreeBSD doesn't seem to honor atexit execution order:
+  //   http://code.google.com/p/gperftools/issues/detail?id=375
+  // Since heap-checking before destructors depends on atexit running
+  // at the right time, on FreeBSD we always check after, even in the
+  // less strict modes. This just means FreeBSD is always a bit
+  // stricter in its checking than other OSes.
+  // This now appears to be the case in other OSes as well;
+  // so always check afterwards.
+  FLAGS_heap_check_after_destructors = true;
+
+  { SpinLockHolder l(&heap_checker_lock);
+    RAW_DCHECK(heap_checker_pid == getpid(), "");
+    heap_checker_on = true;
+    RAW_DCHECK(heap_profile, "");
+    HeapLeakChecker::ProcMapsResult pm_result =
+        HeapLeakChecker::UseProcMapsLocked(
+            HeapLeakChecker::DISABLE_LIBRARY_ALLOCS);
+    // might need to do this more than once
+    // if one later dynamically loads libraries that we want disabled
+    if (pm_result != HeapLeakChecker::PROC_MAPS_USED) {  // can't function
+      HeapLeakChecker::TurnItselfOffLocked();
+      return;
+    }
+  }
+
+  // make a good place and name for heap profile leak dumps
+  string* profile_prefix =
+      new string(FLAGS_heap_check_dump_directory + "/" + invocation_name());
+
+  // Finalize prefix for dumping leak checking profiles.
+  const int32 our_pid = getpid();  // safest to call getpid() outside lock
+  { SpinLockHolder l(&heap_checker_lock);
+    // main_thread_pid might still be 0 if this function is being called before
+    // global constructors. In that case, our pid *is* the main pid.
+    if (main_thread_pid == 0)
+      main_thread_pid = our_pid;
+  }
+  char pid_buf[15];
+  snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid);
+  *profile_prefix += pid_buf;
+  { SpinLockHolder l(&heap_checker_lock);
+    RAW_DCHECK(profile_name_prefix == NULL, "");
+    profile_name_prefix = profile_prefix;
+  }
+
+  // Make sure new/delete hooks are installed properly
+  // and the heap profiler is indeed able to keep track
+  // of the objects being allocated.
+  // We test this to make sure we are indeed checking for leaks.
+  char* test_str = new char[5];
+  size_t size;
+  { SpinLockHolder l(&heap_checker_lock);
+    RAW_CHECK(heap_profile->FindAlloc(test_str, &size),
+              "our own new/delete not linked?");
+  }
+  delete[] test_str;
+  { SpinLockHolder l(&heap_checker_lock);
+    // This check can fail when it should not if another thread allocates
+    // into this same spot right this moment,
+    // which is unlikely since this code runs in InitGoogle.
+ RAW_CHECK(!heap_profile->FindAlloc(test_str, &size), + "our own new/delete not linked?"); + } + // If we crash in the above code, it probably means that + // "nm <this_binary> | grep new" will show that tcmalloc's new/delete + // implementation did not get linked-in into this binary + // (i.e. nm will list __builtin_new and __builtin_vec_new as undefined). + // If this happens, it is a BUILD bug to be fixed. + + RAW_VLOG(heap_checker_info_level, + "WARNING: Perftools heap leak checker is active " + "-- Performance may suffer"); + + if (FLAGS_heap_check != "local") { + HeapLeakChecker* main_hc = new HeapLeakChecker(); + SpinLockHolder l(&heap_checker_lock); + RAW_DCHECK(main_heap_checker == NULL, + "Repeated creation of main_heap_checker"); + main_heap_checker = main_hc; + do_main_heap_check = true; + } + + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(heap_checker_on && constructor_heap_profiling, + "Leak checking is expected to be fully turned on now"); + } + + // For binaries built in debug mode, this will set release queue of + // debugallocation.cc to 100M to make it less likely for real leaks to + // be hidden due to reuse of heap memory object addresses. + // Running a test with --malloc_reclaim_memory=0 would help find leaks even + // better, but the test might run out of memory as a result. + // The scenario is that a heap object at address X is allocated and freed, + // but some other data-structure still retains a pointer to X. + // Then the same heap memory is used for another object, which is leaked, + // but the leak is not noticed due to the pointer to the original object at X. + // TODO(csilvers): support this in some manner. +#if 0 + SetCommandLineOptionWithMode("max_free_queue_size", "104857600", // 100M + SET_FLAG_IF_DEFAULT); +#endif +} + +// We want this to run early as well, but not so early as +// ::BeforeConstructors (we want flag assignments to have already +// happened, for instance). Initializer-registration does the trick. +REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker_InternalInitStart()); +REGISTER_MODULE_DESTRUCTOR(init_start, HeapLeakChecker_RunHeapCleanups()); + +// static +bool HeapLeakChecker::NoGlobalLeaksMaybeSymbolize( + ShouldSymbolize should_symbolize) { + // we never delete or change main_heap_checker once it's set: + HeapLeakChecker* main_hc = GlobalChecker(); + if (main_hc) { + RAW_VLOG(10, "Checking for whole-program memory leaks"); + return main_hc->DoNoLeaks(should_symbolize); + } + return true; +} + +// static +bool HeapLeakChecker::DoMainHeapCheck() { + if (FLAGS_heap_check_delay_seconds > 0) { + sleep(FLAGS_heap_check_delay_seconds); + } + { SpinLockHolder l(&heap_checker_lock); + if (!do_main_heap_check) return false; + RAW_DCHECK(heap_checker_pid == getpid(), ""); + do_main_heap_check = false; // will do it now; no need to do it more + } + + // The program is over, so it's safe to symbolize addresses (which + // requires a fork) because no serious work is expected to be done + // after this. Symbolizing is really useful -- knowing what + // function has a leak is better than knowing just an address -- + // and while we can only safely symbolize once in a program run, + // now is the time (after all, there's no "later" that would be better). 
+ if (!NoGlobalLeaksMaybeSymbolize(SYMBOLIZE)) { + if (FLAGS_heap_check_identify_leaks) { + RAW_LOG(FATAL, "Whole-program memory leaks found."); + } + RAW_LOG(ERROR, "Exiting with error code (instead of crashing) " + "because of whole-program memory leaks"); + _exit(1); // we don't want to call atexit() routines! + } + return true; +} + +// static +HeapLeakChecker* HeapLeakChecker::GlobalChecker() { + SpinLockHolder l(&heap_checker_lock); + return main_heap_checker; +} + +// static +bool HeapLeakChecker::NoGlobalLeaks() { + // symbolizing requires a fork, which isn't safe to do in general. + return NoGlobalLeaksMaybeSymbolize(DO_NOT_SYMBOLIZE); +} + +// static +void HeapLeakChecker::CancelGlobalCheck() { + SpinLockHolder l(&heap_checker_lock); + if (do_main_heap_check) { + RAW_VLOG(heap_checker_info_level, + "Canceling the automatic at-exit whole-program memory leak check"); + do_main_heap_check = false; + } +} + +// static +void HeapLeakChecker::BeforeConstructorsLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_CHECK(!constructor_heap_profiling, + "BeforeConstructorsLocked called multiple times"); +#ifdef ADDRESS_SANITIZER + // AddressSanitizer's custom malloc conflicts with HeapChecker. + return; +#endif + // Set hooks early to crash if 'new' gets called before we make heap_profile, + // and make sure no other hooks existed: + RAW_CHECK(MallocHook::AddNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), ""); + constructor_heap_profiling = true; + MemoryRegionMap::Init(1, /* use_buckets */ false); + // Set up MemoryRegionMap with (at least) one caller stack frame to record + // (important that it's done before HeapProfileTable creation below). + Allocator::Init(); + RAW_CHECK(heap_profile == NULL, ""); + heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable))) + HeapProfileTable(&Allocator::Allocate, &Allocator::Free, + /* profile_mmap */ false); + RAW_VLOG(10, "Starting tracking the heap"); + heap_checker_on = true; +} + +// static +void HeapLeakChecker::TurnItselfOffLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + // Set FLAGS_heap_check to "", for users who test for it + if (!FLAGS_heap_check.empty()) // be a noop in the common case + FLAGS_heap_check.clear(); // because clear() could allocate memory + if (constructor_heap_profiling) { + RAW_CHECK(heap_checker_on, ""); + RAW_VLOG(heap_checker_info_level, "Turning perftools heap leak checking off"); + heap_checker_on = false; + // Unset our hooks checking they were set: + RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), ""); + Allocator::DeleteAndNull(&heap_profile); + // free our optional global data: + Allocator::DeleteAndNullIfNot(&ignored_objects); + Allocator::DeleteAndNullIfNot(&disabled_ranges); + Allocator::DeleteAndNullIfNot(&global_region_caller_ranges); + Allocator::Shutdown(); + MemoryRegionMap::Shutdown(); + } + RAW_CHECK(!heap_checker_on, ""); +} + +extern bool heap_leak_checker_bcad_variable; // in heap-checker-bcad.cc + +static bool has_called_before_constructors = false; + +// TODO(maxim): inline this function with +// MallocHook_InitAtFirstAllocation_HeapLeakChecker, and also rename +// HeapLeakChecker::BeforeConstructorsLocked. 
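+//
+// For reference, heap checking is normally driven from the environment
+// before the binary starts, e.g. (the binary name is a placeholder):
+//   HEAPCHECK=normal ./my_binary
+//   PERFTOOLS_VERBOSE=1 HEAPCHECK=strict ./my_binary
+// using the modes parsed in HeapLeakChecker_InternalInitStart above.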
+void HeapLeakChecker_BeforeConstructors() {
+  SpinLockHolder l(&heap_checker_lock);
+  // We can be called from several places: the first mmap/sbrk/alloc call
+  // or the first global c-tor from heap-checker-bcad.cc:
+  // Do not re-execute initialization:
+  if (has_called_before_constructors) return;
+  has_called_before_constructors = true;
+
+  heap_checker_pid = getpid();  // set it always
+  heap_leak_checker_bcad_variable = true;
+  // just to reference it, so that heap-checker-bcad.o is linked in
+
+  // This function can be called *very* early, before the normal
+  // global-constructor that sets FLAGS_verbose. Set it manually now,
+  // so the RAW_LOG messages here are controllable.
+  const char* verbose_str = GetenvBeforeMain("PERFTOOLS_VERBOSE");
+  if (verbose_str && atoi(verbose_str)) {  // different than the default of 0?
+    FLAGS_verbose = atoi(verbose_str);
+  }
+
+  bool need_heap_check = true;
+  // The user indicates a desire for heap-checking via the HEAPCHECK
+  // environment variable. If it's not set, there's no way to do
+  // heap-checking.
+  if (!GetenvBeforeMain("HEAPCHECK")) {
+    need_heap_check = false;
+  }
+#ifdef HAVE_GETEUID
+  if (need_heap_check && getuid() != geteuid()) {
+    // heap-checker writes out files. Thus, for security reasons, we don't
+    // recognize the env. var. to turn on heap-checking if we're setuid.
+    RAW_LOG(WARNING, ("HeapChecker: ignoring HEAPCHECK because "
+                      "program seems to be setuid\n"));
+    need_heap_check = false;
+  }
+#endif
+  if (need_heap_check) {
+    HeapLeakChecker::BeforeConstructorsLocked();
+  }
+}
+
+// This function overrides the weak function defined in malloc_hook.cc and
+// called by one of the initial malloc hooks (malloc_hook.cc) when the very
+// first memory allocation or an mmap/sbrk happens. This ensures that
+// HeapLeakChecker is initialized and installs all its hooks early enough to
+// track absolutely all memory allocations and all memory region acquisitions
+// via mmap and sbrk.
+extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() {
+  HeapLeakChecker_BeforeConstructors();
+}
+
+// This function is executed after all global object destructors run.
+void HeapLeakChecker_AfterDestructors() {
+  { SpinLockHolder l(&heap_checker_lock);
+    // can get here (via forks?) with other pids
+    if (heap_checker_pid != getpid()) return;
+  }
+  if (FLAGS_heap_check_after_destructors) {
+    if (HeapLeakChecker::DoMainHeapCheck()) {
+      const struct timespec sleep_time = { 0, 500000000 };  // 500 ms
+      nanosleep(&sleep_time, NULL);
+      // Need this hack to wait for other pthreads to exit.
+      // Otherwise tcmalloc finds errors
+      // on a free() call from pthreads.
+ } + } + SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(!do_main_heap_check, "should have done it"); +} + +//---------------------------------------------------------------------- +// HeapLeakChecker disabling helpers +//---------------------------------------------------------------------- + +// These functions are at the end of the file to prevent their inlining: + +// static +void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address, + const void* end_address, + int max_depth) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_DCHECK(start_address < end_address, ""); + if (disabled_ranges == NULL) { + disabled_ranges = new(Allocator::Allocate(sizeof(DisabledRangeMap))) + DisabledRangeMap; + } + RangeValue value; + value.start_address = AsInt(start_address); + value.max_depth = max_depth; + if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) { + RAW_VLOG(10, "Disabling leak checking in stack traces " + "under frame addresses between %p..%p", + start_address, end_address); + } else { // check that this is just a verbatim repetition + RangeValue const& val = disabled_ranges->find(AsInt(end_address))->second; + if (val.max_depth != value.max_depth || + val.start_address != value.start_address) { + RAW_LOG(FATAL, "Two DisableChecksToHereFrom calls conflict: " + "(%p, %p, %d) vs. (%p, %p, %d)", + AsPtr(val.start_address), end_address, val.max_depth, + start_address, end_address, max_depth); + } + } +} + +// static +inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, + size_t* object_size) { + // Commented-out because HaveOnHeapLocked is very performance-critical: + // RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + const uintptr_t addr = AsInt(*ptr); + if (heap_profile->FindInsideAlloc( + *ptr, max_heap_object_size, ptr, object_size)) { + RAW_VLOG(16, "Got pointer into %p at +%" PRIuPTR " offset", + *ptr, addr - AsInt(*ptr)); + return true; + } + return false; +} + +// static +const void* HeapLeakChecker::GetAllocCaller(void* ptr) { + // this is used only in the unittest, so the heavy checks are fine + HeapProfileTable::AllocInfo info; + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(heap_profile->FindAllocDetails(ptr, &info), ""); + } + RAW_CHECK(info.stack_depth >= 1, ""); + return info.call_stack[0]; +} diff --git a/src/third_party/gperftools-2.5/src/heap-profile-stats.h b/src/third_party/gperftools-2.5/src/heap-profile-stats.h new file mode 100644 index 00000000000..ae45d5883fa --- /dev/null +++ b/src/third_party/gperftools-2.5/src/heap-profile-stats.h @@ -0,0 +1,78 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2013, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file defines structs to accumulate memory allocation and deallocation
+// counts. These structs are commonly used for malloc (in HeapProfileTable)
+// and mmap (in MemoryRegionMap).
+
+// A bucket is a data structure used by the heap profiler to store a pair of
+// a stack trace and counts of (de)allocation. Buckets are stored in a hash
+// table which is declared as "HeapProfileBucket**".
+//
+// A hash value is computed from a stack trace. Collisions in the hash table
+// are resolved by separate chaining with linked lists. The links in the list
+// are implemented with the member "HeapProfileBucket* next".
+//
+// The structure of a hash table HeapProfileBucket** bucket_table looks like:
+//   bucket_table[0] => NULL
+//   bucket_table[1] => HeapProfileBucket() => HeapProfileBucket() => NULL
+//   ...
+//   bucket_table[i] => HeapProfileBucket() => NULL
+//   ...
+//   bucket_table[n] => HeapProfileBucket() => NULL
+
+#ifndef HEAP_PROFILE_STATS_H_
+#define HEAP_PROFILE_STATS_H_
+
+struct HeapProfileStats {
+  // Returns true if the two HeapProfileStats are semantically equal.
+  bool Equivalent(const HeapProfileStats& other) const {
+    return allocs - frees == other.allocs - other.frees &&
+        alloc_size - free_size == other.alloc_size - other.free_size;
+  }
+
+  int32 allocs;      // Number of allocation calls.
+  int32 frees;       // Number of free calls.
+  int64 alloc_size;  // Total size of all allocated objects so far.
+  int64 free_size;   // Total size of all freed objects so far.
+};
+
+// Allocation and deallocation statistics per stack trace.
+struct HeapProfileBucket : public HeapProfileStats {
+  // Longest stack trace we record.
+  static const int kMaxStackDepth = 32;
+
+  uintptr_t hash;           // Hash value of the stack trace.
+  int depth;                // Depth of stack trace.
+  const void** stack;       // Stack trace.
+  HeapProfileBucket* next;  // Next entry in hash-table.
+};
+
+#endif  // HEAP_PROFILE_STATS_H_
diff --git a/src/third_party/gperftools-2.5/src/heap-profile-table.cc b/src/third_party/gperftools-2.5/src/heap-profile-table.cc
new file mode 100644
index 00000000000..7486468c056
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/heap-profile-table.cc
@@ -0,0 +1,631 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Maxim Lifantsev (refactoring) +// + +#include <config.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#include <fcntl.h> // for open() +#ifdef HAVE_GLOB_H +#include <glob.h> +#ifndef GLOB_NOMATCH // true on some old cygwins +# define GLOB_NOMATCH 0 +#endif +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // for PRIxPTR +#endif +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#include <errno.h> +#include <stdarg.h> +#include <string> +#include <map> +#include <algorithm> // for sort(), equal(), and copy() + +#include "heap-profile-table.h" + +#include "base/logging.h" +#include "raw_printer.h" +#include "symbolize.h" +#include <gperftools/stacktrace.h> +#include <gperftools/malloc_hook.h> +#include "memory_region_map.h" +#include "base/commandlineflags.h" +#include "base/logging.h" // for the RawFD I/O commands +#include "base/sysinfo.h" + +using std::sort; +using std::equal; +using std::copy; +using std::string; +using std::map; + +using tcmalloc::FillProcSelfMaps; // from sysinfo.h +using tcmalloc::DumpProcSelfMaps; // from sysinfo.h + +//---------------------------------------------------------------------- + +DEFINE_bool(cleanup_old_heap_profiles, + EnvToBool("HEAP_PROFILE_CLEANUP", true), + "At initialization time, delete old heap profiles."); + +DEFINE_int32(heap_check_max_leaks, + EnvToInt("HEAP_CHECK_MAX_LEAKS", 20), + "The maximum number of leak reports to print."); + +//---------------------------------------------------------------------- + +// header of the dumped heap profile +static const char kProfileHeader[] = "heap profile: "; +static const char kProcSelfMapsHeader[] = "\nMAPPED_LIBRARIES:\n"; + +//---------------------------------------------------------------------- + +const char HeapProfileTable::kFileExt[] = ".heap"; + +//---------------------------------------------------------------------- + +static const int kHashTableSize = 179999; // Size for bucket_table_. 
+/*static*/ const int HeapProfileTable::kMaxStackDepth;
+
+//----------------------------------------------------------------------
+
+// We strip out a different number of stack frames in debug mode
+// because less inlining happens in that case.
+#ifdef NDEBUG
+static const int kStripFrames = 2;
+#else
+static const int kStripFrames = 3;
+#endif
+
+// For sorting Stats or Buckets by in-use space
+static bool ByAllocatedSpace(HeapProfileTable::Stats* a,
+                             HeapProfileTable::Stats* b) {
+  // Return true iff "a" has more allocated space than "b"
+  return (a->alloc_size - a->free_size) > (b->alloc_size - b->free_size);
+}
+
+//----------------------------------------------------------------------
+
+HeapProfileTable::HeapProfileTable(Allocator alloc,
+                                   DeAllocator dealloc,
+                                   bool profile_mmap)
+    : alloc_(alloc),
+      dealloc_(dealloc),
+      profile_mmap_(profile_mmap),
+      bucket_table_(NULL),
+      num_buckets_(0),
+      address_map_(NULL) {
+  // Make a hash table for buckets.
+  const int table_bytes = kHashTableSize * sizeof(*bucket_table_);
+  bucket_table_ = static_cast<Bucket**>(alloc_(table_bytes));
+  memset(bucket_table_, 0, table_bytes);
+
+  // Make an allocation map.
+  address_map_ =
+      new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
+
+  // Initialize.
+  memset(&total_, 0, sizeof(total_));
+  num_buckets_ = 0;
+}
+
+HeapProfileTable::~HeapProfileTable() {
+  // Free the allocation map.
+  address_map_->~AllocationMap();
+  dealloc_(address_map_);
+  address_map_ = NULL;
+
+  // Free the hash table.
+  for (int i = 0; i < kHashTableSize; i++) {
+    for (Bucket* curr = bucket_table_[i]; curr != 0; /**/) {
+      Bucket* bucket = curr;
+      curr = curr->next;
+      dealloc_(bucket->stack);
+      dealloc_(bucket);
+    }
+  }
+  dealloc_(bucket_table_);
+  bucket_table_ = NULL;
+}
+
+HeapProfileTable::Bucket* HeapProfileTable::GetBucket(
+    int depth, const void* const key[]) {
+  // Compute the hash value.
+  uintptr_t h = 0;
+  for (int i = 0; i < depth; i++) {
+    h += reinterpret_cast<uintptr_t>(key[i]);
+    h += h << 10;
+    h ^= h >> 6;
+  }
+  h += h << 3;
+  h ^= h >> 11;
+
+  // Look up the stack trace in the table.
+  unsigned int buck = ((unsigned int) h) % kHashTableSize;
+  for (Bucket* b = bucket_table_[buck]; b != 0; b = b->next) {
+    if ((b->hash == h) &&
+        (b->depth == depth) &&
+        equal(key, key + depth, b->stack)) {
+      return b;
+    }
+  }
+
+  // Create a new bucket.
+  const size_t key_size = sizeof(key[0]) * depth;
+  const void** kcopy = reinterpret_cast<const void**>(alloc_(key_size));
+  copy(key, key + depth, kcopy);
+  Bucket* b = reinterpret_cast<Bucket*>(alloc_(sizeof(Bucket)));
+  memset(b, 0, sizeof(*b));
+  b->hash = h;
+  b->depth = depth;
+  b->stack = kcopy;
+  b->next = bucket_table_[buck];
+  bucket_table_[buck] = b;
+  num_buckets_++;
+  return b;
+}
+
+int HeapProfileTable::GetCallerStackTrace(
+    int skip_count, void* stack[kMaxStackDepth]) {
+  return MallocHook::GetCallerStackTrace(
+      stack, kMaxStackDepth, kStripFrames + skip_count + 1);
+}
+
+void HeapProfileTable::RecordAlloc(
+    const void* ptr, size_t bytes, int stack_depth,
+    const void* const call_stack[]) {
+  Bucket* b = GetBucket(stack_depth, call_stack);
+  b->allocs++;
+  b->alloc_size += bytes;
+  total_.allocs++;
+  total_.alloc_size += bytes;
+
+  AllocValue v;
+  v.set_bucket(b);  // also did set_live(false); set_ignore(false)
+  v.bytes = bytes;
+  address_map_->Insert(ptr, v);
+}
+
+void HeapProfileTable::RecordFree(const void* ptr) {
+  AllocValue v;
+  if (address_map_->FindAndRemove(ptr, &v)) {
+    Bucket* b = v.bucket();
+    b->frees++;
+    b->free_size += v.bytes;
+    total_.frees++;
+    total_.free_size += v.bytes;
+  }
+}
+
+bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const {
+  const AllocValue* alloc_value = address_map_->Find(ptr);
+  if (alloc_value != NULL) *object_size = alloc_value->bytes;
+  return alloc_value != NULL;
+}
+
+bool HeapProfileTable::FindAllocDetails(const void* ptr,
+                                        AllocInfo* info) const {
+  const AllocValue* alloc_value = address_map_->Find(ptr);
+  if (alloc_value != NULL) {
+    info->object_size = alloc_value->bytes;
+    info->call_stack = alloc_value->bucket()->stack;
+    info->stack_depth = alloc_value->bucket()->depth;
+  }
+  return alloc_value != NULL;
+}
+
+bool HeapProfileTable::FindInsideAlloc(const void* ptr,
+                                       size_t max_size,
+                                       const void** object_ptr,
+                                       size_t* object_size) const {
+  const AllocValue* alloc_value =
+      address_map_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
+  if (alloc_value != NULL) *object_size = alloc_value->bytes;
+  return alloc_value != NULL;
+}
+
+bool HeapProfileTable::MarkAsLive(const void* ptr) {
+  AllocValue* alloc = address_map_->FindMutable(ptr);
+  if (alloc && !alloc->live()) {
+    alloc->set_live(true);
+    return true;
+  }
+  return false;
+}
+
+void HeapProfileTable::MarkAsIgnored(const void* ptr) {
+  AllocValue* alloc = address_map_->FindMutable(ptr);
+  if (alloc) {
+    alloc->set_ignore(true);
+  }
+}
+
+// We'd be happier using a snprintf-style formatter class, but we avoid
+// one to reduce dependencies.
+int HeapProfileTable::UnparseBucket(const Bucket& b,
+                                    char* buf, int buflen, int bufsize,
+                                    const char* extra,
+                                    Stats* profile_stats) {
+  if (profile_stats != NULL) {
+    profile_stats->allocs += b.allocs;
+    profile_stats->alloc_size += b.alloc_size;
+    profile_stats->frees += b.frees;
+    profile_stats->free_size += b.free_size;
+  }
+  int printed =
+      snprintf(buf + buflen, bufsize - buflen,
+               "%6d: %8" PRId64 " [%6d: %8" PRId64 "] @%s",
+               b.allocs - b.frees,
+               b.alloc_size - b.free_size,
+               b.allocs,
+               b.alloc_size,
+               extra);
+  // If it looks like the snprintf failed, ignore the fact that we printed
+  // anything.
+  if (printed < 0 || printed >= bufsize - buflen) return buflen;
+  buflen += printed;
+  for (int d = 0; d < b.depth; d++) {
+    printed = snprintf(buf + buflen, bufsize - buflen, " 0x%08" PRIxPTR,
+                       reinterpret_cast<uintptr_t>(b.stack[d]));
+    if (printed < 0 || printed >= bufsize - buflen) return buflen;
+    buflen += printed;
+  }
+  printed = snprintf(buf + buflen, bufsize - buflen, "\n");
+  if (printed < 0 || printed >= bufsize - buflen) return buflen;
+  buflen += printed;
+  return buflen;
+}
+
+HeapProfileTable::Bucket**
+HeapProfileTable::MakeSortedBucketList() const {
+  Bucket** list = static_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_));
+
+  int bucket_count = 0;
+  for (int i = 0; i < kHashTableSize; i++) {
+    for (Bucket* curr = bucket_table_[i]; curr != 0; curr = curr->next) {
+      list[bucket_count++] = curr;
+    }
+  }
+  RAW_DCHECK(bucket_count == num_buckets_, "");
+
+  sort(list, list + num_buckets_, ByAllocatedSpace);
+
+  return list;
+}
+
+void HeapProfileTable::IterateOrderedAllocContexts(
+    AllocContextIterator callback) const {
+  Bucket** list = MakeSortedBucketList();
+  AllocContextInfo info;
+  for (int i = 0; i < num_buckets_; ++i) {
+    *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]);
+    info.stack_depth = list[i]->depth;
+    info.call_stack = list[i]->stack;
+    callback(info);
+  }
+  dealloc_(list);
+}
+
+int HeapProfileTable::FillOrderedProfile(char buf[], int size) const {
+  Bucket** list = MakeSortedBucketList();
+
+  // Our file format is "bucket, bucket, ..., bucket, proc_self_maps_info".
proc_self_maps_info". + // In the cases buf is too small, we'd rather leave out the last + // buckets than leave out the /proc/self/maps info. To ensure that, + // we actually print the /proc/self/maps info first, then move it to + // the end of the buffer, then write the bucket info into whatever + // is remaining, and then move the maps info one last time to close + // any gaps. Whew! + int map_length = snprintf(buf, size, "%s", kProcSelfMapsHeader); + if (map_length < 0 || map_length >= size) { + dealloc_(list); + return 0; + } + bool dummy; // "wrote_all" -- did /proc/self/maps fit in its entirety? + map_length += FillProcSelfMaps(buf + map_length, size - map_length, &dummy); + RAW_DCHECK(map_length <= size, ""); + char* const map_start = buf + size - map_length; // move to end + memmove(map_start, buf, map_length); + size -= map_length; + + Stats stats; + memset(&stats, 0, sizeof(stats)); + int bucket_length = snprintf(buf, size, "%s", kProfileHeader); + if (bucket_length < 0 || bucket_length >= size) { + dealloc_(list); + return 0; + } + bucket_length = UnparseBucket(total_, buf, bucket_length, size, + " heapprofile", &stats); + + // Dump the mmap list first. + if (profile_mmap_) { + BufferArgs buffer(buf, bucket_length, size); + MemoryRegionMap::IterateBuckets<BufferArgs*>(DumpBucketIterator, &buffer); + bucket_length = buffer.buflen; + } + + for (int i = 0; i < num_buckets_; i++) { + bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "", + &stats); + } + RAW_DCHECK(bucket_length < size, ""); + + dealloc_(list); + + RAW_DCHECK(buf + bucket_length <= map_start, ""); + memmove(buf + bucket_length, map_start, map_length); // close the gap + + return bucket_length + map_length; +} + +// static +void HeapProfileTable::DumpBucketIterator(const Bucket* bucket, + BufferArgs* args) { + args->buflen = UnparseBucket(*bucket, args->buf, args->buflen, args->bufsize, + "", NULL); +} + +inline +void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v, + const DumpArgs& args) { + if (v->live()) { + v->set_live(false); + return; + } + if (v->ignore()) { + return; + } + Bucket b; + memset(&b, 0, sizeof(b)); + b.allocs = 1; + b.alloc_size = v->bytes; + b.depth = v->bucket()->depth; + b.stack = v->bucket()->stack; + char buf[1024]; + int len = UnparseBucket(b, buf, 0, sizeof(buf), "", args.profile_stats); + RawWrite(args.fd, buf, len); +} + +// Callback from NonLiveSnapshot; adds entry to arg->dest +// if not the entry is not live and is not present in arg->base. 
+void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v,
+                                    AddNonLiveArgs* arg) {
+  if (v->live()) {
+    v->set_live(false);
+  } else {
+    if (arg->base != NULL && arg->base->map_.Find(ptr) != NULL) {
+      // Present in arg->base, so do not save.
+    } else {
+      arg->dest->Add(ptr, *v);
+    }
+  }
+}
+
+bool HeapProfileTable::WriteProfile(const char* file_name,
+                                    const Bucket& total,
+                                    AllocationMap* allocations) {
+  RAW_VLOG(1, "Dumping non-live heap profile to %s", file_name);
+  RawFD fd = RawOpenForWriting(file_name);
+  if (fd != kIllegalRawFD) {
+    RawWrite(fd, kProfileHeader, strlen(kProfileHeader));
+    char buf[512];
+    int len = UnparseBucket(total, buf, 0, sizeof(buf), " heapprofile",
+                            NULL);
+    RawWrite(fd, buf, len);
+    const DumpArgs args(fd, NULL);
+    allocations->Iterate<const DumpArgs&>(DumpNonLiveIterator, args);
+    RawWrite(fd, kProcSelfMapsHeader, strlen(kProcSelfMapsHeader));
+    DumpProcSelfMaps(fd);
+    RawClose(fd);
+    return true;
+  } else {
+    RAW_LOG(ERROR, "Failed dumping filtered heap profile to %s", file_name);
+    return false;
+  }
+}
+
+void HeapProfileTable::CleanupOldProfiles(const char* prefix) {
+  if (!FLAGS_cleanup_old_heap_profiles)
+    return;
+  string pattern = string(prefix) + ".*" + kFileExt;
+#if defined(HAVE_GLOB_H)
+  glob_t g;
+  const int r = glob(pattern.c_str(), GLOB_ERR, NULL, &g);
+  if (r == 0 || r == GLOB_NOMATCH) {
+    const int prefix_length = strlen(prefix);
+    for (int i = 0; i < g.gl_pathc; i++) {
+      const char* fname = g.gl_pathv[i];
+      if ((strlen(fname) >= prefix_length) &&
+          (memcmp(fname, prefix, prefix_length) == 0)) {
+        RAW_VLOG(1, "Removing old heap profile %s", fname);
+        unlink(fname);
+      }
+    }
+  }
+  globfree(&g);
+#else   /* HAVE_GLOB_H */
+  RAW_LOG(WARNING, "Unable to remove old heap profiles (can't run glob())");
+#endif
+}
+
+HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() {
+  Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
+  address_map_->Iterate(AddToSnapshot, s);
+  return s;
+}
+
+void HeapProfileTable::ReleaseSnapshot(Snapshot* s) {
+  s->~Snapshot();
+  dealloc_(s);
+}
+
+// Callback from TakeSnapshot; adds a single entry to snapshot.
+void HeapProfileTable::AddToSnapshot(const void* ptr, AllocValue* v,
+                                     Snapshot* snapshot) {
+  snapshot->Add(ptr, *v);
+}
+
+HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot(
+    Snapshot* base) {
+  RAW_VLOG(2, "NonLiveSnapshot input: %d %d\n",
+           int(total_.allocs - total_.frees),
+           int(total_.alloc_size - total_.free_size));
+
+  Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
+  AddNonLiveArgs args;
+  args.dest = s;
+  args.base = base;
+  address_map_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args);
+  RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n",
+           int(s->total_.allocs - s->total_.frees),
+           int(s->total_.alloc_size - s->total_.free_size));
+  return s;
+}
+
+// Information kept per unique bucket seen
+struct HeapProfileTable::Snapshot::Entry {
+  int count;
+  int bytes;
+  Bucket* bucket;
+  Entry() : count(0), bytes(0) { }
+
+  // Order by decreasing bytes
+  bool operator<(const Entry& x) const {
+    return this->bytes > x.bytes;
+  }
+};
+
+// State used to generate leak report.  We keep a mapping from Bucket pointer
+// to the collected stats for that bucket.
+struct HeapProfileTable::Snapshot::ReportState {
+  map<Bucket*, Entry> buckets_;
+};
+
+// Callback from ReportLeaks; updates ReportState.
+void HeapProfileTable::Snapshot::ReportCallback(const void* ptr, + AllocValue* v, + ReportState* state) { + Entry* e = &state->buckets_[v->bucket()]; // Creates empty Entry first time + e->bucket = v->bucket(); + e->count++; + e->bytes += v->bytes; +} + +void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, + const char* filename, + bool should_symbolize) { + // This is only used by the heap leak checker, but is intimately + // tied to the allocation map that belongs in this module and is + // therefore placed here. + RAW_LOG(ERROR, "Leak check %s detected leaks of %" PRIuS " bytes " + "in %" PRIuS " objects", + checker_name, + size_t(total_.alloc_size), + size_t(total_.allocs)); + + // Group objects by Bucket + ReportState state; + map_.Iterate(&ReportCallback, &state); + + // Sort buckets by decreasing leaked size + const int n = state.buckets_.size(); + Entry* entries = new Entry[n]; + int dst = 0; + for (map<Bucket*,Entry>::const_iterator iter = state.buckets_.begin(); + iter != state.buckets_.end(); + ++iter) { + entries[dst++] = iter->second; + } + sort(entries, entries + n); + + // Report a bounded number of leaks to keep the leak report from + // growing too long. + const int to_report = + (FLAGS_heap_check_max_leaks > 0 && + n > FLAGS_heap_check_max_leaks) ? FLAGS_heap_check_max_leaks : n; + RAW_LOG(ERROR, "The %d largest leaks:", to_report); + + // Print + SymbolTable symbolization_table; + for (int i = 0; i < to_report; i++) { + const Entry& e = entries[i]; + for (int j = 0; j < e.bucket->depth; j++) { + symbolization_table.Add(e.bucket->stack[j]); + } + } + static const int kBufSize = 2<<10; + char buffer[kBufSize]; + if (should_symbolize) + symbolization_table.Symbolize(); + for (int i = 0; i < to_report; i++) { + const Entry& e = entries[i]; + base::RawPrinter printer(buffer, kBufSize); + printer.Printf("Leak of %d bytes in %d objects allocated from:\n", + e.bytes, e.count); + for (int j = 0; j < e.bucket->depth; j++) { + const void* pc = e.bucket->stack[j]; + printer.Printf("\t@ %" PRIxPTR " %s\n", + reinterpret_cast<uintptr_t>(pc), symbolization_table.GetSymbol(pc)); + } + RAW_LOG(ERROR, "%s", buffer); + } + + if (to_report < n) { + RAW_LOG(ERROR, "Skipping leaks numbered %d..%d", + to_report, n-1); + } + delete[] entries; + + // TODO: Dump the sorted Entry list instead of dumping raw data? + // (should be much shorter) + if (!HeapProfileTable::WriteProfile(filename, total_, &map_)) { + RAW_LOG(ERROR, "Could not write pprof profile to %s", filename); + } +} + +void HeapProfileTable::Snapshot::ReportObject(const void* ptr, + AllocValue* v, + char* unused) { + // Perhaps also log the allocation stack trace (unsymbolized) + // on this line in case somebody finds it useful. + RAW_LOG(ERROR, "leaked %" PRIuS " byte object %p", v->bytes, ptr); +} + +void HeapProfileTable::Snapshot::ReportIndividualObjects() { + char unused; + map_.Iterate(ReportObject, &unused); +} diff --git a/src/third_party/gperftools-2.5/src/heap-profile-table.h b/src/third_party/gperftools-2.5/src/heap-profile-table.h new file mode 100644 index 00000000000..3c6284741af --- /dev/null +++ b/src/third_party/gperftools-2.5/src/heap-profile-table.h @@ -0,0 +1,399 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//         Maxim Lifantsev (refactoring)
+//
+
+#ifndef BASE_HEAP_PROFILE_TABLE_H_
+#define BASE_HEAP_PROFILE_TABLE_H_
+
+#include "addressmap-inl.h"
+#include "base/basictypes.h"
+#include "base/logging.h"   // for RawFD
+#include "heap-profile-stats.h"
+
+// Table that maintains heap profile data, i.e. the set of currently
+// active heap memory allocations.  The code is thread-unsafe and
+// non-reentrant: each instance must be used by one thread at a time,
+// without self-recursion.
+//
+// TODO(maxim): add a unittest for this class.
+class HeapProfileTable {
+ public:
+
+  // Extension to be used for heap profile files.
+  static const char kFileExt[];
+
+  // Longest stack trace we record.
+  static const int kMaxStackDepth = 32;
+
+  // data types ----------------------------
+
+  // Profile stats.
+  typedef HeapProfileStats Stats;
+
+  // Info we can return about an allocation.
+  struct AllocInfo {
+    size_t object_size;  // size of the allocation
+    const void* const* call_stack;  // call stack that made the allocation call
+    int stack_depth;  // depth of call_stack
+    bool live;
+    bool ignored;
+  };
+
+  // Info we return about an allocation context.
+  // An allocation context is a unique caller stack trace
+  // of an allocation operation.
+  struct AllocContextInfo : public Stats {
+    int stack_depth;                // Depth of stack trace
+    const void* const* call_stack;  // Stack trace
+  };
+
+  // Memory (de)allocator interface we'll use.
+  typedef void* (*Allocator)(size_t size);
+  typedef void  (*DeAllocator)(void* ptr);
+
+  // interface ---------------------------
+
+  HeapProfileTable(Allocator alloc, DeAllocator dealloc, bool profile_mmap);
+  ~HeapProfileTable();
+
+  // Collect the stack trace for the function that asked to do the
+  // allocation for passing to RecordAlloc() below.
+  //
+  // The stack trace is stored in 'stack'.  The stack depth is returned.
+  //
+  // 'skip_count' gives the number of stack frames between this call
+  // and the memory allocation function.
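+  //
+  // For illustration only (a sketch of a hypothetical caller; see
+  // heap-profiler.cc for the real hookup):
+  //   void* stack[HeapProfileTable::kMaxStackDepth];
+  //   int depth = HeapProfileTable::GetCallerStackTrace(0, stack);
+  //   table->RecordAlloc(ptr, bytes, depth, stack);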
+  static int GetCallerStackTrace(int skip_count, void* stack[kMaxStackDepth]);
+
+  // Record an allocation at 'ptr' of 'bytes' bytes.  'stack_depth'
+  // and 'call_stack' identify the call site that requested the
+  // allocation; they can be generated using GetCallerStackTrace() above.
+  void RecordAlloc(const void* ptr, size_t bytes,
+                   int stack_depth, const void* const call_stack[]);
+
+  // Record the deallocation of memory at 'ptr'.
+  void RecordFree(const void* ptr);
+
+  // Return true iff we have recorded an allocation at 'ptr'.
+  // If yes, fill *object_size with the allocation byte size.
+  bool FindAlloc(const void* ptr, size_t* object_size) const;
+  // Same as FindAlloc, but fills all of *info.
+  bool FindAllocDetails(const void* ptr, AllocInfo* info) const;
+
+  // Return true iff "ptr" points into a recorded allocation.
+  // If yes, fill *object_ptr with the actual allocation address
+  // and *object_size with the allocation byte size.
+  // max_size specifies the largest currently possible allocation size.
+  bool FindInsideAlloc(const void* ptr, size_t max_size,
+                       const void** object_ptr, size_t* object_size) const;
+
+  // If "ptr" points to a recorded allocation that is not marked as live,
+  // mark it as live and return true.  Else return false.
+  // All allocations start as non-live.
+  bool MarkAsLive(const void* ptr);
+
+  // If "ptr" points to a recorded allocation, mark it as "ignored".
+  // Ignored objects are treated like other objects, except that they
+  // are skipped in heap checking reports.
+  void MarkAsIgnored(const void* ptr);
+
+  // Return current total (de)allocation statistics.  The stats do not
+  // include mmap'ed regions.
+  const Stats& total() const { return total_; }
+
+  // Allocation data iteration callback: gets passed object pointer and
+  // fully-filled AllocInfo.
+  typedef void (*AllocIterator)(const void* ptr, const AllocInfo& info);
+
+  // Iterate over the allocation profile data calling "callback"
+  // for every allocation.
+  void IterateAllocs(AllocIterator callback) const {
+    address_map_->Iterate(MapArgsAllocIterator, callback);
+  }
+
+  // Allocation context profile data iteration callback
+  typedef void (*AllocContextIterator)(const AllocContextInfo& info);
+
+  // Iterate over the allocation context profile data calling "callback"
+  // for every allocation context.  Allocation contexts are ordered by the
+  // size of allocated space.
+  void IterateOrderedAllocContexts(AllocContextIterator callback) const;
+
+  // Fill profile data into buffer 'buf' of size 'size'
+  // and return the actual size occupied by the dump in 'buf'.
+  // The profile buckets are dumped in decreasing order
+  // of currently allocated bytes.
+  // We do not provision for 0-terminating 'buf'.
+  int FillOrderedProfile(char buf[], int size) const;
+
+  // Clean up any old profile files matching prefix + ".*" + kFileExt.
+  static void CleanupOldProfiles(const char* prefix);
+
+  // Return a snapshot of the current contents of *this.
+  // Caller must call ReleaseSnapshot() on result when no longer needed.
+  // The result is only valid while this exists and until
+  // the snapshot is discarded by calling ReleaseSnapshot().
+  class Snapshot;
+  Snapshot* TakeSnapshot();
+
+  // Release a previously taken snapshot.  snapshot must not
+  // be used after this call.
+  void ReleaseSnapshot(Snapshot* snapshot);
+
+  // Return a snapshot of every non-live, non-ignored object in *this.
+  // If "base" is non-NULL, skip any objects present in "base".
+  // As a side-effect, clears the "live" bit on every live object in *this.
+  // Caller must call ReleaseSnapshot() on result when no longer needed.
+  Snapshot* NonLiveSnapshot(Snapshot* base);
+
+ private:
+
+  // data types ----------------------------
+
+  // Hash table bucket to hold (de)allocation stats
+  // for a given allocation call stack trace.
+  typedef HeapProfileBucket Bucket;
+
+  // Info stored in the address map
+  struct AllocValue {
+    // Access to the stack-trace bucket
+    Bucket* bucket() const {
+      return reinterpret_cast<Bucket*>(bucket_rep & ~uintptr_t(kMask));
+    }
+    // This also does set_live(false).
+    void set_bucket(Bucket* b) { bucket_rep = reinterpret_cast<uintptr_t>(b); }
+    size_t bytes;  // Number of bytes in this allocation
+
+    // Access to the allocation liveness flag (for leak checking)
+    bool live() const { return bucket_rep & kLive; }
+    void set_live(bool l) {
+      bucket_rep = (bucket_rep & ~uintptr_t(kLive)) | (l ? kLive : 0);
+    }
+
+    // Should this allocation be ignored if it looks like a leak?
+    bool ignore() const { return bucket_rep & kIgnore; }
+    void set_ignore(bool r) {
+      bucket_rep = (bucket_rep & ~uintptr_t(kIgnore)) | (r ? kIgnore : 0);
+    }
+
+   private:
+    // We store a few bits in the bottom bits of bucket_rep.
+    // (Alignment is at least four, so we have at least two bits.)
+    static const int kLive = 1;
+    static const int kIgnore = 2;
+    static const int kMask = kLive | kIgnore;
+
+    uintptr_t bucket_rep;
+  };
+
+  // helper for FindInsideAlloc
+  static size_t AllocValueSize(const AllocValue& v) { return v.bytes; }
+
+  typedef AddressMap<AllocValue> AllocationMap;
+
+  // Arguments that need to be passed to the DumpBucketIterator callback below.
+  struct BufferArgs {
+    BufferArgs(char* buf_arg, int buflen_arg, int bufsize_arg)
+        : buf(buf_arg),
+          buflen(buflen_arg),
+          bufsize(bufsize_arg) {
+    }
+
+    char* buf;
+    int buflen;
+    int bufsize;
+
+    DISALLOW_COPY_AND_ASSIGN(BufferArgs);
+  };
+
+  // Arguments that need to be passed to the DumpNonLiveIterator callback
+  // below.
+  struct DumpArgs {
+    DumpArgs(RawFD fd_arg, Stats* profile_stats_arg)
+        : fd(fd_arg),
+          profile_stats(profile_stats_arg) {
+    }
+
+    RawFD fd;  // file to write to
+    Stats* profile_stats;  // stats to update (may be NULL)
+  };
+
+  // helpers ----------------------------
+
+  // Unparse bucket b and print its portion of profile dump into buf.
+  // We return the amount of space in buf that we use.  We start printing
+  // at buf + buflen, and promise not to go beyond buf + bufsize.
+  // We do not provision for 0-terminating 'buf'.
+  //
+  // If profile_stats is non-NULL, we update *profile_stats by
+  // counting bucket b.
+  //
+  // "extra" is appended to the unparsed bucket.  Typically it is empty,
+  // but may be set to something like " heapprofile" for the total
+  // bucket to indicate the type of the profile.
+  static int UnparseBucket(const Bucket& b,
+                           char* buf, int buflen, int bufsize,
+                           const char* extra,
+                           Stats* profile_stats);
+
+  // Get the bucket for the caller stack trace 'key' of depth 'depth',
+  // creating the bucket if needed.
+  Bucket* GetBucket(int depth, const void* const key[]);
+
+  // Helper for IterateAllocs to do callback signature conversion
+  // from AllocationMap::Iterate to AllocIterator.
+  static void MapArgsAllocIterator(const void* ptr, AllocValue* v,
+                                   AllocIterator callback) {
+    AllocInfo info;
+    info.object_size = v->bytes;
+    info.call_stack = v->bucket()->stack;
+    info.stack_depth = v->bucket()->depth;
+    info.live = v->live();
+    info.ignored = v->ignore();
+    callback(ptr, info);
+  }
+
+  // Helper to dump a bucket.
+  inline static void DumpBucketIterator(const Bucket* bucket,
+                                        BufferArgs* args);
+
+  // Helper for WriteProfile to do object-granularity
+  // heap profile dumping.  It gets passed to AllocationMap::Iterate.
+  inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v,
+                                         const DumpArgs& args);
+
+  // Helper for IterateOrderedAllocContexts and FillOrderedProfile.
+  // Creates a sorted list of Buckets whose length is num_buckets_.
+  // The caller is responsible for deallocating the returned list.
+  Bucket** MakeSortedBucketList() const;
+
+  // Helper for TakeSnapshot.  Saves object to snapshot.
+  static void AddToSnapshot(const void* ptr, AllocValue* v, Snapshot* s);
+
+  // Arguments passed to AddIfNonLive
+  struct AddNonLiveArgs {
+    Snapshot* dest;
+    Snapshot* base;
+  };
+
+  // Helper for NonLiveSnapshot.  Adds the object to the destination
+  // snapshot if it is non-live.
+  static void AddIfNonLive(const void* ptr, AllocValue* v,
+                           AddNonLiveArgs* arg);
+
+  // Write contents of "*allocations" as a heap profile to
+  // "file_name".  "total" must contain the total of all entries in
+  // "*allocations".
+  static bool WriteProfile(const char* file_name,
+                           const Bucket& total,
+                           AllocationMap* allocations);
+
+  // data ----------------------------
+
+  // Memory (de)allocator that we use.
+  Allocator alloc_;
+  DeAllocator dealloc_;
+
+  // Overall profile stats; we use only the Stats part,
+  // but make it a Bucket to pass to UnparseBucket.
+  Bucket total_;
+
+  bool profile_mmap_;
+
+  // Bucket hash table for malloc.
+  // We hand-craft one instead of using one of the pre-written
+  // ones because we do not want to use malloc when operating on the table.
+  // It is only a few lines of code, so no big deal.
+  Bucket** bucket_table_;
+  int num_buckets_;
+
+  // Map of all currently allocated objects and mapped regions we know about.
+  AllocationMap* address_map_;
+
+  DISALLOW_COPY_AND_ASSIGN(HeapProfileTable);
+};
+
+class HeapProfileTable::Snapshot {
+ public:
+  const Stats& total() const { return total_; }
+
+  // Report anything in this snapshot as a leak.
+  // May use new/delete for temporary storage.
+  // If should_symbolize is true, will fork (which is not threadsafe)
+  // to turn addresses into symbol names.  Set to false for maximum safety.
+  // Also writes a heap profile to "filename" that contains
+  // all of the objects in this snapshot.
+  void ReportLeaks(const char* checker_name, const char* filename,
+                   bool should_symbolize);
+
+  // Report the addresses of all leaked objects.
+  // May use new/delete for temporary storage.
+  void ReportIndividualObjects();
+
+  bool Empty() const {
+    return (total_.allocs == 0) && (total_.alloc_size == 0);
+  }
+
+ private:
+  friend class HeapProfileTable;
+
+  // Total count/size are stored in a Bucket so we can reuse UnparseBucket
+  Bucket total_;
+
+  // We share the Buckets managed by the parent table, but have our
+  // own object->bucket map.
+  AllocationMap map_;
+
+  Snapshot(Allocator alloc, DeAllocator dealloc) : map_(alloc, dealloc) {
+    memset(&total_, 0, sizeof(total_));
+  }
+
+  // Callback used to populate a Snapshot object with entries found
+  // in another allocation map.
+ inline void Add(const void* ptr, const AllocValue& v) { + map_.Insert(ptr, v); + total_.allocs++; + total_.alloc_size += v.bytes; + } + + // Helpers for sorting and generating leak reports + struct Entry; + struct ReportState; + static void ReportCallback(const void* ptr, AllocValue* v, ReportState*); + static void ReportObject(const void* ptr, AllocValue* v, char*); + + DISALLOW_COPY_AND_ASSIGN(Snapshot); +}; + +#endif // BASE_HEAP_PROFILE_TABLE_H_ diff --git a/src/third_party/gperftools-2.5/src/heap-profiler.cc b/src/third_party/gperftools-2.5/src/heap-profiler.cc new file mode 100755 index 00000000000..17d86976bc4 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/heap-profiler.cc @@ -0,0 +1,620 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// TODO: Log large allocations + +#include <config.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> // for open() +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <errno.h> +#include <assert.h> +#include <sys/types.h> +#include <signal.h> + +#include <algorithm> +#include <string> + +#include <gperftools/heap-profiler.h> + +#include "base/logging.h" +#include "base/basictypes.h" // for PRId64, among other things +#include "base/googleinit.h" +#include "base/commandlineflags.h" +#include "malloc_hook-inl.h" +#include "tcmalloc_guard.h" +#include <gperftools/malloc_hook.h> +#include <gperftools/malloc_extension.h> +#include "base/spinlock.h" +#include "base/low_level_alloc.h" +#include "base/sysinfo.h" // for GetUniquePathFromEnv() +#include "heap-profile-table.h" +#include "memory_region_map.h" + + +#ifndef PATH_MAX +#ifdef MAXPATHLEN +#define PATH_MAX MAXPATHLEN +#else +#define PATH_MAX 4096 // seems conservative for max filename len! 
+#endif
+#endif
+
+using STL_NAMESPACE::string;
+using STL_NAMESPACE::sort;
+
+//----------------------------------------------------------------------
+// Flags that control heap-profiling
+//
+// The thread-safety of the profiler depends on these being immutable
+// after main starts, so don't change them.
+//----------------------------------------------------------------------
+
+DEFINE_int64(heap_profile_allocation_interval,
+             EnvToInt64("HEAP_PROFILE_ALLOCATION_INTERVAL", 1 << 30 /*1GB*/),
+             "If non-zero, dump heap profiling information once every "
+             "specified number of bytes allocated by the program since "
+             "the last dump.");
+DEFINE_int64(heap_profile_deallocation_interval,
+             EnvToInt64("HEAP_PROFILE_DEALLOCATION_INTERVAL", 0),
+             "If non-zero, dump heap profiling information once every "
+             "specified number of bytes deallocated by the program "
+             "since the last dump.");
+// We could also add flags that report whenever inuse_bytes changes by
+// X or -X, but there hasn't been a need for that yet, so we haven't.
+DEFINE_int64(heap_profile_inuse_interval,
+             EnvToInt64("HEAP_PROFILE_INUSE_INTERVAL", 100 << 20 /*100MB*/),
+             "If non-zero, dump heap profiling information whenever "
+             "the high-water memory usage mark increases by the specified "
+             "number of bytes.");
+DEFINE_int64(heap_profile_time_interval,
+             EnvToInt64("HEAP_PROFILE_TIME_INTERVAL", 0),
+             "If non-zero, dump heap profiling information once every "
+             "specified number of seconds since the last dump.");
+DEFINE_bool(mmap_log,
+            EnvToBool("HEAP_PROFILE_MMAP_LOG", false),
+            "Should mmap/munmap calls be logged?");
+DEFINE_bool(mmap_profile,
+            EnvToBool("HEAP_PROFILE_MMAP", false),
+            "If heap-profiling is on, also profile mmap, mremap, and sbrk.");
+DEFINE_bool(only_mmap_profile,
+            EnvToBool("HEAP_PROFILE_ONLY_MMAP", false),
+            "If heap-profiling is on, only profile mmap, mremap, and sbrk; "
+            "do not profile malloc/new/etc.");
+
+
+//----------------------------------------------------------------------
+// Locking
+//----------------------------------------------------------------------
+
+// A pthread_mutex has way too much lock contention to be used here.
+//
+// I would like to use Mutex, but it can call malloc(),
+// which can cause us to fall into an infinite recursion.
+//
+// So we use a simple spinlock.
+static SpinLock heap_lock(SpinLock::LINKER_INITIALIZED);
+
+//----------------------------------------------------------------------
+// Simple allocator for heap profiler's internal memory
+//----------------------------------------------------------------------
+
+static LowLevelAlloc::Arena *heap_profiler_memory;
+
+static void* ProfilerMalloc(size_t bytes) {
+  return LowLevelAlloc::AllocWithArena(bytes, heap_profiler_memory);
+}
+static void ProfilerFree(void* p) {
+  LowLevelAlloc::Free(p);
+}
+
+// We use buffers of this size in DoGetHeapProfile.
+static const int kProfileBufferSize = 1 << 20;
+
+// This is a last-ditch buffer we use in DumpProfileLocked in case we
+// can't allocate more memory from ProfilerMalloc.  We expect this
+// will be used by HeapProfileEndWriter when the application has to
+// exit due to out-of-memory.  This buffer is allocated in
+// HeapProfilerStart.  Access to this must be protected by heap_lock.
+static char* global_profiler_buffer = NULL;
+
+
+//----------------------------------------------------------------------
+// Profiling control/state data
+//----------------------------------------------------------------------
+
+// Access to all of these is protected by heap_lock.
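+// (For instance, DoGetHeapProfileLocked below asserts heap_lock.IsHeld(),
+// and its callers take a SpinLockHolder on heap_lock first.)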
+static bool is_on = false;            // If we are on as a subsystem.
+static bool dumping = false;          // Dumping status to prevent recursion
+static char* filename_prefix = NULL;  // Prefix used for profile file names
+                                      // (NULL if no need for dumping yet)
+static int dump_count = 0;            // How many dumps so far
+static int64 last_dump_alloc = 0;     // alloc_size at the time of the last dump
+static int64 last_dump_free = 0;      // free_size at the time of the last dump
+static int64 high_water_mark = 0;     // In-use-bytes at last high-water dump
+static int64 last_dump_time = 0;      // The time of the last dump
+
+static HeapProfileTable* heap_profile = NULL;  // the heap profile table
+
+//----------------------------------------------------------------------
+// Profile generation
+//----------------------------------------------------------------------
+
+// Input must be a buffer of size at least 1MB.
+static char* DoGetHeapProfileLocked(char* buf, int buflen) {
+  // We used to be smarter about estimating the required memory and
+  // then capping it to 1MB and generating the profile into that.
+  if (buf == NULL || buflen < 1)
+    return NULL;
+
+  RAW_DCHECK(heap_lock.IsHeld(), "");
+  int bytes_written = 0;
+  if (is_on) {
+    HeapProfileTable::Stats const stats = heap_profile->total();
+    (void)stats;   // avoid an unused-variable warning in non-debug mode.
+    bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1);
+    // FillOrderedProfile should not reduce the set of active mmap-ed regions,
+    // hence MemoryRegionMap will let us remove everything we've added above:
+    RAW_DCHECK(stats.Equivalent(heap_profile->total()), "");
+    // If this check fails, FillOrderedProfile somehow removed more
+    // entries than we had added.
+  }
+  buf[bytes_written] = '\0';
+  RAW_DCHECK(bytes_written == strlen(buf), "");
+
+  return buf;
+}
+
+extern "C" char* GetHeapProfile() {
+  // Use normal malloc: we return the profile to the user to free it:
+  char* buffer = reinterpret_cast<char*>(malloc(kProfileBufferSize));
+  SpinLockHolder l(&heap_lock);
+  return DoGetHeapProfileLocked(buffer, kProfileBufferSize);
+}
+
+// defined below
+static void NewHook(const void* ptr, size_t size);
+static void DeleteHook(const void* ptr);
+
+// Helper for HeapProfilerDump.
+static void DumpProfileLocked(const char* reason) {
+  RAW_DCHECK(heap_lock.IsHeld(), "");
+  RAW_DCHECK(is_on, "");
+  RAW_DCHECK(!dumping, "");
+
+  if (filename_prefix == NULL) return;  // we do not yet need dumping
+
+  dumping = true;
+
+  // Make file name
+  char file_name[1000];
+  dump_count++;
+  snprintf(file_name, sizeof(file_name), "%s.%04d%s",
+           filename_prefix, dump_count, HeapProfileTable::kFileExt);
+
+  // Dump the profile
+  RAW_VLOG(0, "Dumping heap profile to %s (%s)", file_name, reason);
+  // We must use file routines that don't access memory, since we hold
+  // a memory lock now.
+  RawFD fd = RawOpenForWriting(file_name);
+  if (fd == kIllegalRawFD) {
+    RAW_LOG(ERROR, "Failed dumping heap profile to %s", file_name);
+    dumping = false;
+    return;
+  }
+
+  // This case may be impossible, but it's best to be safe.
+  // It's safe to use the global buffer: we're protected by heap_lock.
+ if (global_profiler_buffer == NULL) { + global_profiler_buffer = + reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize)); + } + + char* profile = DoGetHeapProfileLocked(global_profiler_buffer, + kProfileBufferSize); + RawWrite(fd, profile, strlen(profile)); + RawClose(fd); + + dumping = false; +} + +//---------------------------------------------------------------------- +// Profile collection +//---------------------------------------------------------------------- + +// Dump a profile after either an allocation or deallocation, if +// the memory use has changed enough since the last dump. +static void MaybeDumpProfileLocked() { + if (!dumping) { + const HeapProfileTable::Stats& total = heap_profile->total(); + const int64 inuse_bytes = total.alloc_size - total.free_size; + bool need_to_dump = false; + char buf[128]; + int64 current_time = time(NULL); + if (FLAGS_heap_profile_allocation_interval > 0 && + total.alloc_size >= + last_dump_alloc + FLAGS_heap_profile_allocation_interval) { + snprintf(buf, sizeof(buf), ("%" PRId64 " MB allocated cumulatively, " + "%" PRId64 " MB currently in use"), + total.alloc_size >> 20, inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_deallocation_interval > 0 && + total.free_size >= + last_dump_free + FLAGS_heap_profile_deallocation_interval) { + snprintf(buf, sizeof(buf), ("%" PRId64 " MB freed cumulatively, " + "%" PRId64 " MB currently in use"), + total.free_size >> 20, inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_inuse_interval > 0 && + inuse_bytes > + high_water_mark + FLAGS_heap_profile_inuse_interval) { + snprintf(buf, sizeof(buf), "%" PRId64 " MB currently in use", + inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_time_interval > 0 && + current_time - last_dump_time >= + FLAGS_heap_profile_time_interval) { + snprintf(buf, sizeof(buf), "%" PRId64 " sec since the last dump", + current_time - last_dump_time); + need_to_dump = true; + last_dump_time = current_time; + } + if (need_to_dump) { + DumpProfileLocked(buf); + + last_dump_alloc = total.alloc_size; + last_dump_free = total.free_size; + if (inuse_bytes > high_water_mark) + high_water_mark = inuse_bytes; + } + } +} + +// Record an allocation in the profile. +static void RecordAlloc(const void* ptr, size_t bytes, int skip_count) { + // Take the stack trace outside the critical section. + void* stack[HeapProfileTable::kMaxStackDepth]; + int depth = HeapProfileTable::GetCallerStackTrace(skip_count + 1, stack); + SpinLockHolder l(&heap_lock); + if (is_on) { + heap_profile->RecordAlloc(ptr, bytes, depth, stack); + MaybeDumpProfileLocked(); + } +} + +// Record a deallocation in the profile. 
+static void RecordFree(const void* ptr) { + SpinLockHolder l(&heap_lock); + if (is_on) { + heap_profile->RecordFree(ptr); + MaybeDumpProfileLocked(); + } +} + +//---------------------------------------------------------------------- +// Allocation/deallocation hooks for MallocHook +//---------------------------------------------------------------------- + +// static +void NewHook(const void* ptr, size_t size) { + if (ptr != NULL) RecordAlloc(ptr, size, 0); +} + +// static +void DeleteHook(const void* ptr) { + if (ptr != NULL) RecordFree(ptr); +} + +// TODO(jandrews): Re-enable stack tracing +#ifdef TODO_REENABLE_STACK_TRACING +static void RawInfoStackDumper(const char* message, void*) { + RAW_LOG(INFO, "%.*s", static_cast<int>(strlen(message) - 1), message); + // -1 is to chop the \n which will be added by RAW_LOG +} +#endif + +static void MmapHook(const void* result, const void* start, size_t size, + int prot, int flags, int fd, off_t offset) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". + // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, + "mmap(start=0x%" PRIxPTR ", len=%" PRIuS ", prot=0x%x, flags=0x%x, " + "fd=%d, offset=0x%x) = 0x%" PRIxPTR "", + (uintptr_t) start, size, prot, flags, fd, (unsigned int) offset, + (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +static void MremapHook(const void* result, const void* old_addr, + size_t old_size, size_t new_size, + int flags, const void* new_addr) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". + // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, + "mremap(old_addr=0x%" PRIxPTR ", old_size=%" PRIuS ", " + "new_size=%" PRIuS ", flags=0x%x, new_addr=0x%" PRIxPTR ") = " + "0x%" PRIxPTR "", + (uintptr_t) old_addr, old_size, new_size, flags, + (uintptr_t) new_addr, (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +static void MunmapHook(const void* ptr, size_t size) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". + // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, "munmap(start=0x%" PRIxPTR ", len=%" PRIuS ")", + (uintptr_t) ptr, size); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +static void SbrkHook(const void* result, ptrdiff_t increment) { + if (FLAGS_mmap_log) { // log it + RAW_LOG(INFO, "sbrk(inc=%" PRIdS ") = 0x%" PRIxPTR "", + increment, (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +//---------------------------------------------------------------------- +// Starting/stopping/dumping +//---------------------------------------------------------------------- + +extern "C" void HeapProfilerStart(const char* prefix) { + SpinLockHolder l(&heap_lock); + + if (is_on) return; + + is_on = true; + + RAW_VLOG(0, "Starting tracking the heap"); + + // This should be done before the hooks are set up, since it should + // call new, and we want that to be accounted for correctly. 
+  MallocExtension::Initialize();
+
+  if (FLAGS_only_mmap_profile) {
+    FLAGS_mmap_profile = true;
+  }
+
+  if (FLAGS_mmap_profile) {
+    // Ask MemoryRegionMap to record all mmap, mremap, and sbrk
+    // call stack traces of at least size kMaxStackDepth:
+    MemoryRegionMap::Init(HeapProfileTable::kMaxStackDepth,
+                          /* use_buckets */ true);
+  }
+
+  if (FLAGS_mmap_log) {
+    // Install our hooks to do the logging:
+    RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), "");
+    RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), "");
+    RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), "");
+    RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), "");
+  }
+
+  heap_profiler_memory =
+      LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
+
+  // Reserve space now for the heap profiler, so we can still write a
+  // heap profile even if the application runs out of memory.
+  global_profiler_buffer =
+      reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize));
+
+  heap_profile = new(ProfilerMalloc(sizeof(HeapProfileTable)))
+      HeapProfileTable(ProfilerMalloc, ProfilerFree, FLAGS_mmap_profile);
+
+  last_dump_alloc = 0;
+  last_dump_free = 0;
+  high_water_mark = 0;
+  last_dump_time = 0;
+
+  // We do not reset dump_count so if the user does a sequence of
+  // HeapProfilerStart/HeapProfilerStop, we will get a continuous
+  // sequence of profiles.
+
+  if (FLAGS_only_mmap_profile == false) {
+    // Now set the hooks that capture new/delete and malloc/free.
+    RAW_CHECK(MallocHook::AddNewHook(&NewHook), "");
+    RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), "");
+  }
+
+  // Copy filename prefix
+  RAW_DCHECK(filename_prefix == NULL, "");
+  const int prefix_length = strlen(prefix);
+  filename_prefix = reinterpret_cast<char*>(ProfilerMalloc(prefix_length + 1));
+  memcpy(filename_prefix, prefix, prefix_length);
+  filename_prefix[prefix_length] = '\0';
+}
+
+extern "C" int IsHeapProfilerRunning() {
+  SpinLockHolder l(&heap_lock);
+  return is_on ? 1 : 0;   // return an int, because C code doesn't have bool
+}
+
+extern "C" void HeapProfilerStop() {
+  SpinLockHolder l(&heap_lock);
+
+  if (!is_on) return;
+
+  if (FLAGS_only_mmap_profile == false) {
+    // Unset our new/delete hooks, checking they were set:
+    RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), "");
+    RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), "");
+  }
+  if (FLAGS_mmap_log) {
+    // Remove our mmap/sbrk hooks, checking that they were set:
+    RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), "");
+    RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), "");
+    RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), "");
+    RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), "");
+  }
+
+  // free profile
+  heap_profile->~HeapProfileTable();
+  ProfilerFree(heap_profile);
+  heap_profile = NULL;
+
+  // free output-buffer memory
+  ProfilerFree(global_profiler_buffer);
+
+  // free prefix
+  ProfilerFree(filename_prefix);
+  filename_prefix = NULL;
+
+  if (!LowLevelAlloc::DeleteArena(heap_profiler_memory)) {
+    RAW_LOG(FATAL, "Memory leak in HeapProfiler:");
+  }
+
+  if (FLAGS_mmap_profile) {
+    MemoryRegionMap::Shutdown();
+  }
+
+  is_on = false;
+}
+
+extern "C" void HeapProfilerDump(const char *reason) {
+  SpinLockHolder l(&heap_lock);
+  if (is_on && !dumping) {
+    DumpProfileLocked(reason);
+  }
+}
+
+// Signal handler that is registered when a user-selectable signal
+// number is defined in the environment variable HEAPPROFILESIGNAL.
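+//
+// For example (hypothetical shell session; any otherwise unused signal
+// number works):
+//   $ HEAPPROFILE=/tmp/myprog HEAPPROFILESIGNAL=12 ./myprog &
+//   $ kill -12 <pid>      # writes /tmp/myprog.0001.heap on demand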
+static void HeapProfilerDumpSignal(int signal_number) { + (void)signal_number; + if (!heap_lock.TryLock()) { + return; + } + if (is_on && !dumping) { + DumpProfileLocked("signal"); + } + heap_lock.Unlock(); +} + + +//---------------------------------------------------------------------- +// Initialization/finalization code +//---------------------------------------------------------------------- + +// Initialization code +static void HeapProfilerInit() { + // Everything after this point is for setting up the profiler based on envvar + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("HEAPPROFILE", fname)) { + return; + } + // We do a uid check so we don't write out files in a setuid executable. +#ifdef HAVE_GETEUID + if (getuid() != geteuid()) { + RAW_LOG(WARNING, ("HeapProfiler: ignoring HEAPPROFILE because " + "program seems to be setuid\n")); + return; + } +#endif + + char *signal_number_str = getenv("HEAPPROFILESIGNAL"); + if (signal_number_str != NULL) { + long int signal_number = strtol(signal_number_str, NULL, 10); + intptr_t old_signal_handler = reinterpret_cast<intptr_t>(signal(signal_number, HeapProfilerDumpSignal)); + if (old_signal_handler == reinterpret_cast<intptr_t>(SIG_ERR)) { + RAW_LOG(FATAL, "Failed to set signal. Perhaps signal number %s is invalid\n", signal_number_str); + } else if (old_signal_handler == 0) { + RAW_LOG(INFO,"Using signal %d as heap profiling switch", signal_number); + } else { + RAW_LOG(FATAL, "Signal %d already in use\n", signal_number); + } + } + + HeapProfileTable::CleanupOldProfiles(fname); + + HeapProfilerStart(fname); +} + +// class used for finalization -- dumps the heap-profile at program exit +struct HeapProfileEndWriter { + ~HeapProfileEndWriter() { + char buf[128]; + if (heap_profile) { + const HeapProfileTable::Stats& total = heap_profile->total(); + const int64 inuse_bytes = total.alloc_size - total.free_size; + + if ((inuse_bytes >> 20) > 0) { + snprintf(buf, sizeof(buf), ("Exiting, %" PRId64 " MB in use"), + inuse_bytes >> 20); + } else if ((inuse_bytes >> 10) > 0) { + snprintf(buf, sizeof(buf), ("Exiting, %" PRId64 " kB in use"), + inuse_bytes >> 10); + } else { + snprintf(buf, sizeof(buf), ("Exiting, %" PRId64 " bytes in use"), + inuse_bytes); + } + } else { + snprintf(buf, sizeof(buf), ("Exiting")); + } + HeapProfilerDump(buf); + } +}; + +// We want to make sure tcmalloc is up and running before starting the profiler +static const TCMallocGuard tcmalloc_initializer; +REGISTER_MODULE_INITIALIZER(heapprofiler, HeapProfilerInit()); +static HeapProfileEndWriter heap_profile_end_writer; diff --git a/src/third_party/gperftools-2.5/src/internal_logging.cc b/src/third_party/gperftools-2.5/src/internal_logging.cc new file mode 100644 index 00000000000..708fa650974 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/internal_logging.cc @@ -0,0 +1,192 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include "internal_logging.h" +#include <stdarg.h> // for va_end, va_start +#include <stdio.h> // for vsnprintf, va_list, etc +#include <stdlib.h> // for abort +#include <string.h> // for strlen, memcpy +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif + +#include <gperftools/malloc_extension.h> +#include "base/logging.h" // for perftools_vsnprintf +#include "base/spinlock.h" // for SpinLockHolder, SpinLock + +// Variables for storing crash output. Allocated statically since we +// may not be able to heap-allocate while crashing. +static SpinLock crash_lock(base::LINKER_INITIALIZED); +static bool crashed = false; +static const int kStatsBufferSize = 16 << 10; +static char stats_buffer[kStatsBufferSize] = { 0 }; + +namespace tcmalloc { + +static void WriteMessage(const char* msg, int length) { + write(STDERR_FILENO, msg, length); +} + +void (*log_message_writer)(const char* msg, int length) = WriteMessage; + + +class Logger { + public: + bool Add(const LogItem& item); + bool AddStr(const char* str, int n); + bool AddNum(uint64_t num, int base); // base must be 10 or 16. 
+
+  static const int kBufSize = 200;
+  char* p_;
+  char* end_;
+  char buf_[kBufSize];
+};
+
+void Log(LogMode mode, const char* filename, int line,
+         LogItem a, LogItem b, LogItem c, LogItem d) {
+  Logger state;
+  state.p_ = state.buf_;
+  state.end_ = state.buf_ + sizeof(state.buf_);
+  state.AddStr(filename, strlen(filename))
+      && state.AddStr(":", 1)
+      && state.AddNum(line, 10)
+      && state.AddStr("]", 1)
+      && state.Add(a)
+      && state.Add(b)
+      && state.Add(c)
+      && state.Add(d);
+
+  // Terminate with newline
+  if (state.p_ >= state.end_) {
+    state.p_ = state.end_ - 1;
+  }
+  *state.p_ = '\n';
+  state.p_++;
+
+  int msglen = state.p_ - state.buf_;
+  if (mode == kLog) {
+    (*log_message_writer)(state.buf_, msglen);
+    return;
+  }
+
+  bool first_crash = false;
+  {
+    SpinLockHolder l(&crash_lock);
+    if (!crashed) {
+      crashed = true;
+      first_crash = true;
+    }
+  }
+
+  (*log_message_writer)(state.buf_, msglen);
+  if (first_crash && mode == kCrashWithStats) {
+    MallocExtension::instance()->GetStats(stats_buffer, kStatsBufferSize);
+    (*log_message_writer)(stats_buffer, strlen(stats_buffer));
+  }
+
+  abort();
+}
+
+bool Logger::Add(const LogItem& item) {
+  // Separate items with spaces
+  if (p_ < end_) {
+    *p_ = ' ';
+    p_++;
+  }
+
+  switch (item.tag_) {
+    case LogItem::kStr:
+      return AddStr(item.u_.str, strlen(item.u_.str));
+    case LogItem::kUnsigned:
+      return AddNum(item.u_.unum, 10);
+    case LogItem::kSigned:
+      if (item.u_.snum < 0) {
+        // The cast to uint64_t is intentionally before the negation
+        // so that we do not attempt to negate -2^63.
+        return AddStr("-", 1)
+            && AddNum(- static_cast<uint64_t>(item.u_.snum), 10);
+      } else {
+        return AddNum(static_cast<uint64_t>(item.u_.snum), 10);
+      }
+    case LogItem::kPtr:
+      return AddStr("0x", 2)
+          && AddNum(reinterpret_cast<uintptr_t>(item.u_.ptr), 16);
+    default:
+      return false;
+  }
+}
+
+bool Logger::AddStr(const char* str, int n) {
+  if (end_ - p_ < n) {
+    return false;
+  } else {
+    memcpy(p_, str, n);
+    p_ += n;
+    return true;
+  }
+}
+
+bool Logger::AddNum(uint64_t num, int base) {
+  static const char kDigits[] = "0123456789abcdef";
+  char space[22];  // more than enough for 2^64 in smallest supported base (10)
+  char* end = space + sizeof(space);
+  char* pos = end;
+  do {
+    pos--;
+    *pos = kDigits[num % base];
+    num /= base;
+  } while (num > 0 && pos > space);
+  return AddStr(pos, end - pos);
+}
+
+}  // end tcmalloc namespace
+
+void TCMalloc_Printer::printf(const char* format, ...) {
+  if (left_ > 0) {
+    va_list ap;
+    va_start(ap, format);
+    const int r = perftools_vsnprintf(buf_, left_, format, ap);
+    va_end(ap);
+    if (r < 0) {
+      // Perhaps an old glibc that returns -1 on truncation?
+      left_ = 0;
+    } else if (r > left_) {
+      // Truncation
+      left_ = 0;
+    } else {
+      left_ -= r;
+      buf_ += r;
+    }
+  }
+}
diff --git a/src/third_party/gperftools-2.5/src/internal_logging.h b/src/third_party/gperftools-2.5/src/internal_logging.h
new file mode 100644
index 00000000000..0c300c3e20d
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/internal_logging.h
@@ -0,0 +1,144 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Internal logging and related utility routines. + +#ifndef TCMALLOC_INTERNAL_LOGGING_H_ +#define TCMALLOC_INTERNAL_LOGGING_H_ + +#include <config.h> +#include <stddef.h> // for size_t +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif + +//------------------------------------------------------------------- +// Utility routines +//------------------------------------------------------------------- + +// Safe logging helper: we write directly to the stderr file +// descriptor and avoid FILE buffering because that may invoke +// malloc(). +// +// Example: +// Log(kLog, __FILE__, __LINE__, "error", bytes); + +namespace tcmalloc { +enum LogMode { + kLog, // Just print the message + kCrash, // Print the message and crash + kCrashWithStats // Print the message, some stats, and crash +}; + +class Logger; + +// A LogItem holds any of the argument types that can be passed to Log() +class LogItem { + public: + LogItem() : tag_(kEnd) { } + LogItem(const char* v) : tag_(kStr) { u_.str = v; } + LogItem(int v) : tag_(kSigned) { u_.snum = v; } + LogItem(long v) : tag_(kSigned) { u_.snum = v; } + LogItem(long long v) : tag_(kSigned) { u_.snum = v; } + LogItem(unsigned int v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(const void* v) : tag_(kPtr) { u_.ptr = v; } + private: + friend class Logger; + enum Tag { + kStr, + kSigned, + kUnsigned, + kPtr, + kEnd + }; + Tag tag_; + union { + const char* str; + const void* ptr; + int64_t snum; + uint64_t unum; + } u_; +}; + +extern PERFTOOLS_DLL_DECL void Log(LogMode mode, const char* filename, int line, + LogItem a, LogItem b = LogItem(), + LogItem c = LogItem(), LogItem d = LogItem()); + +// Tests can override this function to collect logging messages. 
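+// For example, a test might install a writer that captures output
+// (a sketch only -- CaptureWriter and captured_log are illustrative
+// names, not part of this header):
+//
+//   static std::string captured_log;
+//   static void CaptureWriter(const char* msg, int length) {
+//     captured_log.append(msg, length);
+//   }
+//   ...
+//   tcmalloc::log_message_writer = CaptureWriter;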
+extern PERFTOOLS_DLL_DECL void (*log_message_writer)(const char* msg, int length); + +} // end tcmalloc namespace + +// Like assert(), but executed even in NDEBUG mode +#undef CHECK_CONDITION +#define CHECK_CONDITION(cond) \ +do { \ + if (!(cond)) { \ + ::tcmalloc::Log(::tcmalloc::kCrash, __FILE__, __LINE__, #cond); \ + } \ +} while (0) + +// Our own version of assert() so we can avoid hanging by trying to do +// all kinds of goofy printing while holding the malloc lock. +#ifndef NDEBUG +#define ASSERT(cond) CHECK_CONDITION(cond) +#else +#define ASSERT(cond) ((void) 0) +#endif + +// Print into buffer +class TCMalloc_Printer { + private: + char* buf_; // Where should we write next + int left_; // Space left in buffer (including space for \0) + + public: + // REQUIRES: "length > 0" + TCMalloc_Printer(char* buf, int length) : buf_(buf), left_(length) { + buf[0] = '\0'; + } + + void printf(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; +}; + +#endif // TCMALLOC_INTERNAL_LOGGING_H_ diff --git a/src/third_party/gperftools-2.5/src/libc_override.h b/src/third_party/gperftools-2.5/src/libc_override.h new file mode 100644 index 00000000000..c01a97ca614 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/libc_override.h @@ -0,0 +1,91 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// This .h file imports the code that causes tcmalloc to override libc +// versions of malloc/free/new/delete/etc. That is, it provides the +// logic that makes it so calls to malloc(10) go through tcmalloc, +// rather than the default (libc) malloc. +// +// This file also provides a method: ReplaceSystemAlloc(), that every +// libc_override_*.h file it #includes is required to provide. 
This
+// is called when first setting up tcmalloc -- that is, when a global
+// constructor in tcmalloc.cc is executed -- to do any initialization
+// work that may be required for this OS.  (Note we cannot entirely
+// control when tcmalloc is initialized, and the system may do some
+// mallocs and frees before this routine is called.)  It may be a
+// noop.
+//
+// Every libc has its own way of doing this, and sometimes the compiler
+// matters too, so we have a different file for each libc, and often
+// for different compilers and OS's.
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_INL_H_
+#define TCMALLOC_LIBC_OVERRIDE_INL_H_
+
+#include <config.h>
+#ifdef HAVE_FEATURES_H
+#include <features.h>   // for __GLIBC__
+#endif
+#include <gperftools/tcmalloc.h>
+
+static void ReplaceSystemAlloc();  // defined in the .h files below
+
+// For windows, there are two ways to get tcmalloc.  If we're
+// patching, then src/windows/patch_function.cc will do the necessary
+// overriding here.  Otherwise, we're doing the 'redefine' trick, where
+// we remove malloc/new/etc from msvcrt.dll, and just need to define
+// them now.
+#if defined(_WIN32) && defined(WIN32_DO_PATCHING)
+void PatchWindowsFunctions();   // in src/windows/patch_function.cc
+static void ReplaceSystemAlloc() { PatchWindowsFunctions(); }
+
+#elif defined(_WIN32) && !defined(WIN32_DO_PATCHING)
+#include "libc_override_redefine.h"
+
+#elif defined(__APPLE__)
+#include "libc_override_osx.h"
+
+#elif defined(__GLIBC__)
+#include "libc_override_glibc.h"
+
+// Not all gcc systems necessarily support weak symbols, but all the
+// ones I know of do, so for now just assume they all do.
+#elif defined(__GNUC__)
+#include "libc_override_gcc_and_weak.h"
+
+#else
+#error Need to add support for your libc/OS here
+
+#endif
+
+#endif  // TCMALLOC_LIBC_OVERRIDE_INL_H_
diff --git a/src/third_party/gperftools-2.5/src/libc_override_gcc_and_weak.h b/src/third_party/gperftools-2.5/src/libc_override_gcc_and_weak.h
new file mode 100644
index 00000000000..ecb66ec5711
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/libc_override_gcc_and_weak.h
@@ -0,0 +1,172 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2011, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Craig Silverstein <opensource@google.com>
+//
+// Used to override malloc routines on systems that define the
+// memory allocation routines to be weak symbols in their libc
+// (almost all unix-based systems are like this), on gcc, which
+// supports the 'alias' attribute.
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
+#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
+
+#ifdef HAVE_SYS_CDEFS_H
+#include <sys/cdefs.h>  // for __THROW
+#endif
+#include <gperftools/tcmalloc.h>
+
+#include "getenv_safe.h" // TCMallocGetenvSafe
+#include "base/commandlineflags.h"
+
+#ifndef __THROW    // I guess we're not on a glibc-like system
+# define __THROW   // __THROW is just an optimization, so ok to make it ""
+#endif
+
+#ifndef __GNUC__
+# error libc_override_gcc_and_weak.h is for gcc distributions only.
+#endif
+
+#define ALIAS(tc_fn)   __attribute__ ((alias (#tc_fn), used))
+
+void* operator new(size_t size) throw (std::bad_alloc)
+  ALIAS(tc_new);
+void operator delete(void* p) throw()
+  ALIAS(tc_delete);
+void* operator new[](size_t size) throw (std::bad_alloc)
+  ALIAS(tc_newarray);
+void operator delete[](void* p) throw()
+  ALIAS(tc_deletearray);
+void* operator new(size_t size, const std::nothrow_t& nt) throw()
+  ALIAS(tc_new_nothrow);
+void* operator new[](size_t size, const std::nothrow_t& nt) throw()
+  ALIAS(tc_newarray_nothrow);
+void operator delete(void* p, const std::nothrow_t& nt) throw()
+  ALIAS(tc_delete_nothrow);
+void operator delete[](void* p, const std::nothrow_t& nt) throw()
+  ALIAS(tc_deletearray_nothrow);
+
+#if defined(ENABLE_SIZED_DELETE)
+
+void operator delete(void *p, size_t size) throw()
+  ALIAS(tc_delete_sized);
+void operator delete[](void *p, size_t size) throw()
+  ALIAS(tc_deletearray_sized);
+
+#elif defined(ENABLE_DYNAMIC_SIZED_DELETE) && \
+  (__GNUC__ * 100 + __GNUC_MINOR__) >= 405
+
+static void delegate_sized_delete(void *p, size_t s) throw() {
+  (operator delete)(p);
+}
+
+static void delegate_sized_deletearray(void *p, size_t s) throw() {
+  (operator delete[])(p);
+}
+
+extern "C" __attribute__((weak))
+int tcmalloc_sized_delete_enabled(void);
+
+static bool sized_delete_enabled(void) {
+  if (tcmalloc_sized_delete_enabled != 0) {
+    return !!tcmalloc_sized_delete_enabled();
+  }
+
+  const char *flag = TCMallocGetenvSafe("TCMALLOC_ENABLE_SIZED_DELETE");
+  return tcmalloc::commandlineflags::StringToBool(flag, false);
+}
+
+extern "C" {
+
+static void *resolve_delete_sized(void) {
+  if (sized_delete_enabled()) {
+    return reinterpret_cast<void *>(tc_delete_sized);
+  }
+  return reinterpret_cast<void *>(delegate_sized_delete);
+}
+
+static void *resolve_deletearray_sized(void) {
+  if (sized_delete_enabled()) {
+    return reinterpret_cast<void *>(tc_deletearray_sized);
+  }
+  return reinterpret_cast<void *>(delegate_sized_deletearray);
+}
+
+}
+
+void operator delete(void *p, size_t size) throw()
+    __attribute__((ifunc("resolve_delete_sized")));
+void operator delete[](void *p, size_t size) throw()
+
__attribute__((ifunc("resolve_deletearray_sized"))); + +#else /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */ + +void operator delete(void *p, size_t size) throw() + ALIAS(tc_delete); +void operator delete[](void *p, size_t size) throw() + ALIAS(tc_deletearray); + +#endif /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */ + +extern "C" { + void* malloc(size_t size) __THROW ALIAS(tc_malloc); + void free(void* ptr) __THROW ALIAS(tc_free); + void* realloc(void* ptr, size_t size) __THROW ALIAS(tc_realloc); + void* calloc(size_t n, size_t size) __THROW ALIAS(tc_calloc); + void cfree(void* ptr) __THROW ALIAS(tc_cfree); + void* memalign(size_t align, size_t s) __THROW ALIAS(tc_memalign); + void* valloc(size_t size) __THROW ALIAS(tc_valloc); + void* pvalloc(size_t size) __THROW ALIAS(tc_pvalloc); + int posix_memalign(void** r, size_t a, size_t s) __THROW + ALIAS(tc_posix_memalign); +#ifndef __UCLIBC__ + void malloc_stats(void) __THROW ALIAS(tc_malloc_stats); +#endif + int mallopt(int cmd, int value) __THROW ALIAS(tc_mallopt); +#ifdef HAVE_STRUCT_MALLINFO + struct mallinfo mallinfo(void) __THROW ALIAS(tc_mallinfo); +#endif + size_t malloc_size(void* p) __THROW ALIAS(tc_malloc_size); +#if defined(__ANDROID__) + size_t malloc_usable_size(const void* p) __THROW + ALIAS(tc_malloc_size); +#else + size_t malloc_usable_size(void* p) __THROW ALIAS(tc_malloc_size); +#endif +} // extern "C" + +#undef ALIAS + +// No need to do anything at tcmalloc-registration time: we do it all +// via overriding weak symbols (at link time). +static void ReplaceSystemAlloc() { } + +#endif // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ diff --git a/src/third_party/gperftools-2.5/src/libc_override_glibc.h b/src/third_party/gperftools-2.5/src/libc_override_glibc.h new file mode 100644 index 00000000000..014aff038bd --- /dev/null +++ b/src/third_party/gperftools-2.5/src/libc_override_glibc.h @@ -0,0 +1,146 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Used to override malloc routines on systems that are using glibc. + +#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ + +#include <config.h> +#include <features.h> // for __GLIBC__ +#include <gperftools/tcmalloc.h> + +#ifndef __GLIBC__ +# error libc_override_glibc.h is for glibc distributions only. +#endif + +// In glibc, the memory-allocation methods are weak symbols, so we can +// just override them with our own. If we're using gcc, we can use +// __attribute__((alias)) to do the overriding easily (exception: +// Mach-O, which doesn't support aliases). Otherwise we have to use a +// function call. +#if !defined(__GNUC__) || defined(__MACH__) + +// This also defines ReplaceSystemAlloc(). +# include "libc_override_redefine.h" // defines functions malloc()/etc + +#else // #if !defined(__GNUC__) || defined(__MACH__) + +// If we get here, we're a gcc system, so do all the overriding we do +// with gcc. This does the overriding of all the 'normal' memory +// allocation. This also defines ReplaceSystemAlloc(). +# include "libc_override_gcc_and_weak.h" + +// We also have to do some glibc-specific overriding. Some library +// routines on RedHat 9 allocate memory using malloc() and free it +// using __libc_free() (or vice-versa). Since we provide our own +// implementations of malloc/free, we need to make sure that the +// __libc_XXX variants (defined as part of glibc) also point to the +// same implementations. Since it only matters for redhat, we +// do it inside the gcc #ifdef, since redhat uses gcc. +// TODO(csilvers): only do this if we detect we're an old enough glibc? + +#define ALIAS(tc_fn) __attribute__ ((alias (#tc_fn))) +extern "C" { + void* __libc_malloc(size_t size) ALIAS(tc_malloc); + void __libc_free(void* ptr) ALIAS(tc_free); + void* __libc_realloc(void* ptr, size_t size) ALIAS(tc_realloc); + void* __libc_calloc(size_t n, size_t size) ALIAS(tc_calloc); + void __libc_cfree(void* ptr) ALIAS(tc_cfree); + void* __libc_memalign(size_t align, size_t s) ALIAS(tc_memalign); + void* __libc_valloc(size_t size) ALIAS(tc_valloc); + void* __libc_pvalloc(size_t size) ALIAS(tc_pvalloc); + int __posix_memalign(void** r, size_t a, size_t s) ALIAS(tc_posix_memalign); +} // extern "C" +#undef ALIAS + +#endif // #if defined(__GNUC__) && !defined(__MACH__) + + +// We also have to hook libc malloc. While our work with weak symbols +// should make sure libc malloc is never called in most situations, it +// can be worked around by shared libraries with the DEEPBIND +// environment variable set. The below hooks libc to call our malloc +// routines even in that situation. In other situations, this hook +// should never be called. +extern "C" { +static void* glibc_override_malloc(size_t size, const void *caller) { + return tc_malloc(size); +} +static void* glibc_override_realloc(void *ptr, size_t size, + const void *caller) { + return tc_realloc(ptr, size); +} +static void glibc_override_free(void *ptr, const void *caller) { + tc_free(ptr); +} +static void* glibc_override_memalign(size_t align, size_t size, + const void *caller) { + return tc_memalign(align, size); +} + +// We should be using __malloc_initialize_hook here, like the #if 0 +// code below. (See http://swoolley.org/man.cgi/3/malloc_hook.) +// However, this causes weird linker errors with programs that link +// with -static, so instead we just assign the vars directly at +// static-constructor time. 
That should serve the same effect of +// making sure the hooks are set before the first malloc call the +// program makes. +#if 0 +#include <malloc.h> // for __malloc_hook, etc. +void glibc_override_malloc_init_hook(void) { + __malloc_hook = glibc_override_malloc; + __realloc_hook = glibc_override_realloc; + __free_hook = glibc_override_free; + __memalign_hook = glibc_override_memalign; +} + +void (* MALLOC_HOOK_MAYBE_VOLATILE __malloc_initialize_hook)(void) + = &glibc_override_malloc_init_hook; +#endif + +void* (* MALLOC_HOOK_MAYBE_VOLATILE __malloc_hook)(size_t, const void*) + = &glibc_override_malloc; +void* (* MALLOC_HOOK_MAYBE_VOLATILE __realloc_hook)(void*, size_t, const void*) + = &glibc_override_realloc; +void (* MALLOC_HOOK_MAYBE_VOLATILE __free_hook)(void*, const void*) + = &glibc_override_free; +void* (* MALLOC_HOOK_MAYBE_VOLATILE __memalign_hook)(size_t,size_t, const void*) + = &glibc_override_memalign; + +} // extern "C" + +// No need to write ReplaceSystemAlloc(); one of the #includes above +// did it for us. + +#endif // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ diff --git a/src/third_party/gperftools-2.5/src/libc_override_osx.h b/src/third_party/gperftools-2.5/src/libc_override_osx.h new file mode 100644 index 00000000000..b801f22a452 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/libc_override_osx.h @@ -0,0 +1,281 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Used to override malloc routines on OS X systems. We use the +// malloc-zone functionality built into OS X to register our malloc +// routine. +// +// 1) We used to use the normal 'override weak libc malloc/etc' +// technique for OS X. This is not optimal because mach does not +// support the 'alias' attribute, so we had to have forwarding +// functions. 
It also does not work very well with OS X shared
+// libraries (dylibs) -- in general, the shared libs don't use
+// tcmalloc unless run with the DYLD_FORCE_FLAT_NAMESPACE envvar.
+//
+// 2) Another approach would be to use an interposition array:
+//      static const interpose_t interposers[] __attribute__((section("__DATA, __interpose"))) = {
+//        { (void *)tc_malloc, (void *)malloc },
+//        { (void *)tc_free, (void *)free },
+//      };
+//    This requires the user to set the DYLD_INSERT_LIBRARIES envvar, so
+//    is not much better.
+//
+// 3) Registering a new malloc zone avoids all these issues:
+//      http://www.opensource.apple.com/source/Libc/Libc-583/include/malloc/malloc.h
+//      http://www.opensource.apple.com/source/Libc/Libc-583/gen/malloc.c
+//    If we make tcmalloc the default malloc zone (undocumented but
+//    possible) then all new allocs use it, even those in shared
+//    libraries.  Allocs done before tcmalloc was installed, or in libs
+//    that aren't using tcmalloc for some reason, will correctly go
+//    through the malloc-zone interface when free-ing, and will pick up
+//    the libc free rather than tcmalloc free.  So it should "never"
+//    cause a crash (famous last words).
+//
+// 4) The routines one must define for one's own malloc have changed
+//    between OS X versions.  This requires some hoops on our part, but
+//    is only really annoying when it comes to posix_memalign.  The right
+//    behavior there depends on what OS version tcmalloc was compiled on,
+//    but also what OS version the program is running on.  For now, we
+//    punt and don't implement our own posix_memalign.  Apps that really
+//    care can use tc_posix_memalign directly.
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_
+#define TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_
+
+#include <config.h>
+#ifdef HAVE_FEATURES_H
+#include <features.h>
+#endif
+#include <gperftools/tcmalloc.h>
+
+#if !defined(__APPLE__)
+# error libc_override_osx.h is for OS X distributions only.
+#endif
+
+#include <AvailabilityMacros.h>
+#include <malloc/malloc.h>
+
+namespace tcmalloc {
+  void CentralCacheLockAll();
+  void CentralCacheUnlockAll();
+}
+
+// from AvailabilityMacros.h
+#if defined(MAC_OS_X_VERSION_10_6) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
+extern "C" {
+  // This function is only available on 10.6 (and later) but the
+  // LibSystem headers do not use AvailabilityMacros.h to handle weak
+  // importing automatically.  This prototype is a copy of the one in
+  // <malloc/malloc.h> with the WEAK_IMPORT_ATTRIBUTE added.
+  extern malloc_zone_t *malloc_default_purgeable_zone(void)
+      WEAK_IMPORT_ATTRIBUTE;
+}
+#endif
+
+// We need to provide wrappers around all the libc functions.
+namespace {
+size_t mz_size(malloc_zone_t* zone, const void* ptr) {
+  if (MallocExtension::instance()->GetOwnership(ptr) != MallocExtension::kOwned)
+    return 0;  // malloc_zone semantics: return 0 if we don't own the memory
+
+  // TODO(csilvers): change this method to take a const void*, one day.
+  return MallocExtension::instance()->GetAllocatedSize(const_cast<void*>(ptr));
+}
+
+void* mz_malloc(malloc_zone_t* zone, size_t size) {
+  return tc_malloc(size);
+}
+
+void* mz_calloc(malloc_zone_t* zone, size_t num_items, size_t size) {
+  return tc_calloc(num_items, size);
+}
+
+void* mz_valloc(malloc_zone_t* zone, size_t size) {
+  return tc_valloc(size);
+}
+
+void mz_free(malloc_zone_t* zone, void* ptr) {
+  return tc_free(ptr);
+}
+
+void* mz_realloc(malloc_zone_t* zone, void* ptr, size_t size) {
+  return tc_realloc(ptr, size);
+}
+
+void* mz_memalign(malloc_zone_t* zone, size_t align, size_t size) {
+  return tc_memalign(align, size);
+}
+
+void mz_destroy(malloc_zone_t* zone) {
+  // A no-op -- we will not be destroyed!
+}
+
+// malloc_introspection callbacks.  I'm not clear on what all of these do.
+kern_return_t mi_enumerator(task_t task, void *,
+                            unsigned type_mask, vm_address_t zone_address,
+                            memory_reader_t reader,
+                            vm_range_recorder_t recorder) {
+  // Should enumerate all the pointers we have.  Seems like a lot of work.
+  return KERN_FAILURE;
+}
+
+size_t mi_good_size(malloc_zone_t *zone, size_t size) {
+  // I think it's always safe to return size, but we maybe could do better.
+  return size;
+}
+
+boolean_t mi_check(malloc_zone_t *zone) {
+  return MallocExtension::instance()->VerifyAllMemory();
+}
+
+void mi_print(malloc_zone_t *zone, boolean_t verbose) {
+  int bufsize = 8192;
+  if (verbose)
+    bufsize = 102400;   // I picked this size arbitrarily
+  char* buffer = new char[bufsize];
+  MallocExtension::instance()->GetStats(buffer, bufsize);
+  fprintf(stdout, "%s", buffer);
+  delete[] buffer;
+}
+
+void mi_log(malloc_zone_t *zone, void *address) {
+  // I don't think we support anything like this
+}
+
+void mi_force_lock(malloc_zone_t *zone) {
+  tcmalloc::CentralCacheLockAll();
+}
+
+void mi_force_unlock(malloc_zone_t *zone) {
+  tcmalloc::CentralCacheUnlockAll();
+}
+
+void mi_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) {
+  // TODO(csilvers): figure out how to fill these out
+  stats->blocks_in_use = 0;
+  stats->size_in_use = 0;
+  stats->max_size_in_use = 0;
+  stats->size_allocated = 0;
+}
+
+boolean_t mi_zone_locked(malloc_zone_t *zone) {
+  return false;  // Hopefully unneeded by us!
+}
+
+}  // unnamed namespace
+
+// OS X doesn't have pvalloc, cfree, malloc_stats, etc, so we can just
+// define our own. :-)  OS X supplies posix_memalign in some versions
+// but not others, either strongly or weakly linked, in a way that's
+// difficult enough to code against correctly that I just don't try to
+// support either memalign() or posix_memalign().  If you need them
+// and are willing to code to tcmalloc, you can use tc_posix_memalign().
+extern "C" {
+  void cfree(void* p) { tc_cfree(p); }
+  void* pvalloc(size_t s) { return tc_pvalloc(s); }
+  void malloc_stats(void) { tc_malloc_stats(); }
+  int mallopt(int cmd, int v) { return tc_mallopt(cmd, v); }
+  // No struct mallinfo on OS X, so don't define mallinfo().
+  // An alias for malloc_size(), which OS X defines.
+ size_t malloc_usable_size(void* p) { return tc_malloc_size(p); } +} // extern "C" + +static void ReplaceSystemAlloc() { + static malloc_introspection_t tcmalloc_introspection; + memset(&tcmalloc_introspection, 0, sizeof(tcmalloc_introspection)); + + tcmalloc_introspection.enumerator = &mi_enumerator; + tcmalloc_introspection.good_size = &mi_good_size; + tcmalloc_introspection.check = &mi_check; + tcmalloc_introspection.print = &mi_print; + tcmalloc_introspection.log = &mi_log; + tcmalloc_introspection.force_lock = &mi_force_lock; + tcmalloc_introspection.force_unlock = &mi_force_unlock; + + static malloc_zone_t tcmalloc_zone; + memset(&tcmalloc_zone, 0, sizeof(malloc_zone_t)); + + // Start with a version 4 zone which is used for OS X 10.4 and 10.5. + tcmalloc_zone.version = 4; + tcmalloc_zone.zone_name = "tcmalloc"; + tcmalloc_zone.size = &mz_size; + tcmalloc_zone.malloc = &mz_malloc; + tcmalloc_zone.calloc = &mz_calloc; + tcmalloc_zone.valloc = &mz_valloc; + tcmalloc_zone.free = &mz_free; + tcmalloc_zone.realloc = &mz_realloc; + tcmalloc_zone.destroy = &mz_destroy; + tcmalloc_zone.batch_malloc = NULL; + tcmalloc_zone.batch_free = NULL; + tcmalloc_zone.introspect = &tcmalloc_introspection; + + // from AvailabilityMacros.h +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 + // Switch to version 6 on OSX 10.6 to support memalign. + tcmalloc_zone.version = 6; + tcmalloc_zone.free_definite_size = NULL; + tcmalloc_zone.memalign = &mz_memalign; + tcmalloc_introspection.zone_locked = &mi_zone_locked; + + // Request the default purgable zone to force its creation. The + // current default zone is registered with the purgable zone for + // doing tiny and small allocs. Sadly, it assumes that the default + // zone is the szone implementation from OS X and will crash if it + // isn't. By creating the zone now, this will be true and changing + // the default zone won't cause a problem. This only needs to + // happen when actually running on OS X 10.6 and higher (note the + // ifdef above only checks if we were *compiled* with 10.6 or + // higher; at runtime we have to check if this symbol is defined.) + if (malloc_default_purgeable_zone) { + malloc_default_purgeable_zone(); + } +#endif + + // Register the tcmalloc zone. At this point, it will not be the + // default zone. + malloc_zone_register(&tcmalloc_zone); + + // Unregister and reregister the default zone. Unregistering swaps + // the specified zone with the last one registered which for the + // default zone makes the more recently registered zone the default + // zone. The default zone is then re-registered to ensure that + // allocations made from it earlier will be handled correctly. + // Things are not guaranteed to work that way, but it's how they work now. + malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); +} + +#endif // TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_ diff --git a/src/third_party/gperftools-2.5/src/libc_override_redefine.h b/src/third_party/gperftools-2.5/src/libc_override_redefine.h new file mode 100644 index 00000000000..72679ef38b8 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/libc_override_redefine.h @@ -0,0 +1,92 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Craig Silverstein <opensource@google.com>
+//
+// Used on systems that don't have their own definition of
+// malloc/new/etc.  (Typically this will be a windows msvcrt.dll that
+// has been edited to remove the definitions.)  We can just define our
+// own as normal functions.
+//
+// This should also work on systems where all the malloc routines are
+// defined as weak symbols, and there's no support for aliasing.
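+//
+// In other words, with this header in the build an ordinary call such as
+//
+//   void* p = malloc(10);   // resolves to tc_malloc(10) defined below
+//
+// reaches tcmalloc directly, because these are the only definitions of
+// malloc/free/new/delete the linker sees.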
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
+#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
+
+void* operator new(size_t size) { return tc_new(size); }
+void operator delete(void* p) throw() { tc_delete(p); }
+void* operator new[](size_t size) { return tc_newarray(size); }
+void operator delete[](void* p) throw() { tc_deletearray(p); }
+void* operator new(size_t size, const std::nothrow_t& nt) throw() {
+  return tc_new_nothrow(size, nt);
+}
+void* operator new[](size_t size, const std::nothrow_t& nt) throw() {
+  return tc_newarray_nothrow(size, nt);
+}
+void operator delete(void* ptr, const std::nothrow_t& nt) throw() {
+  return tc_delete_nothrow(ptr, nt);
+}
+void operator delete[](void* ptr, const std::nothrow_t& nt) throw() {
+  return tc_deletearray_nothrow(ptr, nt);
+}
+
+#ifdef ENABLE_SIZED_DELETE
+void operator delete(void* p, size_t s) throw() { tc_delete_sized(p, s); }
+void operator delete[](void* p, size_t s) throw() { tc_deletearray_sized(p, s); }
+#endif
+
+extern "C" {
+  void* malloc(size_t s) { return tc_malloc(s); }
+  void free(void* p) { tc_free(p); }
+  void* realloc(void* p, size_t s) { return tc_realloc(p, s); }
+  void* calloc(size_t n, size_t s) { return tc_calloc(n, s); }
+  void cfree(void* p) { tc_cfree(p); }
+  void* memalign(size_t a, size_t s) { return tc_memalign(a, s); }
+  void* valloc(size_t s) { return tc_valloc(s); }
+  void* pvalloc(size_t s) { return tc_pvalloc(s); }
+  int posix_memalign(void** r, size_t a, size_t s) {
+    return tc_posix_memalign(r, a, s);
+  }
+  void malloc_stats(void) { tc_malloc_stats(); }
+  int mallopt(int cmd, int v) { return tc_mallopt(cmd, v); }
+#ifdef HAVE_STRUCT_MALLINFO
+  struct mallinfo mallinfo(void) { return tc_mallinfo(); }
+#endif
+  size_t malloc_size(void* p) { return tc_malloc_size(p); }
+  size_t malloc_usable_size(void* p) { return tc_malloc_size(p); }
+}  // extern "C"
+
+// No need to do anything at tcmalloc-registration time: we do it all
+// via overriding weak symbols (at link time).
+static void ReplaceSystemAlloc() { }
+
+#endif  // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
diff --git a/src/third_party/gperftools-2.5/src/linked_list.h b/src/third_party/gperftools-2.5/src/linked_list.h
new file mode 100644
index 00000000000..66a07410760
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/linked_list.h
@@ -0,0 +1,103 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Some very basic linked list functions for dealing with using void * as +// storage. + +#ifndef TCMALLOC_LINKED_LIST_H_ +#define TCMALLOC_LINKED_LIST_H_ + +#include <stddef.h> + +namespace tcmalloc { + +inline void *SLL_Next(void *t) { + return *(reinterpret_cast<void**>(t)); +} + +inline void SLL_SetNext(void *t, void *n) { + *(reinterpret_cast<void**>(t)) = n; +} + +inline void SLL_Push(void **list, void *element) { + SLL_SetNext(element, *list); + *list = element; +} + +inline void *SLL_Pop(void **list) { + void *result = *list; + *list = SLL_Next(*list); + return result; +} + +// Remove N elements from a linked list to which head points. head will be +// modified to point to the new head. start and end will point to the first +// and last nodes of the range. Note that end will point to NULL after this +// function is called. +inline void SLL_PopRange(void **head, int N, void **start, void **end) { + if (N == 0) { + *start = NULL; + *end = NULL; + return; + } + + void *tmp = *head; + for (int i = 1; i < N; ++i) { + tmp = SLL_Next(tmp); + } + + *start = *head; + *end = tmp; + *head = SLL_Next(tmp); + // Unlink range from list. + SLL_SetNext(tmp, NULL); +} + +inline void SLL_PushRange(void **head, void *start, void *end) { + if (!start) return; + SLL_SetNext(end, *head); + *head = start; +} + +inline size_t SLL_Size(void *head) { + int count = 0; + while (head) { + count++; + head = SLL_Next(head); + } + return count; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_LINKED_LIST_H_ diff --git a/src/third_party/gperftools-2.5/src/malloc_extension.cc b/src/third_party/gperftools-2.5/src/malloc_extension.cc new file mode 100644 index 00000000000..6e695523be1 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/malloc_extension.cc @@ -0,0 +1,388 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include <assert.h> +#include <string.h> +#include <stdio.h> +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <string> +#include "base/dynamic_annotations.h" +#include "base/sysinfo.h" // for FillProcSelfMaps +#ifndef NO_HEAP_CHECK +#include "gperftools/heap-checker.h" +#endif +#include "gperftools/malloc_extension.h" +#include "gperftools/malloc_extension_c.h" +#include "maybe_threads.h" +#include "base/googleinit.h" + +using STL_NAMESPACE::string; +using STL_NAMESPACE::vector; + +static void DumpAddressMap(string* result) { + *result += "\nMAPPED_LIBRARIES:\n"; + // We keep doubling until we get a fit + const size_t old_resultlen = result->size(); + for (int amap_size = 10240; amap_size < 10000000; amap_size *= 2) { + result->resize(old_resultlen + amap_size); + bool wrote_all = false; + const int bytes_written = + tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size, + &wrote_all); + if (wrote_all) { // we fit! + (*result)[old_resultlen + bytes_written] = '\0'; + result->resize(old_resultlen + bytes_written); + return; + } + } + result->reserve(old_resultlen); // just don't print anything +} + +// Note: this routine is meant to be called before threads are spawned. +void MallocExtension::Initialize() { + static bool initialize_called = false; + + if (initialize_called) return; + initialize_called = true; + +#ifdef __GLIBC__ + // GNU libc++ versions 3.3 and 3.4 obey the environment variables + // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively. Setting + // one of these variables forces the STL default allocator to call + // new() or delete() for each allocation or deletion. Otherwise + // the STL allocator tries to avoid the high cost of doing + // allocations by pooling memory internally. However, tcmalloc + // does allocations really fast, especially for the types of small + // items one sees in STL, so it's better off just using us. + // TODO: control whether we do this via an environment variable? + setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/); + setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/); + + // Now we need to make the setenv 'stick', which it may not do since + // the env is flakey before main() is called. But luckily stl only + // looks at this env var the first time it tries to do an alloc, and + // caches what it finds. So we just cause an stl alloc here. 
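+  // (The std::string below allocates through the STL allocator, which
+  // reads and caches the FORCE_NEW setting on that first allocation.)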
+ string dummy("I need to be allocated"); + dummy += "!"; // so the definition of dummy isn't optimized out +#endif /* __GLIBC__ */ +} + +// SysAllocator implementation +SysAllocator::~SysAllocator() {} + +// Default implementation -- does nothing +MallocExtension::~MallocExtension() { } +bool MallocExtension::VerifyAllMemory() { return true; } +bool MallocExtension::VerifyNewMemory(const void* p) { return true; } +bool MallocExtension::VerifyArrayNewMemory(const void* p) { return true; } +bool MallocExtension::VerifyMallocMemory(const void* p) { return true; } + +bool MallocExtension::GetNumericProperty(const char* property, size_t* value) { + return false; +} + +bool MallocExtension::SetNumericProperty(const char* property, size_t value) { + return false; +} + +void MallocExtension::GetStats(char* buffer, int length) { + assert(length > 0); + buffer[0] = '\0'; +} + +bool MallocExtension::MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + *blocks = 0; + *total = 0; + memset(histogram, 0, sizeof(*histogram) * kMallocHistogramSize); + return true; +} + +void** MallocExtension::ReadStackTraces(int* sample_period) { + return NULL; +} + +void** MallocExtension::ReadHeapGrowthStackTraces() { + return NULL; +} + +void MallocExtension::MarkThreadIdle() { + // Default implementation does nothing +} + +void MallocExtension::MarkThreadBusy() { + // Default implementation does nothing +} + +SysAllocator* MallocExtension::GetSystemAllocator() { + return NULL; +} + +void MallocExtension::SetSystemAllocator(SysAllocator *a) { + // Default implementation does nothing +} + +void MallocExtension::ReleaseToSystem(size_t num_bytes) { + // Default implementation does nothing +} + +void MallocExtension::ReleaseFreeMemory() { + ReleaseToSystem(static_cast<size_t>(-1)); // SIZE_T_MAX +} + +void MallocExtension::SetMemoryReleaseRate(double rate) { + // Default implementation does nothing +} + +double MallocExtension::GetMemoryReleaseRate() { + return -1.0; +} + +size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { + return size; +} + +size_t MallocExtension::GetAllocatedSize(const void* p) { + assert(GetOwnership(p) != kNotOwned); + return 0; +} + +MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { + return kUnknownOwnership; +} + +void MallocExtension::GetFreeListSizes( + vector<MallocExtension::FreeListInfo>* v) { + v->clear(); +} + +size_t MallocExtension::GetThreadCacheSize() { + return 0; +} + +void MallocExtension::MarkThreadTemporarilyIdle() { + // Default implementation does nothing +} + +// The current malloc extension object. + +static MallocExtension* current_instance; + +static void InitModule() { + if (current_instance != NULL) { + return; + } + current_instance = new MallocExtension; +#ifndef NO_HEAP_CHECK + HeapLeakChecker::IgnoreObject(current_instance); +#endif +} + +REGISTER_MODULE_INITIALIZER(malloc_extension_init, InitModule()) + +MallocExtension* MallocExtension::instance() { + InitModule(); + return current_instance; +} + +void MallocExtension::Register(MallocExtension* implementation) { + InitModule(); + // When running under valgrind, our custom malloc is replaced with + // valgrind's one and malloc extensions will not work. (Note: + // callers should be responsible for checking that they are the + // malloc that is really being run, before calling Register. This + // is just here as an extra sanity check.) 
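+  // (RunningOnValgrind() comes from base/dynamic_annotations.h, included
+  // above; it returns nonzero when the process runs under valgrind.)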
+ if (!RunningOnValgrind()) { + current_instance = implementation; + } +} + +// ----------------------------------------------------------------------- +// Heap sampling support +// ----------------------------------------------------------------------- + +namespace { + +// Accessors +uintptr_t Count(void** entry) { + return reinterpret_cast<uintptr_t>(entry[0]); +} +uintptr_t Size(void** entry) { + return reinterpret_cast<uintptr_t>(entry[1]); +} +uintptr_t Depth(void** entry) { + return reinterpret_cast<uintptr_t>(entry[2]); +} +void* PC(void** entry, int i) { + return entry[3+i]; +} + +void PrintCountAndSize(MallocExtensionWriter* writer, + uintptr_t count, uintptr_t size) { + char buf[100]; + snprintf(buf, sizeof(buf), + "%6" PRIu64 ": %8" PRIu64 " [%6" PRIu64 ": %8" PRIu64 "] @", + static_cast<uint64>(count), + static_cast<uint64>(size), + static_cast<uint64>(count), + static_cast<uint64>(size)); + writer->append(buf, strlen(buf)); +} + +void PrintHeader(MallocExtensionWriter* writer, + const char* label, void** entries) { + // Compute the total count and total size + uintptr_t total_count = 0; + uintptr_t total_size = 0; + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + total_count += Count(entry); + total_size += Size(entry); + } + + const char* const kTitle = "heap profile: "; + writer->append(kTitle, strlen(kTitle)); + PrintCountAndSize(writer, total_count, total_size); + writer->append(" ", 1); + writer->append(label, strlen(label)); + writer->append("\n", 1); +} + +void PrintStackEntry(MallocExtensionWriter* writer, void** entry) { + PrintCountAndSize(writer, Count(entry), Size(entry)); + + for (int i = 0; i < Depth(entry); i++) { + char buf[32]; + snprintf(buf, sizeof(buf), " %p", PC(entry, i)); + writer->append(buf, strlen(buf)); + } + writer->append("\n", 1); +} + +} + +void MallocExtension::GetHeapSample(MallocExtensionWriter* writer) { + int sample_period = 0; + void** entries = ReadStackTraces(&sample_period); + if (entries == NULL) { + const char* const kErrorMsg = + "This malloc implementation does not support sampling.\n" + "As of 2005/01/26, only tcmalloc supports sampling, and\n" + "you are probably running a binary that does not use\n" + "tcmalloc.\n"; + writer->append(kErrorMsg, strlen(kErrorMsg)); + return; + } + + char label[32]; + sprintf(label, "heap_v2/%d", sample_period); + PrintHeader(writer, label, entries); + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + PrintStackEntry(writer, entry); + } + delete[] entries; + + DumpAddressMap(writer); +} + +void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) { + void** entries = ReadHeapGrowthStackTraces(); + if (entries == NULL) { + const char* const kErrorMsg = + "This malloc implementation does not support " + "ReadHeapGrowthStackTraces().\n" + "As of 2005/09/27, only tcmalloc supports this, and you\n" + "are probably running a binary that does not use tcmalloc.\n"; + writer->append(kErrorMsg, strlen(kErrorMsg)); + return; + } + + // Do not canonicalize the stack entries, so that we get a + // time-ordered list of stack traces, which may be useful if the + // client wants to focus on the latest stack traces. 
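+  // (Each entry is laid out as [count, size, depth, pc1..pcN] -- see the
+  // Count/Size/Depth/PC accessors above -- so PrintStackEntry emits, for
+  // example, a line shaped like:
+  //      1:     4096 [     1:     4096] @ 0x4005e0 0x400a10
+  // with illustrative, not real, addresses.)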
+ PrintHeader(writer, "growth", entries); + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + PrintStackEntry(writer, entry); + } + delete[] entries; + + DumpAddressMap(writer); +} + +void MallocExtension::Ranges(void* arg, RangeFunction func) { + // No callbacks by default +} + +// These are C shims that work on the current instance. + +#define C_SHIM(fn, retval, paramlist, arglist) \ + extern "C" PERFTOOLS_DLL_DECL retval MallocExtension_##fn paramlist { \ + return MallocExtension::instance()->fn arglist; \ + } + +C_SHIM(VerifyAllMemory, int, (void), ()); +C_SHIM(VerifyNewMemory, int, (const void* p), (p)); +C_SHIM(VerifyArrayNewMemory, int, (const void* p), (p)); +C_SHIM(VerifyMallocMemory, int, (const void* p), (p)); +C_SHIM(MallocMemoryStats, int, + (int* blocks, size_t* total, int histogram[kMallocHistogramSize]), + (blocks, total, histogram)); + +C_SHIM(GetStats, void, + (char* buffer, int buffer_length), (buffer, buffer_length)); +C_SHIM(GetNumericProperty, int, + (const char* property, size_t* value), (property, value)); +C_SHIM(SetNumericProperty, int, + (const char* property, size_t value), (property, value)); + +C_SHIM(MarkThreadIdle, void, (void), ()); +C_SHIM(MarkThreadBusy, void, (void), ()); +C_SHIM(ReleaseFreeMemory, void, (void), ()); +C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes)); +C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size)); +C_SHIM(GetAllocatedSize, size_t, (const void* p), (p)); +C_SHIM(GetThreadCacheSize, size_t, (void), ()); +C_SHIM(MarkThreadTemporarilyIdle, void, (void), ()); + +// Can't use the shim here because of the need to translate the enums. +extern "C" +MallocExtension_Ownership MallocExtension_GetOwnership(const void* p) { + return static_cast<MallocExtension_Ownership>( + MallocExtension::instance()->GetOwnership(p)); +} diff --git a/src/third_party/gperftools-2.5/src/malloc_hook-inl.h b/src/third_party/gperftools-2.5/src/malloc_hook-inl.h new file mode 100644 index 00000000000..dbf4d46ed47 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/malloc_hook-inl.h @@ -0,0 +1,249 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// This has the implementation details of malloc_hook that are needed +// to use malloc-hook inside the tcmalloc system. It does not hold +// any of the client-facing calls that are used to add new hooks. + +#ifndef _MALLOC_HOOK_INL_H_ +#define _MALLOC_HOOK_INL_H_ + +#include <stddef.h> +#include <sys/types.h> +#include "base/atomicops.h" +#include "base/basictypes.h" +#include <gperftools/malloc_hook.h> + +#include "common.h" // for UNLIKELY + +namespace base { namespace internal { + +// Capacity of 8 means that HookList is 9 words. +static const int kHookListCapacity = 8; +// last entry is reserved for deprecated "singular" hooks. So we have +// 7 "normal" hooks per list +static const int kHookListMaxValues = 7; +static const int kHookListSingularIdx = 7; + +// HookList: a class that provides synchronized insertions and removals and +// lockless traversal. Most of the implementation is in malloc_hook.cc. +template <typename T> +struct PERFTOOLS_DLL_DECL HookList { + COMPILE_ASSERT(sizeof(T) <= sizeof(AtomicWord), T_should_fit_in_AtomicWord); + + // Adds value to the list. Note that duplicates are allowed. Thread-safe and + // blocking (acquires hooklist_spinlock). Returns true on success; false + // otherwise (failures include invalid value and no space left). + bool Add(T value); + + void FixupPrivEndLocked(); + + // Removes the first entry matching value from the list. Thread-safe and + // blocking (acquires hooklist_spinlock). Returns true on success; false + // otherwise (failures include invalid value and no value found). + bool Remove(T value); + + // Store up to n values of the list in output_array, and return the number of + // elements stored. Thread-safe and non-blocking. This is fast (one memory + // access) if the list is empty. + int Traverse(T* output_array, int n) const; + + // Fast inline implementation for fast path of Invoke*Hook. + bool empty() const { + return base::subtle::NoBarrier_Load(&priv_end) == 0; + } + + // Used purely to handle deprecated singular hooks + T GetSingular() const { + const AtomicWord *place = &priv_data[kHookListSingularIdx]; + return bit_cast<T>(base::subtle::NoBarrier_Load(place)); + } + + T ExchangeSingular(T new_val); + + // This internal data is not private so that the class is an aggregate and can + // be initialized by the linker. Don't access this directly. Use the + // INIT_HOOK_LIST macro in malloc_hook.cc. + + // One more than the index of the last valid element in priv_data. During + // 'Remove' this may be past the last valid element in priv_data, but + // subsequent values will be 0. 
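+  // (This invariant is presumably what keeps the lock-free Traverse()
+  // safe: a reader snapshots priv_end and scans forward, and any trailing
+  // slots it races with are guaranteed to read as zero.)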
+ // + // Index kHookListCapacity-1 is reserved as 'deprecated' single hook pointer + AtomicWord priv_end; + AtomicWord priv_data[kHookListCapacity]; +}; + +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::NewHook> new_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::DeleteHook> delete_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::PreMmapHook> premmap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MmapHook> mmap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MmapReplacement> mmap_replacement_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MunmapHook> munmap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MunmapReplacement> munmap_replacement_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MremapHook> mremap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::PreSbrkHook> presbrk_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::SbrkHook> sbrk_hooks_; + +} } // namespace base::internal + +// The following method is DEPRECATED +inline MallocHook::NewHook MallocHook::GetNewHook() { + return base::internal::new_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeNewHook(const void* p, size_t s) { + if (UNLIKELY(!base::internal::new_hooks_.empty())) { + InvokeNewHookSlow(p, s); + } +} + +// The following method is DEPRECATED +inline MallocHook::DeleteHook MallocHook::GetDeleteHook() { + return base::internal::delete_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeDeleteHook(const void* p) { + if (UNLIKELY(!base::internal::delete_hooks_.empty())) { + InvokeDeleteHookSlow(p); + } +} + +// The following method is DEPRECATED +inline MallocHook::PreMmapHook MallocHook::GetPreMmapHook() { + return base::internal::premmap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokePreMmapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + if (!base::internal::premmap_hooks_.empty()) { + InvokePreMmapHookSlow(start, size, protection, flags, fd, offset); + } +} + +// The following method is DEPRECATED +inline MallocHook::MmapHook MallocHook::GetMmapHook() { + return base::internal::mmap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeMmapHook(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + if (!base::internal::mmap_hooks_.empty()) { + InvokeMmapHookSlow(result, start, size, protection, flags, fd, offset); + } +} + +inline bool MallocHook::InvokeMmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + if (!base::internal::mmap_replacement_.empty()) { + return InvokeMmapReplacementSlow(start, size, + protection, flags, + fd, offset, + result); + } + return false; +} + +// The following method is DEPRECATED +inline MallocHook::MunmapHook MallocHook::GetMunmapHook() { + return base::internal::munmap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeMunmapHook(const void* p, size_t size) { + if (!base::internal::munmap_hooks_.empty()) { + InvokeMunmapHookSlow(p, size); + } +} + +inline bool MallocHook::InvokeMunmapReplacement( + const void* p, size_t size, int* result) { + if (!base::internal::mmap_replacement_.empty()) { + return InvokeMunmapReplacementSlow(p, size, result); + } + return false; +} + +// The following method is DEPRECATED +inline MallocHook::MremapHook MallocHook::GetMremapHook() { + return 
base::internal::mremap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeMremapHook(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr) { + if (!base::internal::mremap_hooks_.empty()) { + InvokeMremapHookSlow(result, old_addr, old_size, new_size, flags, new_addr); + } +} + +// The following method is DEPRECATED +inline MallocHook::PreSbrkHook MallocHook::GetPreSbrkHook() { + return base::internal::presbrk_hooks_.GetSingular(); +} + +inline void MallocHook::InvokePreSbrkHook(ptrdiff_t increment) { + if (!base::internal::presbrk_hooks_.empty() && increment != 0) { + InvokePreSbrkHookSlow(increment); + } +} + +// The following method is DEPRECATED +inline MallocHook::SbrkHook MallocHook::GetSbrkHook() { + return base::internal::sbrk_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeSbrkHook(const void* result, + ptrdiff_t increment) { + if (!base::internal::sbrk_hooks_.empty() && increment != 0) { + InvokeSbrkHookSlow(result, increment); + } +} + +#endif /* _MALLOC_HOOK_INL_H_ */ diff --git a/src/third_party/gperftools-2.5/src/malloc_hook.cc b/src/third_party/gperftools-2.5/src/malloc_hook.cc new file mode 100644 index 00000000000..57b516d7234 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/malloc_hook.cc @@ -0,0 +1,703 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> + +// Disable the glibc prototype of mremap(), as older versions of the +// system headers define this function with only four arguments, +// whereas newer versions allow an optional fifth argument: +#ifdef HAVE_MMAP +# define mremap glibc_mremap +# include <sys/mman.h> +# undef mremap +#endif + +#include <stddef.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif +#include <algorithm> +#include "base/logging.h" +#include "base/spinlock.h" +#include "maybe_emergency_malloc.h" +#include "maybe_threads.h" +#include "malloc_hook-inl.h" +#include <gperftools/malloc_hook.h> + +// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if +// you're porting to a system where you really can't get a stacktrace. +#ifdef NO_TCMALLOC_SAMPLES + // We use #define so code compiles even if you #include stacktrace.h somehow. +# define GetStackTrace(stack, depth, skip) (0) +#else +# include <gperftools/stacktrace.h> +#endif + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +using std::copy; + + +// Declaration of the default weak initialization function, which can be +// overridden by linking in a strong definition (as heap-checker.cc does). This is +// extern "C" so that it doesn't trigger gold's --detect-odr-violations warning, +// which only looks at C++ symbols. +// +// This function is declared here as weak, and defined later, rather than as a more +// straightforward simple weak definition, as a workaround for an icc compiler +// issue (Intel reference 290819). This issue causes icc to resolve weak +// symbols too early, at compile rather than link time. By declaring it (weak) +// here, then defining it below after its use, we can avoid the problem. +extern "C" { +ATTRIBUTE_WEAK void MallocHook_InitAtFirstAllocation_HeapLeakChecker(); +} + +namespace { + +void RemoveInitialHooksAndCallInitializers(); // below. + +pthread_once_t once = PTHREAD_ONCE_INIT; + +// These hooks are installed in MallocHook as the only initial hooks. The first +// hook that is called will run RemoveInitialHooksAndCallInitializers (see the +// definition below) and then redispatch to any malloc hooks installed by +// RemoveInitialHooksAndCallInitializers. +// +// Note(llib): there is a possibility of a race in the event that there are +// multiple threads running before the first allocation. This is pretty +// difficult to achieve, but if it happens then multiple threads may concurrently do +// allocations. The first caller will call +// RemoveInitialHooksAndCallInitializers via one of the initial hooks. A +// concurrent allocation may, depending on timing, either: +// * still have its initial malloc hook installed, run that, block waiting +// for the first caller to finish its call to +// RemoveInitialHooksAndCallInitializers, and proceed normally. +// * occur some time during the RemoveInitialHooksAndCallInitializers call, at +// which point there could be no initial hooks and the subsequent hooks that +// are about to be set up by RemoveInitialHooksAndCallInitializers haven't +// been installed yet.
I think the worst we can get is that some allocations +// will not get reported to some hooks set by the initializers called from +// RemoveInitialHooksAndCallInitializers. + +void InitialNewHook(const void* ptr, size_t size) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokeNewHook(ptr, size); +} + +void InitialPreMMapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokePreMmapHook(start, size, protection, flags, fd, offset); +} + +void InitialPreSbrkHook(ptrdiff_t increment) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokePreSbrkHook(increment); +} + +// This function is called at most once by one of the above initial malloc +// hooks. It removes all initial hooks and initializes all other clients that +// want to get control at the very first memory allocation. The initializers +// may assume that the initial malloc hooks have been removed. The initializers +// may set up malloc hooks and allocate memory. +void RemoveInitialHooksAndCallInitializers() { + RAW_CHECK(MallocHook::RemoveNewHook(&InitialNewHook), ""); + RAW_CHECK(MallocHook::RemovePreMmapHook(&InitialPreMMapHook), ""); + RAW_CHECK(MallocHook::RemovePreSbrkHook(&InitialPreSbrkHook), ""); + + // HeapLeakChecker is currently the only module that needs to get control on + // the first memory allocation, but one can add other modules by following the + // same weak/strong function pattern. + MallocHook_InitAtFirstAllocation_HeapLeakChecker(); +} + +} // namespace + +// Weak default initialization function that must go after its use. +extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() { + // Do nothing. +} + +namespace base { namespace internal { + +// This lock is shared between all implementations of HookList::Add & Remove. +// The potential for contention is very small. This needs to be a SpinLock and +// not a Mutex since it's possible for Mutex locking to allocate memory (e.g., +// per-thread allocation in debug builds), which could cause infinite recursion. +static SpinLock hooklist_spinlock(base::LINKER_INITIALIZED); + +template <typename T> +bool HookList<T>::Add(T value_as_t) { + AtomicWord value = bit_cast<AtomicWord>(value_as_t); + if (value == 0) { + return false; + } + SpinLockHolder l(&hooklist_spinlock); + // Find the first slot in data that is 0. 
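+ // The scan below relies on an invariant maintained under hooklist_spinlock: + // slots below priv_end hold either a live hook or 0 (a removed entry), + // while every slot at or past priv_end is 0. The lock-free readers + // (Traverse and empty()) depend on that, which is why the stores below go + // through the atomic primitives.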
+ int index = 0; + while ((index < kHookListMaxValues) && + (base::subtle::NoBarrier_Load(&priv_data[index]) != 0)) { + ++index; + } + if (index == kHookListMaxValues) { + return false; + } + AtomicWord prev_num_hooks = base::subtle::Acquire_Load(&priv_end); + base::subtle::NoBarrier_Store(&priv_data[index], value); + if (prev_num_hooks <= index) { + base::subtle::NoBarrier_Store(&priv_end, index + 1); + } + return true; +} + +template <typename T> +void HookList<T>::FixupPrivEndLocked() { + AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end); + while ((hooks_end > 0) && + (base::subtle::NoBarrier_Load(&priv_data[hooks_end - 1]) == 0)) { + --hooks_end; + } + base::subtle::NoBarrier_Store(&priv_end, hooks_end); +} + +template <typename T> +bool HookList<T>::Remove(T value_as_t) { + if (value_as_t == 0) { + return false; + } + SpinLockHolder l(&hooklist_spinlock); + AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end); + int index = 0; + while (index < hooks_end && value_as_t != bit_cast<T>( + base::subtle::NoBarrier_Load(&priv_data[index]))) { + ++index; + } + if (index == hooks_end) { + return false; + } + base::subtle::NoBarrier_Store(&priv_data[index], 0); + FixupPrivEndLocked(); + return true; +} + +template <typename T> +int HookList<T>::Traverse(T* output_array, int n) const { + AtomicWord hooks_end = base::subtle::Acquire_Load(&priv_end); + int actual_hooks_end = 0; + for (int i = 0; i < hooks_end && n > 0; ++i) { + AtomicWord data = base::subtle::Acquire_Load(&priv_data[i]); + if (data != 0) { + *output_array++ = bit_cast<T>(data); + ++actual_hooks_end; + --n; + } + } + return actual_hooks_end; +} + +template <typename T> +T HookList<T>::ExchangeSingular(T value_as_t) { + AtomicWord value = bit_cast<AtomicWord>(value_as_t); + AtomicWord old_value; + SpinLockHolder l(&hooklist_spinlock); + old_value = base::subtle::NoBarrier_Load(&priv_data[kHookListSingularIdx]); + base::subtle::NoBarrier_Store(&priv_data[kHookListSingularIdx], value); + if (value != 0) { + base::subtle::NoBarrier_Store(&priv_end, kHookListSingularIdx + 1); + } else { + FixupPrivEndLocked(); + } + return bit_cast<T>(old_value); +} + +// Initialize a HookList (optionally with the given initial_value in index 0). +#define INIT_HOOK_LIST { 0 } +#define INIT_HOOK_LIST_WITH_VALUE(initial_value) \ + { 1, { reinterpret_cast<AtomicWord>(initial_value) } } + +// Explicit instantiation for malloc_hook_test.cc. This ensures all the methods +// are instantiated. +template struct HookList<MallocHook::NewHook>; + +HookList<MallocHook::NewHook> new_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(&InitialNewHook); +HookList<MallocHook::DeleteHook> delete_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::PreMmapHook> premmap_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(&InitialPreMMapHook); +HookList<MallocHook::MmapHook> mmap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::MunmapHook> munmap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::MremapHook> mremap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::PreSbrkHook> presbrk_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(InitialPreSbrkHook); +HookList<MallocHook::SbrkHook> sbrk_hooks_ = INIT_HOOK_LIST; + +// These lists contain either 0 or 1 hooks. 
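+// (That property is enforced only best-effort, by the RAW_CHECKs in +// MallocHook_SetMmapReplacement and MallocHook_SetMunmapReplacement below; +// HookList itself would accept more entries.)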
+HookList<MallocHook::MmapReplacement> mmap_replacement_ = { 0 }; +HookList<MallocHook::MunmapReplacement> munmap_replacement_ = { 0 }; + +#undef INIT_HOOK_LIST_WITH_VALUE +#undef INIT_HOOK_LIST + +} } // namespace base::internal + +using base::internal::kHookListMaxValues; +using base::internal::new_hooks_; +using base::internal::delete_hooks_; +using base::internal::premmap_hooks_; +using base::internal::mmap_hooks_; +using base::internal::mmap_replacement_; +using base::internal::munmap_hooks_; +using base::internal::munmap_replacement_; +using base::internal::mremap_hooks_; +using base::internal::presbrk_hooks_; +using base::internal::sbrk_hooks_; + +// These are available as C bindings as well as C++, hence their +// definition outside the MallocHook class. +extern "C" +int MallocHook_AddNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "AddNewHook(%p)", hook); + return new_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "RemoveNewHook(%p)", hook); + return new_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "AddDeleteHook(%p)", hook); + return delete_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "RemoveDeleteHook(%p)", hook); + return delete_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "AddPreMmapHook(%p)", hook); + return premmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "RemovePreMmapHook(%p)", hook); + return premmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "SetMmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. + RAW_CHECK(mmap_replacement_.empty(), "Only one MMapReplacement is allowed."); + return mmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "RemoveMmapReplacement(%p)", hook); + return mmap_replacement_.Remove(hook); +} + +extern "C" +int MallocHook_AddMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "AddMmapHook(%p)", hook); + return mmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "RemoveMmapHook(%p)", hook); + return mmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "AddMunmapHook(%p)", hook); + return munmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "RemoveMunmapHook(%p)", hook); + return munmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "SetMunmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. 
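+ // (That is, two threads calling this at the same time can both observe an + // empty list and both Add successfully; the CHECK catches plain misuse, + // not such races.)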
+ RAW_CHECK(munmap_replacement_.empty(), + "Only one MunmapReplacement is allowed."); + return munmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "RemoveMunmapReplacement(%p)", hook); + return munmap_replacement_.Remove(hook); +} + +extern "C" +int MallocHook_AddMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "AddMremapHook(%p)", hook); + return mremap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "RemoveMremapHook(%p)", hook); + return mremap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "AddPreSbrkHook(%p)", hook); + return presbrk_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "RemovePreSbrkHook(%p)", hook); + return presbrk_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "AddSbrkHook(%p)", hook); + return sbrk_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "RemoveSbrkHook(%p)", hook); + return sbrk_hooks_.Remove(hook); +} + +// The code below is DEPRECATED. +extern "C" +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "SetNewHook(%p)", hook); + return new_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "SetDeleteHook(%p)", hook); + return delete_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "SetPreMmapHook(%p)", hook); + return premmap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "SetMmapHook(%p)", hook); + return mmap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "SetMunmapHook(%p)", hook); + return munmap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "SetMremapHook(%p)", hook); + return mremap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "SetPreSbrkHook(%p)", hook); + return presbrk_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "SetSbrkHook(%p)", hook); + return sbrk_hooks_.ExchangeSingular(hook); +} +// End of DEPRECATED code section. + +// Note: embedding the function calls inside the traversal of HookList would be +// very confusing, as it is legal for a hook to remove itself and add other +// hooks. Doing traversal first, and then calling the hooks ensures we only +// call the hooks registered at the start. +#define INVOKE_HOOKS(HookType, hook_list, args) do { \ + HookType hooks[kHookListMaxValues]; \ + int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ + for (int i = 0; i < num_hooks; ++i) { \ + (*hooks[i])args; \ + } \ + } while (0) + +// There should only be one replacement. Return the result of the first +// one, or false if there is none. 
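+// For instance, INVOKE_REPLACEMENT(MunmapReplacement, munmap_replacement_, +// (p, s, result)) in the macro below expands, roughly, to: +// +// MunmapReplacement hooks[kHookListMaxValues]; +// int num_hooks = munmap_replacement_.Traverse(hooks, kHookListMaxValues); +// return (num_hooks > 0 && (*hooks[0])(p, s, result));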
+#define INVOKE_REPLACEMENT(HookType, hook_list, args) do { \ + HookType hooks[kHookListMaxValues]; \ + int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ + return (num_hooks > 0 && (*hooks[0])args); \ + } while (0) + + +void MallocHook::InvokeNewHookSlow(const void* p, size_t s) { + if (tcmalloc::IsEmergencyPtr(p)) { + return; + } + INVOKE_HOOKS(NewHook, new_hooks_, (p, s)); +} + +void MallocHook::InvokeDeleteHookSlow(const void* p) { + if (tcmalloc::IsEmergencyPtr(p)) { + return; + } + INVOKE_HOOKS(DeleteHook, delete_hooks_, (p)); +} + +void MallocHook::InvokePreMmapHookSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + INVOKE_HOOKS(PreMmapHook, premmap_hooks_, (start, size, protection, flags, fd, + offset)); +} + +void MallocHook::InvokeMmapHookSlow(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + INVOKE_HOOKS(MmapHook, mmap_hooks_, (result, start, size, protection, flags, + fd, offset)); +} + +bool MallocHook::InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + INVOKE_REPLACEMENT(MmapReplacement, mmap_replacement_, + (start, size, protection, flags, fd, offset, result)); +} + +void MallocHook::InvokeMunmapHookSlow(const void* p, size_t s) { + INVOKE_HOOKS(MunmapHook, munmap_hooks_, (p, s)); +} + +bool MallocHook::InvokeMunmapReplacementSlow(const void* p, + size_t s, + int* result) { + INVOKE_REPLACEMENT(MunmapReplacement, munmap_replacement_, (p, s, result)); +} + +void MallocHook::InvokeMremapHookSlow(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr) { + INVOKE_HOOKS(MremapHook, mremap_hooks_, (result, old_addr, old_size, new_size, + flags, new_addr)); +} + +void MallocHook::InvokePreSbrkHookSlow(ptrdiff_t increment) { + INVOKE_HOOKS(PreSbrkHook, presbrk_hooks_, (increment)); +} + +void MallocHook::InvokeSbrkHookSlow(const void* result, ptrdiff_t increment) { + INVOKE_HOOKS(SbrkHook, sbrk_hooks_, (result, increment)); +} + +#undef INVOKE_HOOKS + +#ifndef NO_TCMALLOC_SAMPLES + +DEFINE_ATTRIBUTE_SECTION_VARS(google_malloc); +DECLARE_ATTRIBUTE_SECTION_VARS(google_malloc); + // actual functions are in debugallocation.cc or tcmalloc.cc +DEFINE_ATTRIBUTE_SECTION_VARS(malloc_hook); +DECLARE_ATTRIBUTE_SECTION_VARS(malloc_hook); + // actual functions are in this file, malloc_hook.cc, and low_level_alloc.cc + +#define ADDR_IN_ATTRIBUTE_SECTION(addr, name) \ + (reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_START(name)) <= \ + reinterpret_cast<uintptr_t>(addr) && \ + reinterpret_cast<uintptr_t>(addr) < \ + reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_STOP(name))) + +// Return true iff 'caller' is a return address within a function +// that calls one of our hooks via MallocHook::Invoke*. +// A helper for GetCallerStackTrace. +static inline bool InHookCaller(const void* caller) { + return ADDR_IN_ATTRIBUTE_SECTION(caller, google_malloc) || + ADDR_IN_ATTRIBUTE_SECTION(caller, malloc_hook); + // We can use one section for everything except tcmalloc_or_debug + // due to its special linkage mode, which prevents merging of the sections.
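+ // (ADDR_IN_ATTRIBUTE_SECTION above is just two pointer comparisons against + // the linker-provided start/stop symbols of the named section, so this + // test stays cheap enough to run on every frame of the stack walk in + // MallocHook_GetCallerStackTrace below.)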
+} + +#undef ADDR_IN_ATTRIBUTE_SECTION + +static bool checked_sections = false; + +static inline void CheckInHookCaller() { + if (!checked_sections) { + INIT_ATTRIBUTE_SECTION_VARS(google_malloc); + if (ATTRIBUTE_SECTION_START(google_malloc) == + ATTRIBUTE_SECTION_STOP(google_malloc)) { + RAW_LOG(ERROR, "google_malloc section is missing, " + "thus InHookCaller is broken!"); + } + INIT_ATTRIBUTE_SECTION_VARS(malloc_hook); + if (ATTRIBUTE_SECTION_START(malloc_hook) == + ATTRIBUTE_SECTION_STOP(malloc_hook)) { + RAW_LOG(ERROR, "malloc_hook section is missing, " + "thus InHookCaller is broken!"); + } + checked_sections = true; + } +} + +#endif // !NO_TCMALLOC_SAMPLES + +// We can improve behavior/compactness of this function +// if we pass a generic test function (with a generic arg) +// into the implementations for GetStackTrace instead of the skip_count. +extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, + int skip_count) { +#if defined(NO_TCMALLOC_SAMPLES) + return 0; +#elif !defined(HAVE_ATTRIBUTE_SECTION_START) + // Fall back to GetStackTrace and good old but fragile frame skip counts. + // Note: this path is inaccurate when a hook is not called directly by an + // allocation function but is daisy-chained through another hook; + // search for MallocHook::(Get|Set|Invoke)* to find such cases. + return GetStackTrace(result, max_depth, skip_count + int(DEBUG_MODE)); + // due to -foptimize-sibling-calls in opt mode + // there's no need for extra frame skip here then +#else + CheckInHookCaller(); + // MallocHook caller determination via InHookCaller works, use it: + static const int kMaxSkip = 32 + 6 + 3; + // Constant tuned to do just one GetStackTrace call below in practice + // and not get many frames that we don't actually need: + // currently max passed max_depth is 32, + // max passed/needed skip_count is 6 + // and 3 is to account for some hook daisy chaining. + static const int kStackSize = kMaxSkip + 1; + void* stack[kStackSize]; + int depth = GetStackTrace(stack, kStackSize, 1); // skip this function frame + if (depth == 0) // silently propagate cases when GetStackTrace does not work + return 0; + for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller + if (InHookCaller(stack[i])) { + RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p", + i, stack[i], stack[i+1]); + i += 1; // skip hook caller frame + depth -= i; // correct depth + if (depth > max_depth) depth = max_depth; + copy(stack + i, stack + i + depth, result); + if (depth < max_depth && depth + i == kStackSize) { + // get frames for the missing depth + depth += + GetStackTrace(result + depth, max_depth - depth, 1 + kStackSize); + } + return depth; + } + } + RAW_LOG(WARNING, "Hooked allocator frame not found, returning empty trace"); + // If this happens, try increasing kMaxSkip + // or else something must be wrong with InHookCaller, + // e.g. for every section used in InHookCaller + // all functions in that section must be inside the same library. + return 0; +#endif +} + +// On systems where we know how, we override mmap/munmap/mremap/sbrk +// to provide support for calling the related hooks (in addition, +// of course, to doing what these functions normally do).
+ +#if defined(__linux) +# include "malloc_hook_mmap_linux.h" + +#elif defined(__FreeBSD__) +# include "malloc_hook_mmap_freebsd.h" + +#else + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = mmap(start, length, prot, flags, fd, offset); + } + return result; +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = munmap(start, length); + } + return result; +} + +#endif diff --git a/src/third_party/gperftools-2.5/src/malloc_hook_mmap_freebsd.h b/src/third_party/gperftools-2.5/src/malloc_hook_mmap_freebsd.h new file mode 100644 index 00000000000..8575dcc7c08 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/malloc_hook_mmap_freebsd.h @@ -0,0 +1,135 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Override mmap/munmap/mremap/sbrk to provide support for calling the +// related hooks (in addition, of course, to doing what these +// functions normally do). + +#ifndef __FreeBSD__ +# error Should only be including malloc_hook_mmap_freebsd.h on FreeBSD systems. +#endif + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <errno.h> +#include <dlfcn.h> + +// Make sure mmap doesn't get #define'd away by <sys/mman.h> +#undef mmap + +// According to the FreeBSD documentation, use syscall if you do not +// need 64-bit alignment; otherwise use __syscall. Indeed, syscall +// doesn't work correctly in most situations on 64-bit. Its return +// type is 'int' so for things like SYS_mmap, it actually truncates +// the returned address to 32-bits.
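+// (Concretely: a mapping placed at, say, 0x800654000 would come back through +// a plain 'int' as 0x00654000, which points somewhere else entirely; hence +// the 64-bit-clean __syscall spelling selected below for amd64. The example +// address is for illustration only.)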
+#if defined(__amd64__) || defined(__x86_64__) +# define MALLOC_HOOK_SYSCALL __syscall +#else +# define MALLOC_HOOK_SYSCALL syscall +#endif + + +extern "C" { + void* mmap(void *start, size_t length,int prot, int flags, + int fd, off_t offset) __THROW + ATTRIBUTE_SECTION(malloc_hook); + int munmap(void* start, size_t length) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* sbrk(intptr_t increment) __THROW + ATTRIBUTE_SECTION(malloc_hook); +} + +static inline void* do_mmap(void *start, size_t length, + int prot, int flags, + int fd, off_t offset) __THROW { + return (void *)MALLOC_HOOK_SYSCALL(SYS_mmap, + start, length, prot, flags, fd, offset); +} + +static inline void* do_sbrk(intptr_t increment) { + static void *(*libc_sbrk)(intptr_t); + if (libc_sbrk == NULL) + libc_sbrk = (void *(*)(intptr_t))dlsym(RTLD_NEXT, "sbrk"); + + return libc_sbrk(increment); +} + + +extern "C" void* mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap(start, length, prot, flags, fd, + static_cast<size_t>(offset)); // avoid sign extension + } + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +extern "C" int munmap(void* start, size_t length) __THROW { + MallocHook::InvokeMunmapHook(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length); + } + + return result; +} + +extern "C" void* sbrk(intptr_t increment) __THROW { + MallocHook::InvokePreSbrkHook(increment); + void *result = do_sbrk(increment); + MallocHook::InvokeSbrkHook(result, increment); + return result; +} + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap(start, length, prot, flags, fd, offset); + } + + return result; +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length); + } + return result; +} + +#undef MALLOC_HOOK_SYSCALL diff --git a/src/third_party/gperftools-2.5/src/malloc_hook_mmap_linux.h b/src/third_party/gperftools-2.5/src/malloc_hook_mmap_linux.h new file mode 100755 index 00000000000..1c4c7660824 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/malloc_hook_mmap_linux.h @@ -0,0 +1,252 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +// We define mmap() and mmap64(), which somewhat reimplement libc's mmap +// syscall stubs. Unfortunately libc only exports the stubs via weak symbols +// (which we're overriding with our mmap64() and mmap() wrappers) so we can't +// just call through to them. + +#ifndef __linux +# error Should only be including malloc_hook_mmap_linux.h on linux systems. +#endif + +#include <unistd.h> +#include <syscall.h> +#include <sys/mman.h> +#include <errno.h> +#include "base/linux_syscall_support.h" + +// The x86-32 case and the x86-64 case differ: +// 32b has a mmap2() syscall, 64b does not. +// 64b and 32b have different calling conventions for mmap(). + +// I test for 64-bit first so I don't have to do things like +// '#if (defined(__mips__) && !defined(__MIPS64__))' as a mips32 check. +#if defined(__x86_64__) || defined(__PPC64__) || defined(__aarch64__) || (defined(_MIPS_SIM) && _MIPS_SIM == _ABI64) + +static inline void* do_mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + return sys_mmap(start, length, prot, flags, fd, offset); +} + +#define MALLOC_HOOK_HAVE_DO_MMAP64 1 + +#elif defined(__i386__) || defined(__PPC__) || defined(__mips__) || \ + defined(__arm__) + +static inline void* do_mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + void *result; + + // Try mmap2() unless it's not supported + static bool have_mmap2 = true; + if (have_mmap2) { + static int pagesize = 0; + if (!pagesize) pagesize = getpagesize(); + + // Check that the offset is page aligned + if (offset & (pagesize - 1)) { + result = MAP_FAILED; + errno = EINVAL; + goto out; + } + + result = (void *)syscall(SYS_mmap2, + start, length, prot, flags, fd, + (off_t) (offset / pagesize)); + if (result != MAP_FAILED || errno != ENOSYS) goto out; + + // We don't have mmap2() after all - don't bother trying it in future + have_mmap2 = false; + } + + if (((off_t)offset) != offset) { + // If we're trying to map a 64-bit offset, fail now since we don't + // have 64-bit mmap() support. + result = MAP_FAILED; + errno = EINVAL; + goto out; + } + +#ifdef __NR_mmap + { + // Fall back to old 32-bit offset mmap() call + // Old syscall interface cannot handle six args, so pass in an array + int32 args[6] = { (int32) start, (int32) length, prot, flags, fd, + (int32)(off_t) offset }; + result = (void *)syscall(SYS_mmap, args); + } +#else + // Some Linux ports like ARM EABI Linux have no mmap, just mmap2.
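+ // With mmap2 already ruled out above and no old-style SYS_mmap to fall + // back on, there is nothing left to try, so the call simply fails.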
+ result = MAP_FAILED; +#endif + + out: + return result; +} + +#define MALLOC_HOOK_HAVE_DO_MMAP64 1 + +#elif defined(__s390x__) + +static inline void* do_mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + // mmap on s390x uses the old syscall interface + unsigned long args[6] = { (unsigned long) start, (unsigned long) length, + (unsigned long) prot, (unsigned long) flags, + (unsigned long) fd, (unsigned long) offset }; + return sys_mmap(args); +} + +#define MALLOC_HOOK_HAVE_DO_MMAP64 1 + +#endif // #if defined(__x86_64__) + + +#ifdef MALLOC_HOOK_HAVE_DO_MMAP64 + +// We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook +// calls right into mmap and mmap64, so that the stack frames in the caller's +// stack are at the same offsets for all the calls of memory allocating +// functions. + +// Put all callers of MallocHook::Invoke* in this module into +// malloc_hook section, +// so that MallocHook::GetCallerStackTrace can function accurately: + +// Make sure mmap doesn't get #define'd away by <sys/mman.h> +# undef mmap + +extern "C" { + void* mmap64(void *start, size_t length, int prot, int flags, + int fd, __off64_t offset ) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* mmap(void *start, size_t length,int prot, int flags, + int fd, off_t offset) __THROW + ATTRIBUTE_SECTION(malloc_hook); + int munmap(void* start, size_t length) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* mremap(void* old_addr, size_t old_size, size_t new_size, + int flags, ...) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* sbrk(ptrdiff_t increment) __THROW + ATTRIBUTE_SECTION(malloc_hook); +} + +extern "C" void* mmap64(void *start, size_t length, int prot, int flags, + int fd, __off64_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +# if !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) + +extern "C" void* mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, + static_cast<size_t>(offset)); // avoid sign extension + } + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +# endif // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) + +extern "C" int munmap(void* start, size_t length) __THROW { + MallocHook::InvokeMunmapHook(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = sys_munmap(start, length); + } + return result; +} + +extern "C" void* mremap(void* old_addr, size_t old_size, size_t new_size, + int flags, ...) 
__THROW { + va_list ap; + va_start(ap, flags); + void *new_address = va_arg(ap, void *); + va_end(ap); + void* result = sys_mremap(old_addr, old_size, new_size, flags, new_address); + MallocHook::InvokeMremapHook(result, old_addr, old_size, new_size, flags, + new_address); + return result; +} + +#ifndef __UCLIBC__ +// libc's version: +extern "C" void* __sbrk(ptrdiff_t increment); + +extern "C" void* sbrk(ptrdiff_t increment) __THROW { + MallocHook::InvokePreSbrkHook(increment); + void *result = __sbrk(increment); + MallocHook::InvokeSbrkHook(result, increment); + return result; +} + +#endif + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } + return result; +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = syscall(SYS_munmap, start, length); + } + return result; +} + +#undef MALLOC_HOOK_HAVE_DO_MMAP64 + +#endif // #ifdef MALLOC_HOOK_HAVE_DO_MMAP64 diff --git a/src/third_party/gperftools-2.5/src/maybe_emergency_malloc.h b/src/third_party/gperftools-2.5/src/maybe_emergency_malloc.h new file mode 100644 index 00000000000..250ecf01a3f --- /dev/null +++ b/src/third_party/gperftools-2.5/src/maybe_emergency_malloc.h @@ -0,0 +1,55 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
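+// When ENABLE_EMERGENCY_MALLOC is not defined, the stubs below are trivial +// inline constants, so hot call sites such as MallocHook::InvokeNewHookSlow() +// can test tcmalloc::IsEmergencyPtr(p) at no cost: the compiler folds the +// branch away.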
+ +#ifndef MAYBE_EMERGENCY_MALLOC_H +#define MAYBE_EMERGENCY_MALLOC_H + +#include "config.h" + +#ifdef ENABLE_EMERGENCY_MALLOC + +#include "emergency_malloc.h" + +#else + +namespace tcmalloc { + static inline void *EmergencyMalloc(size_t size) {return NULL;} + static inline void EmergencyFree(void *p) {} + static inline void *EmergencyCalloc(size_t n, size_t elem_size) {return NULL;} + static inline void *EmergencyRealloc(void *old_ptr, size_t new_size) {return NULL;} + + static inline bool IsEmergencyPtr(const void *_ptr) { + return false; + } +} + +#endif // ENABLE_EMERGENCY_MALLOC + +#endif diff --git a/src/third_party/gperftools-2.5/src/maybe_threads.cc b/src/third_party/gperftools-2.5/src/maybe_threads.cc new file mode 100644 index 00000000000..acfc99a5ae5 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/maybe_threads.cc @@ -0,0 +1,171 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Menage <opensource@google.com> +// +// Some wrappers for pthread functions so that we can be LD_PRELOADed +// against non-pthreads apps. +// +// This module will behave very strangely if some pthreads functions +// exist and others don't. + +#include "config.h" +#include <assert.h> +#include <string.h> // for memcmp +#include <stdio.h> // for __isthreaded on FreeBSD +// We don't actually need strings. But including this header seems to +// stop the compiler trying to short-circuit our pthreads existence +// tests and claiming that the address of a function is always +// non-zero. I have no idea why ... +#include <string> +#include "maybe_threads.h" +#include "base/basictypes.h" +#include "base/logging.h" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. 
+#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +// These are the methods we're going to conditionally include. +extern "C" { + int pthread_key_create (pthread_key_t*, void (*)(void*)) + __THROW ATTRIBUTE_WEAK; + int pthread_key_delete (pthread_key_t) + __THROW ATTRIBUTE_WEAK; + void *pthread_getspecific(pthread_key_t) + __THROW ATTRIBUTE_WEAK; + int pthread_setspecific(pthread_key_t, const void*) + __THROW ATTRIBUTE_WEAK; + int pthread_once(pthread_once_t *, void (*)(void)) + ATTRIBUTE_WEAK; + int pthread_atfork(void (*__prepare) (void), + void (*__parent) (void), + void (*__child) (void)) + __THROW ATTRIBUTE_WEAK; +} + +#define MAX_PERTHREAD_VALS 16 +static void *perftools_pthread_specific_vals[MAX_PERTHREAD_VALS]; +static int next_key; + +// NOTE: it's similar to bit_cast defined in basictypes.h with the +// exception that it ignores size mismatches +template <typename T1, typename T2> +static T2 memcpy_cast(const T1 &input) { + T2 output; + size_t s = sizeof(input); + if (sizeof(output) < s) { + s = sizeof(output); + } + memcpy(&output, &input, s); + return output; +} + +int perftools_pthread_key_create(pthread_key_t *key, + void (*destr_function) (void *)) { + if (pthread_key_create) { + return pthread_key_create(key, destr_function); + } else { + assert(next_key < MAX_PERTHREAD_VALS); + *key = memcpy_cast<int, pthread_key_t>(next_key++); + return 0; + } +} + +int perftools_pthread_key_delete(pthread_key_t key) { + if (pthread_key_delete) { + return pthread_key_delete(key); + } else { + return 0; + } +} + +void *perftools_pthread_getspecific(pthread_key_t key) { + if (pthread_getspecific) { + return pthread_getspecific(key); + } else { + return perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)]; + } +} + +int perftools_pthread_setspecific(pthread_key_t key, void *val) { + if (pthread_setspecific) { + return pthread_setspecific(key, val); + } else { + perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)] = val; + return 0; + } +} + + +static pthread_once_t pthread_once_init = PTHREAD_ONCE_INIT; +int perftools_pthread_once(pthread_once_t *ctl, + void (*init_routine) (void)) { +#ifdef __FreeBSD__ + // On __FreeBSD__, calling pthread_once on a system that is not + // linked with -pthread is silently a noop. :-( Luckily, we have a + // workaround: FreeBSD exposes __isthreaded in <stdio.h>, which is + // set to 1 when the first thread is spawned. So on those systems, + // we can use our own separate pthreads-once mechanism, which is + // used until __isthreaded is 1 (which will never be true if the app + // is not linked with -pthread).
+ static bool pthread_once_ran_before_threads = false; + if (pthread_once_ran_before_threads) { + return 0; + } + if (!__isthreaded) { + init_routine(); + pthread_once_ran_before_threads = true; + return 0; + } +#endif + if (pthread_once) { + return pthread_once(ctl, init_routine); + } else { + if (memcmp(ctl, &pthread_once_init, sizeof(*ctl)) == 0) { + init_routine(); + ++*(char*)(ctl); // make it so it's no longer equal to init + } + return 0; + } +} + +void perftools_pthread_atfork(void (*before)(), + void (*parent_after)(), + void (*child_after)()) { + if (pthread_atfork) { + int rv = pthread_atfork(before, parent_after, child_after); + CHECK(rv == 0); + } +} diff --git a/src/third_party/gperftools-2.5/src/maybe_threads.h b/src/third_party/gperftools-2.5/src/maybe_threads.h new file mode 100644 index 00000000000..c6cfdf7d158 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/maybe_threads.h @@ -0,0 +1,61 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Menage <opensource@google.com> + +//------------------------------------------------------------------- +// Some wrappers for pthread functions so that we can be LD_PRELOADed +// against non-pthreads apps. +//------------------------------------------------------------------- + +#ifndef GOOGLE_MAYBE_THREADS_H_ +#define GOOGLE_MAYBE_THREADS_H_ + +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif + +int perftools_pthread_key_create(pthread_key_t *key, + void (*destr_function) (void *)); +int perftools_pthread_key_delete(pthread_key_t key); +void *perftools_pthread_getspecific(pthread_key_t key); +int perftools_pthread_setspecific(pthread_key_t key, void *val); +int perftools_pthread_once(pthread_once_t *ctl, + void (*init_routine) (void)); + +// Our wrapper for pthread_atfork. Does _nothing_ when there are no +// threads. See static_vars.cc:SetupAtForkLocksHandler for the only user +// of this.
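+// Roughly, that caller does (names as in static_vars.cc, shown here for +// illustration only): +// +// perftools_pthread_atfork(CentralCacheLockAll, // parent, before fork +// CentralCacheUnlockAll, // parent, after fork +// CentralCacheUnlockAll); // child, after fork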
+void perftools_pthread_atfork(void (*before)(), + void (*parent_after)(), + void (*child_after)()); + +#endif /* GOOGLE_MAYBE_THREADS_H_ */ diff --git a/src/third_party/gperftools-2.5/src/memfs_malloc.cc b/src/third_party/gperftools-2.5/src/memfs_malloc.cc new file mode 100644 index 00000000000..fd26daff6b2 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/memfs_malloc.cc @@ -0,0 +1,272 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Arun Sharma +// +// A tcmalloc system allocator that uses a memory based filesystem such as +// tmpfs or hugetlbfs +// +// Since these only exist on linux, we only register this allocator there. + +#ifdef __linux + +#include <config.h> +#include <errno.h> // for errno, EINVAL +#include <inttypes.h> // for PRId64 +#include <limits.h> // for PATH_MAX +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for int64_t, uintptr_t +#endif +#include <stdio.h> // for snprintf +#include <stdlib.h> // for mkstemp +#include <string.h> // for strerror +#include <sys/mman.h> // for mmap, MAP_FAILED, etc +#include <sys/statfs.h> // for fstatfs, statfs +#include <unistd.h> // for ftruncate, off_t, unlink +#include <new> // for operator new +#include <string> + +#include <gperftools/malloc_extension.h> +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/sysinfo.h" +#include "internal_logging.h" + +// TODO(sanjay): Move the code below into the tcmalloc namespace +using tcmalloc::kLog; +using tcmalloc::kCrash; +using tcmalloc::Log; +using std::string; + +DEFINE_string(memfs_malloc_path, EnvToString("TCMALLOC_MEMFS_MALLOC_PATH", ""), + "Path where hugetlbfs or tmpfs is mounted. 
The caller is " + "responsible for ensuring that the path is unique and does " + "not conflict with another process"); +DEFINE_int64(memfs_malloc_limit_mb, + EnvToInt("TCMALLOC_MEMFS_LIMIT_MB", 0), + "Limit total allocation size to the " + "specified number of MiB. 0 == no limit."); +DEFINE_bool(memfs_malloc_abort_on_fail, + EnvToBool("TCMALLOC_MEMFS_ABORT_ON_FAIL", false), + "abort() whenever memfs_malloc fails to satisfy an allocation " + "for any reason."); +DEFINE_bool(memfs_malloc_ignore_mmap_fail, + EnvToBool("TCMALLOC_MEMFS_IGNORE_MMAP_FAIL", false), + "Ignore failures from mmap"); +DEFINE_bool(memfs_malloc_map_private, + EnvToBool("TCMALLOC_MEMFS_MAP_PRIVATE", false), + "Use MAP_PRIVATE with mmap"); + +// Hugetlbfs based allocator for tcmalloc +class HugetlbSysAllocator: public SysAllocator { +public: + explicit HugetlbSysAllocator(SysAllocator* fallback) + : failed_(true), // To disable allocator until Initialize() is called. + big_page_size_(0), + hugetlb_fd_(-1), + hugetlb_base_(0), + fallback_(fallback) { + } + + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + bool Initialize(); + + bool failed_; // Whether failed to allocate memory. + +private: + void* AllocInternal(size_t size, size_t *actual_size, size_t alignment); + + int64 big_page_size_; + int hugetlb_fd_; // file descriptor for hugetlb + off_t hugetlb_base_; + + SysAllocator* fallback_; // Default system allocator to fall back to. +}; +static union { + char buf[sizeof(HugetlbSysAllocator)]; + void *ptr; +} hugetlb_space; + +// No locking needed here since we assume that tcmalloc calls +// us with an internal lock held (see tcmalloc/system-alloc.cc). +void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + if (failed_) { + return fallback_->Alloc(size, actual_size, alignment); + } + + // We don't respond to allocation requests smaller than big_page_size_ unless + // the caller is ok to take more than they asked for. Used by MetaDataAlloc. + if (actual_size == NULL && size < big_page_size_) { + return fallback_->Alloc(size, actual_size, alignment); + } + + // Enforce huge page alignment. Be careful to deal with overflow. + size_t new_alignment = alignment; + if (new_alignment < big_page_size_) new_alignment = big_page_size_; + size_t aligned_size = ((size + new_alignment - 1) / + new_alignment) * new_alignment; + if (aligned_size < size) { + return fallback_->Alloc(size, actual_size, alignment); + } + + void* result = AllocInternal(aligned_size, actual_size, new_alignment); + if (result != NULL) { + return result; + } + Log(kLog, __FILE__, __LINE__, + "HugetlbSysAllocator: (failed, allocated)", failed_, hugetlb_base_); + if (FLAGS_memfs_malloc_abort_on_fail) { + Log(kCrash, __FILE__, __LINE__, + "memfs_malloc_abort_on_fail is set"); + } + return fallback_->Alloc(size, actual_size, alignment); +} + +void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size, + size_t alignment) { + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > big_page_size_) { + extra = alignment - big_page_size_; + } + + // Test if this allocation would put us over the limit. + off_t limit = FLAGS_memfs_malloc_limit_mb*1024*1024; + if (limit > 0 && hugetlb_base_ + size + extra > limit) { + // Disable the allocator when there's less than one page left. 
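+    // (Worked example with illustrative numbers: with a 2 MiB huge page,
+    // limit = 64 MiB and hugetlb_base_ = 63 MiB, any aligned request trips
+    // the limit check above; and since limit - hugetlb_base_ = 1 MiB is
+    // smaller than big_page_size_, the branch below also disables the
+    // allocator instead of retrying forever.)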
+ if (limit - hugetlb_base_ < big_page_size_) { + Log(kLog, __FILE__, __LINE__, "reached memfs_malloc_limit_mb"); + failed_ = true; + } + else { + Log(kLog, __FILE__, __LINE__, + "alloc too large (size, bytes left)", size, limit-hugetlb_base_); + } + return NULL; + } + + // This is not needed for hugetlbfs, but needed for tmpfs. Annoyingly + // hugetlbfs returns EINVAL for ftruncate. + int ret = ftruncate(hugetlb_fd_, hugetlb_base_ + size + extra); + if (ret != 0 && errno != EINVAL) { + Log(kLog, __FILE__, __LINE__, + "ftruncate failed", strerror(errno)); + failed_ = true; + return NULL; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). + // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void *result; + result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + FLAGS_memfs_malloc_map_private ? MAP_PRIVATE : MAP_SHARED, + hugetlb_fd_, hugetlb_base_); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + if (!FLAGS_memfs_malloc_ignore_mmap_fail) { + Log(kLog, __FILE__, __LINE__, + "mmap failed (size, error)", size + extra, strerror(errno)); + failed_ = true; + } + return NULL; + } + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + // Adjust the return memory so it is aligned + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + ptr += adjust; + hugetlb_base_ += (size + extra); + + if (actual_size) { + *actual_size = size + extra - adjust; + } + + return reinterpret_cast<void*>(ptr); +} + +bool HugetlbSysAllocator::Initialize() { + char path[PATH_MAX]; + const int pathlen = FLAGS_memfs_malloc_path.size(); + if (pathlen + 8 > sizeof(path)) { + Log(kCrash, __FILE__, __LINE__, "XX fatal: memfs_malloc_path too long"); + return false; + } + memcpy(path, FLAGS_memfs_malloc_path.data(), pathlen); + memcpy(path + pathlen, ".XXXXXX", 8); // Also copies terminating \0 + + int hugetlb_fd = mkstemp(path); + if (hugetlb_fd == -1) { + Log(kLog, __FILE__, __LINE__, + "warning: unable to create memfs_malloc_path", + path, strerror(errno)); + return false; + } + + // Cleanup memory on process exit + if (unlink(path) == -1) { + Log(kCrash, __FILE__, __LINE__, + "fatal: error unlinking memfs_malloc_path", path, strerror(errno)); + return false; + } + + // Use fstatfs to figure out the default page size for memfs + struct statfs sfs; + if (fstatfs(hugetlb_fd, &sfs) == -1) { + Log(kCrash, __FILE__, __LINE__, + "fatal: error fstatfs of memfs_malloc_path", strerror(errno)); + return false; + } + int64 page_size = sfs.f_bsize; + + hugetlb_fd_ = hugetlb_fd; + big_page_size_ = page_size; + failed_ = false; + return true; +} + +REGISTER_MODULE_INITIALIZER(memfs_malloc, { + if (FLAGS_memfs_malloc_path.length()) { + SysAllocator* alloc = MallocExtension::instance()->GetSystemAllocator(); + HugetlbSysAllocator* hp = + new (hugetlb_space.buf) HugetlbSysAllocator(alloc); + if (hp->Initialize()) { + MallocExtension::instance()->SetSystemAllocator(hp); + } + } +}); + +#endif /* ifdef __linux */ diff --git a/src/third_party/gperftools-2.5/src/memory_region_map.cc b/src/third_party/gperftools-2.5/src/memory_region_map.cc new file mode 100755 index 00000000000..841d6f3cf85 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/memory_region_map.cc @@ -0,0 +1,831 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Maxim Lifantsev
+ */
+
+//
+// Background and key design points of MemoryRegionMap.
+//
+// MemoryRegionMap is a low-level module with quite atypical requirements that
+// result in some degree of non-triviality of the implementation and design.
+//
+// MemoryRegionMap collects info about *all* memory regions created with
+// mmap, munmap, mremap, sbrk.
+// The key word above is 'all': all that are happening in a process
+// during its lifetime, frequently starting even before global object
+// constructor execution.
+//
+// This is needed by the primary client of MemoryRegionMap:
+// HeapLeakChecker uses the regions and the associated stack traces
+// to figure out what part of the memory is the heap:
+// if MemoryRegionMap were to miss some (early) regions, leak checking would
+// stop working correctly.
+//
+// To accomplish the goal of functioning before/during global object
+// constructor execution MemoryRegionMap is done as a singleton service
+// that relies on its own on-demand initialized static constructor-less data,
+// and only relies on other low-level modules that can also function properly
+// even before global object constructors run.
+//
+// Accomplishing the goal of collecting data about all mmap, munmap, mremap,
+// sbrk occurrences is more involved: conceptually to do this one needs to
+// record some bits of data in particular about any mmap or sbrk call,
+// but to do that one needs to allocate memory for that data at some point,
+// but all memory allocations in the end themselves come from an mmap
+// or sbrk call (that's how the address space of the process grows).
+//
+// Also note that we need to do all the above recording from
+// within an mmap/sbrk hook, which is sometimes/frequently made by a memory
+// allocator, including the allocator MemoryRegionMap itself must rely on.
+// In the case of heap-checker usage this includes even the very first
+// mmap/sbrk call happening in the program: heap-checker gets activated due to
+// a link-time installed mmap/sbrk hook and it initializes MemoryRegionMap
+// and asks it to record info about this very first call right from that
+// very first hook invocation.
+//
+// MemoryRegionMap does its memory allocations via LowLevelAlloc:
+// unlike a more complex standard memory allocator, LowLevelAlloc cooperates
+// with MemoryRegionMap by not holding any of its own locks while it calls
+// mmap to get memory, thus we are able to call LowLevelAlloc from
+// our mmap/sbrk hooks without causing a deadlock in it.
+// For the same reason of deadlock prevention the locking in MemoryRegionMap
+// itself is write-recursive, which is an exception to Google's mutex usage.
+//
+// We still need to break the infinite cycle of mmap calling our hook,
+// which asks LowLevelAlloc for memory to record this mmap,
+// which (sometimes) causes mmap, which calls our hook, and so on.
+// We do this as follows: on a recursive call of MemoryRegionMap's
+// mmap/sbrk/mremap hook we record the data about the allocation in a
+// static fixed-sized stack (saved_regions and saved_buckets); when the
+// recursion unwinds, but before returning from the outer hook call, we unwind
+// this stack and move the data from saved_regions and saved_buckets to their
+// permanent places in the RegionSet and "bucket_table" respectively,
+// which can cause more allocations and mmap-s and recursion and unwinding,
+// but the whole process ends eventually due to the fact that for the small
+// allocations we are doing, LowLevelAlloc reuses one mmap call and parcels
+// out the memory it created to satisfy several of our allocation requests.
+//
+
+// ========================================================================= //
+
+#include <config.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#elif !defined(MAP_FAILED)
+#define MAP_FAILED -1  // the only thing we need from mman.h
+#endif
+#ifdef HAVE_PTHREAD
+#include <pthread.h>   // for pthread_t, pthread_self()
+#endif
+#include <stddef.h>
+
+#include <algorithm>
+#include <set>
+
+#include "memory_region_map.h"
+
+#include "base/googleinit.h"
+#include "base/logging.h"
+#include "base/low_level_alloc.h"
+#include "malloc_hook-inl.h"
+
+#include <gperftools/stacktrace.h>
+#include <gperftools/malloc_hook.h>
+
+// MREMAP_FIXED is a linux extension. Given how it's used in this file,
+// setting it to 0 is equivalent to saying "this feature isn't
+// supported", which is right.
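+// (With MREMAP_FIXED defined as 0, the "flags & MREMAP_FIXED" test in
+// MremapHook below is always false, so the logged new_addr is simply 0.)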
+#ifndef MREMAP_FIXED +# define MREMAP_FIXED 0 +#endif + +using std::max; + +// ========================================================================= // + +int MemoryRegionMap::client_count_ = 0; +int MemoryRegionMap::max_stack_depth_ = 0; +MemoryRegionMap::RegionSet* MemoryRegionMap::regions_ = NULL; +LowLevelAlloc::Arena* MemoryRegionMap::arena_ = NULL; +SpinLock MemoryRegionMap::lock_(SpinLock::LINKER_INITIALIZED); +SpinLock MemoryRegionMap::owner_lock_( // ACQUIRED_AFTER(lock_) + SpinLock::LINKER_INITIALIZED); +int MemoryRegionMap::recursion_count_ = 0; // GUARDED_BY(owner_lock_) +pthread_t MemoryRegionMap::lock_owner_tid_; // GUARDED_BY(owner_lock_) +int64 MemoryRegionMap::map_size_ = 0; +int64 MemoryRegionMap::unmap_size_ = 0; +HeapProfileBucket** MemoryRegionMap::bucket_table_ = NULL; // GUARDED_BY(lock_) +int MemoryRegionMap::num_buckets_ = 0; // GUARDED_BY(lock_) +int MemoryRegionMap::saved_buckets_count_ = 0; // GUARDED_BY(lock_) +HeapProfileBucket MemoryRegionMap::saved_buckets_[20]; // GUARDED_BY(lock_) + +// GUARDED_BY(lock_) +const void* MemoryRegionMap::saved_buckets_keys_[20][kMaxStackDepth]; + +// ========================================================================= // + +// Simple hook into execution of global object constructors, +// so that we do not call pthread_self() when it does not yet work. +static bool libpthread_initialized = false; +REGISTER_MODULE_INITIALIZER(libpthread_initialized_setter, + libpthread_initialized = true); + +static inline bool current_thread_is(pthread_t should_be) { + // Before main() runs, there's only one thread, so we're always that thread + if (!libpthread_initialized) return true; + // this starts working only sometime well into global constructor execution: + return pthread_equal(pthread_self(), should_be); +} + +// ========================================================================= // + +// Constructor-less place-holder to store a RegionSet in. +union MemoryRegionMap::RegionSetRep { + char rep[sizeof(RegionSet)]; + void* align_it; // do not need a better alignment for 'rep' than this + RegionSet* region_set() { return reinterpret_cast<RegionSet*>(rep); } +}; + +// The bytes where MemoryRegionMap::regions_ will point to. +// We use RegionSetRep with noop c-tor so that global construction +// does not interfere. +static MemoryRegionMap::RegionSetRep regions_rep; + +// ========================================================================= // + +// Has InsertRegionLocked been called recursively +// (or rather should we *not* use regions_ to record a hooked mmap). 
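+// A rough sketch of how the flag is used (see Init() and
+// InsertRegionLocked() below for the real code):
+//
+//   recursive_insert = true;   // our own upcoming mmaps must be buffered
+//   ... allocate (may re-enter the mmap hook; the nested call then records
+//       into the saved_regions array instead of regions_) ...
+//   recursive_insert = false;
+//   HandleSavedRegionsLocked(&InsertRegionLocked);  // drain the buffer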
+static bool recursive_insert = false; + +void MemoryRegionMap::Init(int max_stack_depth, bool use_buckets) { + RAW_VLOG(10, "MemoryRegionMap Init"); + RAW_CHECK(max_stack_depth >= 0, ""); + // Make sure we don't overflow the memory in region stacks: + RAW_CHECK(max_stack_depth <= kMaxStackDepth, + "need to increase kMaxStackDepth?"); + Lock(); + client_count_ += 1; + max_stack_depth_ = max(max_stack_depth_, max_stack_depth); + if (client_count_ > 1) { + // not first client: already did initialization-proper + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Init increment done"); + return; + } + // Set our hooks and make sure they were installed: + RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), ""); + // We need to set recursive_insert since the NewArena call itself + // will already do some allocations with mmap which our hooks will catch + // recursive_insert allows us to buffer info about these mmap calls. + // Note that Init() can be (and is) sometimes called + // already from within an mmap/sbrk hook. + recursive_insert = true; + arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); + recursive_insert = false; + HandleSavedRegionsLocked(&InsertRegionLocked); // flush the buffered ones + // Can't instead use HandleSavedRegionsLocked(&DoInsertRegionLocked) before + // recursive_insert = false; as InsertRegionLocked will also construct + // regions_ on demand for us. + if (use_buckets) { + const int table_bytes = kHashTableSize * sizeof(*bucket_table_); + recursive_insert = true; + bucket_table_ = static_cast<HeapProfileBucket**>( + MyAllocator::Allocate(table_bytes)); + recursive_insert = false; + memset(bucket_table_, 0, table_bytes); + num_buckets_ = 0; + } + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Init done"); +} + +bool MemoryRegionMap::Shutdown() { + RAW_VLOG(10, "MemoryRegionMap Shutdown"); + Lock(); + RAW_CHECK(client_count_ > 0, ""); + client_count_ -= 1; + if (client_count_ != 0) { // not last client; need not really shutdown + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); + return true; + } + if (bucket_table_ != NULL) { + for (int i = 0; i < kHashTableSize; i++) { + for (HeapProfileBucket* curr = bucket_table_[i]; curr != 0; /**/) { + HeapProfileBucket* bucket = curr; + curr = curr->next; + MyAllocator::Free(bucket->stack, 0); + MyAllocator::Free(bucket, 0); + } + } + MyAllocator::Free(bucket_table_, 0); + num_buckets_ = 0; + bucket_table_ = NULL; + } + RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), ""); + if (regions_) regions_->~RegionSet(); + regions_ = NULL; + bool deleted_arena = LowLevelAlloc::DeleteArena(arena_); + if (deleted_arena) { + arena_ = 0; + } else { + RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); + } + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Shutdown done"); + return deleted_arena; +} + +bool MemoryRegionMap::IsRecordingLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + return client_count_ > 0; +} + +// Invariants (once libpthread_initialized is true): +// * While lock_ is not held, recursion_count_ is 0 (and +// lock_owner_tid_ is the previous owner, but we don't rely on +// that). 
+// * recursion_count_ and lock_owner_tid_ are only written while +// both lock_ and owner_lock_ are held. They may be read under +// just owner_lock_. +// * At entry and exit of Lock() and Unlock(), the current thread +// owns lock_ iff pthread_equal(lock_owner_tid_, pthread_self()) +// && recursion_count_ > 0. +void MemoryRegionMap::Lock() { + { + SpinLockHolder l(&owner_lock_); + if (recursion_count_ > 0 && current_thread_is(lock_owner_tid_)) { + RAW_CHECK(lock_.IsHeld(), "Invariants violated"); + recursion_count_++; + RAW_CHECK(recursion_count_ <= 5, + "recursive lock nesting unexpectedly deep"); + return; + } + } + lock_.Lock(); + { + SpinLockHolder l(&owner_lock_); + RAW_CHECK(recursion_count_ == 0, + "Last Unlock didn't reset recursion_count_"); + if (libpthread_initialized) + lock_owner_tid_ = pthread_self(); + recursion_count_ = 1; + } +} + +void MemoryRegionMap::Unlock() { + SpinLockHolder l(&owner_lock_); + RAW_CHECK(recursion_count_ > 0, "unlock when not held"); + RAW_CHECK(lock_.IsHeld(), + "unlock when not held, and recursion_count_ is wrong"); + RAW_CHECK(current_thread_is(lock_owner_tid_), "unlock by non-holder"); + recursion_count_--; + if (recursion_count_ == 0) { + lock_.Unlock(); + } +} + +bool MemoryRegionMap::LockIsHeld() { + SpinLockHolder l(&owner_lock_); + return lock_.IsHeld() && current_thread_is(lock_owner_tid_); +} + +const MemoryRegionMap::Region* +MemoryRegionMap::DoFindRegionLocked(uintptr_t addr) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + if (regions_ != NULL) { + Region sample; + sample.SetRegionSetKey(addr); + RegionSet::iterator region = regions_->lower_bound(sample); + if (region != regions_->end()) { + RAW_CHECK(addr <= region->end_addr, ""); + if (region->start_addr <= addr && addr < region->end_addr) { + return &(*region); + } + } + } + return NULL; +} + +bool MemoryRegionMap::FindRegion(uintptr_t addr, Region* result) { + Lock(); + const Region* region = DoFindRegionLocked(addr); + if (region != NULL) *result = *region; // create it as an independent copy + Unlock(); + return region != NULL; +} + +bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, + Region* result) { + Lock(); + const Region* region = DoFindRegionLocked(stack_top); + if (region != NULL) { + RAW_VLOG(10, "Stack at %p is inside region %p..%p", + reinterpret_cast<void*>(stack_top), + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + const_cast<Region*>(region)->set_is_stack(); // now we know + // cast is safe (set_is_stack does not change the set ordering key) + *result = *region; // create *result as an independent copy + } + Unlock(); + return region != NULL; +} + +HeapProfileBucket* MemoryRegionMap::GetBucket(int depth, + const void* const key[]) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + // Make hash-value + uintptr_t hash = 0; + for (int i = 0; i < depth; i++) { + hash += reinterpret_cast<uintptr_t>(key[i]); + hash += hash << 10; + hash ^= hash >> 6; + } + hash += hash << 3; + hash ^= hash >> 11; + + // Lookup stack trace in table + unsigned int hash_index = (static_cast<unsigned int>(hash)) % kHashTableSize; + for (HeapProfileBucket* bucket = bucket_table_[hash_index]; + bucket != 0; + bucket = bucket->next) { + if ((bucket->hash == hash) && (bucket->depth == depth) && + std::equal(key, key + depth, bucket->stack)) { + return bucket; + } + } + + // Create new bucket + const size_t key_size = sizeof(key[0]) * depth; + HeapProfileBucket* bucket; + if (recursive_insert) { // 
recursion: save in saved_buckets_ + const void** key_copy = saved_buckets_keys_[saved_buckets_count_]; + std::copy(key, key + depth, key_copy); + bucket = &saved_buckets_[saved_buckets_count_]; + memset(bucket, 0, sizeof(*bucket)); + ++saved_buckets_count_; + bucket->stack = key_copy; + bucket->next = NULL; + } else { + recursive_insert = true; + const void** key_copy = static_cast<const void**>( + MyAllocator::Allocate(key_size)); + recursive_insert = false; + std::copy(key, key + depth, key_copy); + recursive_insert = true; + bucket = static_cast<HeapProfileBucket*>( + MyAllocator::Allocate(sizeof(HeapProfileBucket))); + recursive_insert = false; + memset(bucket, 0, sizeof(*bucket)); + bucket->stack = key_copy; + bucket->next = bucket_table_[hash_index]; + } + bucket->hash = hash; + bucket->depth = depth; + bucket_table_[hash_index] = bucket; + ++num_buckets_; + return bucket; +} + +MemoryRegionMap::RegionIterator MemoryRegionMap::BeginRegionLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_CHECK(regions_ != NULL, ""); + return regions_->begin(); +} + +MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_CHECK(regions_ != NULL, ""); + return regions_->end(); +} + +inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { + RAW_VLOG(12, "Inserting region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + RegionSet::const_iterator i = regions_->lower_bound(region); + if (i != regions_->end() && i->start_addr <= region.start_addr) { + RAW_DCHECK(region.end_addr <= i->end_addr, ""); // lower_bound ensures this + return; // 'region' is a subset of an already recorded region; do nothing + // We can be stricter and allow this only when *i has been created via + // an mmap with MAP_NORESERVE flag set. + } + if (DEBUG_MODE) { + RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), + "Wow, overlapping memory regions"); + Region sample; + sample.SetRegionSetKey(region.start_addr); + i = regions_->lower_bound(sample); + RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), + "Wow, overlapping memory regions"); + } + region.AssertIsConsistent(); // just making sure + // This inserts and allocates permanent storage for region + // and its call stack data: it's safe to do it now: + regions_->insert(region); + RAW_VLOG(12, "Inserted region %p..%p :", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr)); + if (VLOG_IS_ON(12)) LogAllLocked(); +} + +// These variables are local to MemoryRegionMap::InsertRegionLocked() +// and MemoryRegionMap::HandleSavedRegionsLocked() +// and are file-level to ensure that they are initialized at load time. + +// Number of unprocessed region inserts. +static int saved_regions_count = 0; + +// Unprocessed inserts (must be big enough to hold all allocations that can +// be caused by a InsertRegionLocked call). +// Region has no constructor, so that c-tor execution does not interfere +// with the any-time use of the static memory behind saved_regions. 
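+// (The size 20 matches saved_buckets_[20] in memory_region_map.h: an
+// expected-to-be-safe bound on how many buffered inserts one hook call can
+// trigger, enforced by a RAW_CHECK in InsertRegionLocked() below.)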
+static MemoryRegionMap::Region saved_regions[20]; + +inline void MemoryRegionMap::HandleSavedRegionsLocked( + void (*insert_func)(const Region& region)) { + while (saved_regions_count > 0) { + // Making a local-var copy of the region argument to insert_func + // including its stack (w/o doing any memory allocations) is important: + // in many cases the memory in saved_regions + // will get written-to during the (*insert_func)(r) call below. + Region r = saved_regions[--saved_regions_count]; + (*insert_func)(r); + } +} + +void MemoryRegionMap::RestoreSavedBucketsLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + while (saved_buckets_count_ > 0) { + HeapProfileBucket bucket = saved_buckets_[--saved_buckets_count_]; + unsigned int hash_index = + static_cast<unsigned int>(bucket.hash) % kHashTableSize; + bool is_found = false; + for (HeapProfileBucket* curr = bucket_table_[hash_index]; + curr != 0; + curr = curr->next) { + if ((curr->hash == bucket.hash) && (curr->depth == bucket.depth) && + std::equal(bucket.stack, bucket.stack + bucket.depth, curr->stack)) { + curr->allocs += bucket.allocs; + curr->alloc_size += bucket.alloc_size; + curr->frees += bucket.frees; + curr->free_size += bucket.free_size; + is_found = true; + break; + } + } + if (is_found) continue; + + const size_t key_size = sizeof(bucket.stack[0]) * bucket.depth; + const void** key_copy = static_cast<const void**>( + MyAllocator::Allocate(key_size)); + std::copy(bucket.stack, bucket.stack + bucket.depth, key_copy); + HeapProfileBucket* new_bucket = static_cast<HeapProfileBucket*>( + MyAllocator::Allocate(sizeof(HeapProfileBucket))); + memset(new_bucket, 0, sizeof(*new_bucket)); + new_bucket->hash = bucket.hash; + new_bucket->depth = bucket.depth; + new_bucket->stack = key_copy; + new_bucket->next = bucket_table_[hash_index]; + bucket_table_[hash_index] = new_bucket; + ++num_buckets_; + } +} + +inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + // We can be called recursively, because RegionSet constructor + // and DoInsertRegionLocked() (called below) can call the allocator. + // recursive_insert tells us if that's the case. When this happens, + // region insertion information is recorded in saved_regions[], + // and taken into account when the recursion unwinds. + // Do the insert: + if (recursive_insert) { // recursion: save in saved_regions + RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + RAW_CHECK(saved_regions_count < arraysize(saved_regions), ""); + // Copy 'region' to saved_regions[saved_regions_count] + // together with the contents of its call_stack, + // then increment saved_regions_count. 
+    saved_regions[saved_regions_count++] = region;
+  } else {  // not a recursive call
+    if (regions_ == NULL) {  // init regions_
+      RAW_VLOG(12, "Initializing region set");
+      regions_ = regions_rep.region_set();
+      recursive_insert = true;
+      new(regions_) RegionSet();
+      HandleSavedRegionsLocked(&DoInsertRegionLocked);
+      recursive_insert = false;
+    }
+    recursive_insert = true;
+    // Do the actual insertion work to put new regions into regions_:
+    DoInsertRegionLocked(region);
+    HandleSavedRegionsLocked(&DoInsertRegionLocked);
+    recursive_insert = false;
+  }
+}
+
+// We strip out a different number of stack frames in debug mode
+// because less inlining happens in that case
+#ifdef NDEBUG
+static const int kStripFrames = 1;
+#else
+static const int kStripFrames = 3;
+#endif
+
+void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
+  // Record start/end info about this memory acquisition call in a new region:
+  Region region;
+  region.Create(start, size);
+  // First get the call stack info into the local variable 'region':
+  int depth = 0;
+  // NOTE: libunwind also does mmap, very likely while holding its own
+  // lock(s). So some threads may first take the libunwind lock, and then
+  // take the region map lock (necessary to record an mmap done from inside
+  // libunwind). On the other hand, other thread(s) may do a normal mmap,
+  // which would call this method to record it, which would then proceed
+  // with installing that record into the region map while holding the
+  // region map lock. That may cause an mmap from our own internal
+  // allocators, so an attempt to unwind in this case may take the libunwind
+  // and region map locks in the reverse order, which is an obvious deadlock.
+  //
+  // Thankfully, we can easily detect if we're holding the region map lock
+  // and avoid recording a backtrace in this (rare and largely
+  // irrelevant) case. By doing this we "declare" that a thread needing
+  // both locks must take the region map lock last. In other words we do
+  // not allow taking the libunwind lock when we already have the region
+  // map lock. Note, this is generally impossible when somebody tries to
+  // mix cpu profiling and heap checking/profiling, because the cpu
+  // profiler grabs backtraces at arbitrary places. But at least such a
+  // combination is rarer and less relevant.
+  if (max_stack_depth_ > 0 && !LockIsHeld()) {
+    depth = MallocHook::GetCallerStackTrace(const_cast<void**>(region.call_stack),
+                                            max_stack_depth_, kStripFrames + 1);
+  }
+  region.set_call_stack_depth(depth);  // record stack info fully
+  RAW_VLOG(10, "New global region %p..%p from %p",
+           reinterpret_cast<void*>(region.start_addr),
+           reinterpret_cast<void*>(region.end_addr),
+           reinterpret_cast<void*>(region.caller()));
+  // Note: none of the above allocates memory.
+  Lock();  // recursively lock
+  map_size_ += size;
+  InsertRegionLocked(region);
+  // This will (eventually) allocate storage for and copy over the stack data
+  // from region.call_stack_data_ that is pointed to by region.call_stack().
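+  // Bucket accounting below only runs when Init() was called with
+  // use_buckets == true (bucket_table_ is non-NULL only in that case):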
+ if (bucket_table_ != NULL) { + HeapProfileBucket* b = GetBucket(depth, region.call_stack); + ++b->allocs; + b->alloc_size += size; + if (!recursive_insert) { + recursive_insert = true; + RestoreSavedBucketsLocked(); + recursive_insert = false; + } + } + Unlock(); +} + +void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { + Lock(); + if (recursive_insert) { + // First remove the removed region from saved_regions, if it's + // there, to prevent overrunning saved_regions in recursive + // map/unmap call sequences, and also from later inserting regions + // which have already been unmapped. + uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); + uintptr_t end_addr = start_addr + size; + int put_pos = 0; + int old_count = saved_regions_count; + for (int i = 0; i < old_count; ++i, ++put_pos) { + Region& r = saved_regions[i]; + if (r.start_addr == start_addr && r.end_addr == end_addr) { + // An exact match, so it's safe to remove. + RecordRegionRemovalInBucket(r.call_stack_depth, r.call_stack, size); + --saved_regions_count; + --put_pos; + RAW_VLOG(10, ("Insta-Removing saved region %p..%p; " + "now have %d saved regions"), + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + saved_regions_count); + } else { + if (put_pos < i) { + saved_regions[put_pos] = saved_regions[i]; + } + } + } + } + if (regions_ == NULL) { // We must have just unset the hooks, + // but this thread was already inside the hook. + Unlock(); + return; + } + if (!recursive_insert) { + HandleSavedRegionsLocked(&InsertRegionLocked); + } + // first handle adding saved regions if any + uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); + uintptr_t end_addr = start_addr + size; + // subtract start_addr, end_addr from all the regions + RAW_VLOG(10, "Removing global region %p..%p; have %" PRIuS " regions", + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + regions_->size()); + Region sample; + sample.SetRegionSetKey(start_addr); + // Only iterate over the regions that might overlap start_addr..end_addr: + for (RegionSet::iterator region = regions_->lower_bound(sample); + region != regions_->end() && region->start_addr < end_addr; + /*noop*/) { + RAW_VLOG(13, "Looking at region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + if (start_addr <= region->start_addr && + region->end_addr <= end_addr) { // full deletion + RAW_VLOG(12, "Deleting region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + region->end_addr - region->start_addr); + RegionSet::iterator d = region; + ++region; + regions_->erase(d); + continue; + } else if (region->start_addr < start_addr && + end_addr < region->end_addr) { // cutting-out split + RAW_VLOG(12, "Splitting region %p..%p in two", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + end_addr - start_addr); + // Make another region for the start portion: + // The new region has to be the start portion because we can't + // just modify region->end_addr as it's the sorting key. 
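+      // (Illustrative: removing [30,40) from a recorded region [10,50)
+      // leaves [10,30), inserted below as the copy r, and [40,50), obtained
+      // by moving the original region's start_addr forward.)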
+ Region r = *region; + r.set_end_addr(start_addr); + InsertRegionLocked(r); + // cut *region from start: + const_cast<Region&>(*region).set_start_addr(end_addr); + } else if (end_addr > region->start_addr && + start_addr <= region->start_addr) { // cut from start + RAW_VLOG(12, "Start-chopping region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + end_addr - region->start_addr); + const_cast<Region&>(*region).set_start_addr(end_addr); + } else if (start_addr > region->start_addr && + start_addr < region->end_addr) { // cut from end + RAW_VLOG(12, "End-chopping region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + region->end_addr - start_addr); + // Can't just modify region->end_addr (it's the sorting key): + Region r = *region; + r.set_end_addr(start_addr); + RegionSet::iterator d = region; + ++region; + // It's safe to erase before inserting since r is independent of *d: + // r contains an own copy of the call stack: + regions_->erase(d); + InsertRegionLocked(r); + continue; + } + ++region; + } + RAW_VLOG(12, "Removed region %p..%p; have %" PRIuS " regions", + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + regions_->size()); + if (VLOG_IS_ON(12)) LogAllLocked(); + unmap_size_ += size; + Unlock(); +} + +void MemoryRegionMap::RecordRegionRemovalInBucket(int depth, + const void* const stack[], + size_t size) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + if (bucket_table_ == NULL) return; + HeapProfileBucket* b = GetBucket(depth, stack); + ++b->frees; + b->free_size += size; +} + +void MemoryRegionMap::MmapHook(const void* result, + const void* start, size_t size, + int prot, int flags, + int fd, off_t offset) { + // TODO(maxim): replace all 0x%" PRIxS " by %p when RAW_VLOG uses a safe + // snprintf reimplementation that does not malloc to pretty-print NULL + RAW_VLOG(10, "MMap = 0x%" PRIxPTR " of %" PRIuS " at %" PRIu64 " " + "prot %d flags %d fd %d offs %" PRId64, + reinterpret_cast<uintptr_t>(result), size, + reinterpret_cast<uint64>(start), prot, flags, fd, + static_cast<int64>(offset)); + if (result != reinterpret_cast<void*>(MAP_FAILED) && size != 0) { + RecordRegionAddition(result, size); + } +} + +void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) { + RAW_VLOG(10, "MUnmap of %p %" PRIuS "", ptr, size); + if (size != 0) { + RecordRegionRemoval(ptr, size); + } +} + +void MemoryRegionMap::MremapHook(const void* result, + const void* old_addr, size_t old_size, + size_t new_size, int flags, + const void* new_addr) { + RAW_VLOG(10, "MRemap = 0x%" PRIxPTR " of 0x%" PRIxPTR " %" PRIuS " " + "to %" PRIuS " flags %d new_addr=0x%" PRIxPTR, + (uintptr_t)result, (uintptr_t)old_addr, + old_size, new_size, flags, + flags & MREMAP_FIXED ? 
(uintptr_t)new_addr : 0); + if (result != reinterpret_cast<void*>(-1)) { + RecordRegionRemoval(old_addr, old_size); + RecordRegionAddition(result, new_size); + } +} + +void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { + RAW_VLOG(10, "Sbrk = 0x%" PRIxPTR " of %" PRIdS "", (uintptr_t)result, increment); + if (result != reinterpret_cast<void*>(-1)) { + if (increment > 0) { + void* new_end = sbrk(0); + RecordRegionAddition(result, reinterpret_cast<uintptr_t>(new_end) - + reinterpret_cast<uintptr_t>(result)); + } else if (increment < 0) { + void* new_end = sbrk(0); + RecordRegionRemoval(new_end, reinterpret_cast<uintptr_t>(result) - + reinterpret_cast<uintptr_t>(new_end)); + } + } +} + +void MemoryRegionMap::LogAllLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_LOG(INFO, "List of regions:"); + uintptr_t previous = 0; + for (RegionSet::const_iterator r = regions_->begin(); + r != regions_->end(); ++r) { + RAW_LOG(INFO, "Memory region 0x%" PRIxPTR "..0x%" PRIxPTR " " + "from 0x%" PRIxPTR " stack=%d", + r->start_addr, r->end_addr, r->caller(), r->is_stack); + RAW_CHECK(previous < r->end_addr, "wow, we messed up the set order"); + // this must be caused by uncontrolled recursive operations on regions_ + previous = r->end_addr; + } + RAW_LOG(INFO, "End of regions list"); +} diff --git a/src/third_party/gperftools-2.5/src/memory_region_map.h b/src/third_party/gperftools-2.5/src/memory_region_map.h new file mode 100644 index 00000000000..ec388e1cc54 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/memory_region_map.h @@ -0,0 +1,413 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * ---
+ * Author: Maxim Lifantsev
+ */
+
+#ifndef BASE_MEMORY_REGION_MAP_H_
+#define BASE_MEMORY_REGION_MAP_H_
+
+#include <config.h>
+
+#ifdef HAVE_PTHREAD
+#include <pthread.h>
+#endif
+#include <stddef.h>
+#include <set>
+#include "base/stl_allocator.h"
+#include "base/spinlock.h"
+#include "base/thread_annotations.h"
+#include "base/low_level_alloc.h"
+#include "heap-profile-stats.h"
+
+// TODO(maxim): add a unittest:
+//  execute a bunch of mmaps and compare the resulting memory map
+//  with what strace logs;
+//  execute a bunch of mmap/munmap calls and compare the memory map with
+//  our own accounting of what those mmaps generated
+
+// Thread-safe class to collect and query the map of all memory regions
+// in a process that have been created with mmap, munmap, mremap, sbrk.
+// For each memory region, we keep track of (and provide to users)
+// the stack trace that allocated that memory region.
+// The recorded stack trace depth is bounded by
+// a user-supplied max_stack_depth parameter of Init().
+// After initialization with Init()
+// (which can happen even before global object constructor execution)
+// we collect the map by installing and monitoring MallocHook-s
+// for mmap, munmap, mremap, sbrk.
+// At any time one can query this map via the provided interface.
+// For more details on the design of MemoryRegionMap
+// see the comment at the top of our .cc file.
+class MemoryRegionMap {
+ private:
+  // Max call stack recording depth supported by Init().  Set it to be
+  // high enough for all our clients.  Note: we do not define storage
+  // for this (doing that requires special handling in windows), so
+  // don't take the address of it!
+  static const int kMaxStackDepth = 32;
+
+  // Size of the hash table of buckets.  A structure of the bucket table is
+  // described in heap-profile-stats.h.
+  static const int kHashTableSize = 179999;
+
+ public:
+  // interface ================================================================
+
+  // Every client of MemoryRegionMap must call Init() before first use,
+  // and Shutdown() after last use.  This allows us to reference count
+  // this (singleton) class properly.  MemoryRegionMap assumes it's the
+  // only client of MallocHooks, so a client can only register other
+  // MallocHooks after calling Init() and must unregister them before
+  // calling Shutdown().
+
+  // Initialize this module to record memory allocation stack traces.
+  // Stack traces that have more than "max_stack_depth" frames
+  // are automatically shrunk to "max_stack_depth" when they are recorded.
+  // Init() can be called more than once w/o harm; the largest
+  // max_stack_depth will be the effective one.
+  // When "use_buckets" is true, counts of mmap and munmap sizes will be
+  // recorded with each stack trace.  If Init() is called more than once,
+  // counting will be effective after any call that passed "use_buckets"
+  // as true.
+  // It will install mmap, munmap, mremap, sbrk hooks
+  // and initialize arena_ and our hook and locks, hence one can use
+  // MemoryRegionMap::Lock()/Unlock() to manage the locks.
+  // Uses Lock/Unlock inside.
+  static void Init(int max_stack_depth, bool use_buckets);
+
+  // Try to shutdown this module undoing what Init() did.
+  // Returns true iff it could do a full shutdown (or shutdown was not
+  // attempted).  Full shutdown is attempted when the number of Shutdown()
+  // calls equals the number of Init() calls.
+  static bool Shutdown();
+
+  // Return true if MemoryRegionMap is initialized and recording, i.e. when
+  // the number of Init() calls is greater than the number of Shutdown()
+  // calls.
+  static bool IsRecordingLocked();
+
+  // Locks to protect our internal data structures.
+  // These also protect use of arena_ if our Init() has been done.
+  // The lock is recursive.
+  static void Lock() EXCLUSIVE_LOCK_FUNCTION(lock_);
+  static void Unlock() UNLOCK_FUNCTION(lock_);
+
+  // Returns true when the lock is held by this thread (for use in
+  // RAW_CHECK-s).
+  static bool LockIsHeld();
+
+  // Locker object that acquires the MemoryRegionMap::Lock
+  // for the duration of its lifetime (a C++ scope).
+  class LockHolder {
+   public:
+    LockHolder() { Lock(); }
+    ~LockHolder() { Unlock(); }
+   private:
+    DISALLOW_COPY_AND_ASSIGN(LockHolder);
+  };
+
+  // A memory region that we know about through malloc_hook-s.
+  // This is essentially an interface through which MemoryRegionMap
+  // exports the collected data to its clients.  Thread-compatible.
+  struct Region {
+    uintptr_t start_addr;  // region start address
+    uintptr_t end_addr;    // region end address
+    int call_stack_depth;  // number of caller stack frames that we saved
+    const void* call_stack[kMaxStackDepth];  // caller address stack array
+                                             // filled to call_stack_depth size
+    bool is_stack;  // does this region contain a thread's stack:
+                    // a user of MemoryRegionMap supplies this info
+
+    // Convenience accessor for call_stack[0],
+    // i.e. (the program counter of) the immediate caller
+    // of this region's allocation function,
+    // but it also returns NULL when call_stack_depth is 0,
+    // i.e. when we weren't able to get the call stack.
+    // This usually happens in recursive calls, when the stack-unwinder
+    // calls mmap() which in turn calls the stack-unwinder.
+    uintptr_t caller() const {
+      return reinterpret_cast<uintptr_t>(call_stack_depth >= 1
+                                         ? call_stack[0] : NULL);
+    }
+
+    // Return true iff this region overlaps region x.
+    bool Overlaps(const Region& x) const {
+      return start_addr < x.end_addr && end_addr > x.start_addr;
+    }
+
+   private:  // helpers for MemoryRegionMap
+    friend class MemoryRegionMap;
+
+    // The ways we create Region-s:
+    void Create(const void* start, size_t size) {
+      start_addr = reinterpret_cast<uintptr_t>(start);
+      end_addr = start_addr + size;
+      is_stack = false;  // not a stack till marked such
+      call_stack_depth = 0;
+      AssertIsConsistent();
+    }
+    void set_call_stack_depth(int depth) {
+      RAW_DCHECK(call_stack_depth == 0, "");  // only one such set is allowed
+      call_stack_depth = depth;
+      AssertIsConsistent();
+    }
+
+    // The ways we modify Region-s:
+    void set_is_stack() { is_stack = true; }
+    void set_start_addr(uintptr_t addr) {
+      start_addr = addr;
+      AssertIsConsistent();
+    }
+    void set_end_addr(uintptr_t addr) {
+      end_addr = addr;
+      AssertIsConsistent();
+    }
+
+    // Verifies that *this contains consistent data, crashes if not the case.
+    void AssertIsConsistent() const {
+      RAW_DCHECK(start_addr < end_addr, "");
+      RAW_DCHECK(call_stack_depth >= 0 &&
+                 call_stack_depth <= kMaxStackDepth, "");
+    }
+
+    // Post-default construction helper to make a Region suitable
+    // for searching in RegionSet regions_.
+    void SetRegionSetKey(uintptr_t addr) {
+      // make sure *this has no usable data:
+      if (DEBUG_MODE) memset(this, 0xFF, sizeof(*this));
+      end_addr = addr;
+    }
+
+    // Note: call_stack[kMaxStackDepth] as a member lets us make Region
+    // a simple self-contained struct with correctly behaving bit-wise copying.
+ // This simplifies the code of this module but wastes some memory: + // in most-often use case of this module (leak checking) + // only one call_stack element out of kMaxStackDepth is actually needed. + // Making the storage for call_stack variable-sized, + // substantially complicates memory management for the Region-s: + // as they need to be created and manipulated for some time + // w/o any memory allocations, yet are also given out to the users. + }; + + // Find the region that covers addr and write its data into *result if found, + // in which case *result gets filled so that it stays fully functional + // even when the underlying region gets removed from MemoryRegionMap. + // Returns success. Uses Lock/Unlock inside. + static bool FindRegion(uintptr_t addr, Region* result); + + // Find the region that contains stack_top, mark that region as + // a stack region, and write its data into *result if found, + // in which case *result gets filled so that it stays fully functional + // even when the underlying region gets removed from MemoryRegionMap. + // Returns success. Uses Lock/Unlock inside. + static bool FindAndMarkStackRegion(uintptr_t stack_top, Region* result); + + // Iterate over the buckets which store mmap and munmap counts per stack + // trace. It calls "callback" for each bucket, and passes "arg" to it. + template<class Type> + static void IterateBuckets(void (*callback)(const HeapProfileBucket*, Type), + Type arg); + + // Get the bucket whose caller stack trace is "key". The stack trace is + // used to a depth of "depth" at most. The requested bucket is created if + // needed. + // The bucket table is described in heap-profile-stats.h. + static HeapProfileBucket* GetBucket(int depth, const void* const key[]); + + private: // our internal types ============================================== + + // Region comparator for sorting with STL + struct RegionCmp { + bool operator()(const Region& x, const Region& y) const { + return x.end_addr < y.end_addr; + } + }; + + // We allocate STL objects in our own arena. + struct MyAllocator { + static void *Allocate(size_t n) { + return LowLevelAlloc::AllocWithArena(n, arena_); + } + static void Free(const void *p, size_t /* n */) { + LowLevelAlloc::Free(const_cast<void*>(p)); + } + }; + + // Set of the memory regions + typedef std::set<Region, RegionCmp, + STL_Allocator<Region, MyAllocator> > RegionSet; + + public: // more in-depth interface ========================================== + + // STL iterator with values of Region + typedef RegionSet::const_iterator RegionIterator; + + // Return the begin/end iterators to all the regions. + // These need Lock/Unlock protection around their whole usage (loop). + // Even when the same thread causes modifications during such a loop + // (which are permitted due to recursive locking) + // the loop iterator will still be valid as long as its region + // has not been deleted, but EndRegionLocked should be + // re-evaluated whenever the set of regions has changed. + static RegionIterator BeginRegionLocked(); + static RegionIterator EndRegionLocked(); + + // Return the accumulated sizes of mapped and unmapped regions. 
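+  // (A client can estimate the live mapped footprint as
+  // MapSize() - UnmapSize(); both counters grow monotonically.)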
+ static int64 MapSize() { return map_size_; } + static int64 UnmapSize() { return unmap_size_; } + + // Effectively private type from our .cc ================================= + // public to let us declare global objects: + union RegionSetRep; + + private: + // representation =========================================================== + + // Counter of clients of this module that have called Init(). + static int client_count_; + + // Maximal number of caller stack frames to save (>= 0). + static int max_stack_depth_; + + // Arena used for our allocations in regions_. + static LowLevelAlloc::Arena* arena_; + + // Set of the mmap/sbrk/mremap-ed memory regions + // To be accessed *only* when Lock() is held. + // Hence we protect the non-recursive lock used inside of arena_ + // with our recursive Lock(). This lets a user prevent deadlocks + // when threads are stopped by TCMalloc_ListAllProcessThreads at random spots + // simply by acquiring our recursive Lock() before that. + static RegionSet* regions_; + + // Lock to protect regions_ and buckets_ variables and the data behind. + static SpinLock lock_; + // Lock to protect the recursive lock itself. + static SpinLock owner_lock_; + + // Recursion count for the recursive lock. + static int recursion_count_; + // The thread id of the thread that's inside the recursive lock. + static pthread_t lock_owner_tid_; + + // Total size of all mapped pages so far + static int64 map_size_; + // Total size of all unmapped pages so far + static int64 unmap_size_; + + // Bucket hash table which is described in heap-profile-stats.h. + static HeapProfileBucket** bucket_table_ GUARDED_BY(lock_); + static int num_buckets_ GUARDED_BY(lock_); + + // The following members are local to MemoryRegionMap::GetBucket() + // and MemoryRegionMap::HandleSavedBucketsLocked() + // and are file-level to ensure that they are initialized at load time. + // + // These are used as temporary storage to break the infinite cycle of mmap + // calling our hook which (sometimes) causes mmap. It must be a static + // fixed-size array. The size 20 is just an expected value for safety. + // The details are described in memory_region_map.cc. + + // Number of unprocessed bucket inserts. + static int saved_buckets_count_ GUARDED_BY(lock_); + + // Unprocessed inserts (must be big enough to hold all mmaps that can be + // caused by a GetBucket call). + // Bucket has no constructor, so that c-tor execution does not interfere + // with the any-time use of the static memory behind saved_buckets. + static HeapProfileBucket saved_buckets_[20] GUARDED_BY(lock_); + + static const void* saved_buckets_keys_[20][kMaxStackDepth] GUARDED_BY(lock_); + + // helpers ================================================================== + + // Helper for FindRegion and FindAndMarkStackRegion: + // returns the region covering 'addr' or NULL; assumes our lock_ is held. + static const Region* DoFindRegionLocked(uintptr_t addr); + + // Verifying wrapper around regions_->insert(region) + // To be called to do InsertRegionLocked's work only! + inline static void DoInsertRegionLocked(const Region& region); + // Handle regions saved by InsertRegionLocked into a tmp static array + // by calling insert_func on them. + inline static void HandleSavedRegionsLocked( + void (*insert_func)(const Region& region)); + + // Restore buckets saved in a tmp static array by GetBucket to the bucket + // table where all buckets eventually should be. 
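+  // (When a permanent bucket for the same stack already exists, the saved
+  // bucket's alloc/free counts are merged into it; see the .cc file.)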
+ static void RestoreSavedBucketsLocked(); + + // Wrapper around DoInsertRegionLocked + // that handles the case of recursive allocator calls. + inline static void InsertRegionLocked(const Region& region); + + // Record addition of a memory region at address "start" of size "size" + // (called from our mmap/mremap/sbrk hooks). + static void RecordRegionAddition(const void* start, size_t size); + // Record deletion of a memory region at address "start" of size "size" + // (called from our munmap/mremap/sbrk hooks). + static void RecordRegionRemoval(const void* start, size_t size); + + // Record deletion of a memory region of size "size" in a bucket whose + // caller stack trace is "key". The stack trace is used to a depth of + // "depth" at most. + static void RecordRegionRemovalInBucket(int depth, + const void* const key[], + size_t size); + + // Hooks for MallocHook + static void MmapHook(const void* result, + const void* start, size_t size, + int prot, int flags, + int fd, off_t offset); + static void MunmapHook(const void* ptr, size_t size); + static void MremapHook(const void* result, const void* old_addr, + size_t old_size, size_t new_size, int flags, + const void* new_addr); + static void SbrkHook(const void* result, ptrdiff_t increment); + + // Log all memory regions; Useful for debugging only. + // Assumes Lock() is held + static void LogAllLocked(); + + DISALLOW_COPY_AND_ASSIGN(MemoryRegionMap); +}; + +template <class Type> +void MemoryRegionMap::IterateBuckets( + void (*callback)(const HeapProfileBucket*, Type), Type callback_arg) { + for (int index = 0; index < kHashTableSize; index++) { + for (HeapProfileBucket* bucket = bucket_table_[index]; + bucket != NULL; + bucket = bucket->next) { + callback(bucket, callback_arg); + } + } +} + +#endif // BASE_MEMORY_REGION_MAP_H_ diff --git a/src/third_party/gperftools-2.5/src/packed-cache-inl.h b/src/third_party/gperftools-2.5/src/packed-cache-inl.h new file mode 100644 index 00000000000..09462608ece --- /dev/null +++ b/src/third_party/gperftools-2.5/src/packed-cache-inl.h @@ -0,0 +1,239 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Geoff Pike
+//
+// This file provides a minimal cache that can hold a <key, value> pair
+// with little if any wasted space. The types of the key and value
+// must be unsigned integral types or at least have unsigned semantics
+// for >>, casting, and similar operations.
+//
+// Synchronization is not provided. However, the cache is implemented
+// as an array of cache entries whose type is chosen at compile time.
+// If a[i] is atomic on your hardware for the chosen array type then
+// raciness will not necessarily lead to bugginess. The cache entries
+// must be large enough to hold a partial key and a value packed
+// together. The partial keys are bit strings of length
+// kKeybits - kHashbits, and the values are bit strings of length kValuebits.
+//
+// In an effort to use minimal space, every cache entry represents
+// some <key, value> pair; the class provides no way to mark a cache
+// entry as empty or uninitialized. In practice, you may want to have
+// reserved keys or values to get around this limitation. For example, in
+// tcmalloc's PageID-to-sizeclass cache, a value of 0 is used as
+// "unknown sizeclass."
+//
+// Usage Considerations
+// --------------------
+//
+// kHashbits controls the size of the cache. The best value for
+// kHashbits will of course depend on the application. Perhaps try
+// tuning the value of kHashbits by measuring different values on your
+// favorite benchmark. Also remember not to be a pig; other
+// programs that need resources may suffer if you are.
+//
+// The main uses for this class will be when performance is
+// critical and there's a convenient type to hold the cache's
+// entries. As described above, the number of bits required
+// for a cache entry is (kKeybits - kHashbits) + kValuebits. Suppose
+// kKeybits + kValuebits is 43. Then it probably makes sense to
+// choose kHashbits >= 11 so that cache entries fit in a uint32.
+//
+// On the other hand, suppose kKeybits = kValuebits = 64. Then
+// using this class may be less worthwhile. You'll probably
+// be using 128 bits for each entry anyway, so maybe just pick
+// a hash function, H, and use an array indexed by H(key):
+//    void Put(K key, V value) { a_[H(key)] = pair<K, V>(key, value); }
+//    V GetOrDefault(K key, V default) { const pair<K, V> &p = a_[H(key)]; ... }
+// etc.
+//
+// Further Details
+// ---------------
+//
+// For caches used only by one thread, the following is true:
+// 1. For a cache c,
+//      (c.Put(key, value), c.GetOrDefault(key, 0)) == value
+//    and
+//      (c.Put(key, value), <...>, c.GetOrDefault(key, 0)) == value
+//    if the elided code contains no c.Put calls.
+//
+// 2. Has(key) will return false if no <key, value> pair with that key
+//    has ever been Put. However, a newly initialized cache will have
+//    some <key, value> pairs already present. When you create a new
+//    cache, you must specify an "initial value."
The initialization +// procedure is equivalent to Clear(initial_value), which is +// equivalent to Put(k, initial_value) for all keys k from 0 to +// 2^kHashbits - 1. +// +// 3. If key and key' differ then the only way Put(key, value) may +// cause Has(key') to change is that Has(key') may change from true to +// false. Furthermore, a Put() call that doesn't change Has(key') +// doesn't change GetOrDefault(key', ...) either. +// +// Implementation details: +// +// This is a direct-mapped cache with 2^kHashbits entries; the hash +// function simply takes the low bits of the key. We store whole keys +// if a whole key plus a whole value fits in an entry. Otherwise, an +// entry is the high bits of a key and a value, packed together. +// E.g., a 20 bit key and a 7 bit value only require a uint16 for each +// entry if kHashbits >= 11. +// +// Alternatives to this scheme will be added as needed. + +#ifndef TCMALLOC_PACKED_CACHE_INL_H_ +#define TCMALLOC_PACKED_CACHE_INL_H_ + +#include "config.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#include "base/basictypes.h" +#include "internal_logging.h" + +// A safe way of doing "(1 << n) - 1" -- without worrying about overflow +// Note this will all be resolved to a constant expression at compile-time +#define N_ONES_(IntType, N) \ + ( (N) == 0 ? 0 : ((static_cast<IntType>(1) << ((N)-1))-1 + \ + (static_cast<IntType>(1) << ((N)-1))) ) + +// The types K and V provide upper bounds on the number of valid keys +// and values, but we explicitly require the keys to be less than +// 2^kKeybits and the values to be less than 2^kValuebits. The size of +// the table is controlled by kHashbits, and the type of each entry in +// the cache is T. See also the big comment at the top of the file. +template <int kKeybits, typename T> +class PackedCache { + public: + typedef uintptr_t K; + typedef size_t V; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Decrease the size map cache if running in the small memory mode. + static const int kHashbits = 12; +#else + static const int kHashbits = 16; +#endif + static const int kValuebits = 7; + static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T); + + explicit PackedCache(V initial_value) { + COMPILE_ASSERT(kKeybits <= sizeof(K) * 8, key_size); + COMPILE_ASSERT(kValuebits <= sizeof(V) * 8, value_size); + COMPILE_ASSERT(kHashbits <= kKeybits, hash_function); + COMPILE_ASSERT(kKeybits - kHashbits + kValuebits <= kTbits, + entry_size_must_be_big_enough); + Clear(initial_value); + } + + void Put(K key, V value) { + ASSERT(key == (key & kKeyMask)); + ASSERT(value == (value & kValueMask)); + array_[Hash(key)] = KeyToUpper(key) | value; + } + + bool Has(K key) const { + ASSERT(key == (key & kKeyMask)); + return KeyMatch(array_[Hash(key)], key); + } + + V GetOrDefault(K key, V default_value) const { + // As with other code in this class, we touch array_ as few times + // as we can. Assuming entries are read atomically (e.g., their + // type is uintptr_t on most hardware) then certain races are + // harmless. + ASSERT(key == (key & kKeyMask)); + T entry = array_[Hash(key)]; + return KeyMatch(entry, key) ? EntryToValue(entry) : default_value; + } + + void Clear(V value) { + ASSERT(value == (value & kValueMask)); + for (int i = 0; i < 1 << kHashbits; i++) { + ASSERT(kUseWholeKeys || KeyToUpper(i) == 0); + array_[i] = kUseWholeKeys ? 
(value | KeyToUpper(i)) : value; + } + } + + private: + // We are going to pack a value and the upper part of a key (or a + // whole key) into an entry of type T. The UPPER type is for the + // upper part of a key, after the key has been masked and shifted + // for inclusion in an entry. + typedef T UPPER; + + static V EntryToValue(T t) { return t & kValueMask; } + + // If we have space for a whole key, we just shift it left. + // Otherwise kHashbits determines where in a K to find the upper + // part of the key, and kValuebits determines where in the entry to + // put it. + static UPPER KeyToUpper(K k) { + if (kUseWholeKeys) { + return static_cast<T>(k) << kValuebits; + } else { + const int shift = kHashbits - kValuebits; + // Assume kHashbits >= kValuebits. It'd be easy to lift this assumption. + return static_cast<T>(k >> shift) & kUpperMask; + } + } + + static size_t Hash(K key) { + return static_cast<size_t>(key) & N_ONES_(size_t, kHashbits); + } + + // Does the entry match the relevant part of the given key? + static bool KeyMatch(T entry, K key) { + return kUseWholeKeys ? + (entry >> kValuebits == key) : + ((KeyToUpper(key) ^ entry) & kUpperMask) == 0; + } + + static const int kTbits = 8 * sizeof(T); + static const int kUpperbits = kUseWholeKeys ? kKeybits : kKeybits - kHashbits; + + // For masking a K. + static const K kKeyMask = N_ONES_(K, kKeybits); + + // For masking a T. + static const T kUpperMask = N_ONES_(T, kUpperbits) << kValuebits; + + // For masking a V or a T. + static const V kValueMask = N_ONES_(V, kValuebits); + + // array_ is the cache. Its elements are volatile because any + // thread can write any array element at any time. + volatile T array_[1 << kHashbits]; +}; + +#undef N_ONES_ + +#endif // TCMALLOC_PACKED_CACHE_INL_H_ diff --git a/src/third_party/gperftools-2.5/src/page_heap.cc b/src/third_party/gperftools-2.5/src/page_heap.cc new file mode 100644 index 00000000000..f52ae2af029 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/page_heap.cc @@ -0,0 +1,682 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat <opensource@google.com>
+
+#include <config.h>
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>                   // for PRIuPTR
+#endif
+#include <errno.h>                      // for ENOMEM, errno
+#include <gperftools/malloc_extension.h>  // for MallocRange, etc
+#include "base/basictypes.h"
+#include "base/commandlineflags.h"
+#include "internal_logging.h"  // for ASSERT, TCMalloc_Printer, etc
+#include "page_heap_allocator.h"  // for PageHeapAllocator
+#include "static_vars.h"       // for Static
+#include "system-alloc.h"      // for TCMalloc_SystemAlloc, etc
+
+DEFINE_double(tcmalloc_release_rate,
+              EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0),
+              "Rate at which we release unused memory to the system. "
+              "Zero means we never release memory back to the system. "
+              "Increase this flag to return memory faster; decrease it "
+              "to return memory slower. Reasonable rates are in the "
+              "range [0,10]");
+
+DEFINE_int64(tcmalloc_heap_limit_mb,
+             EnvToInt("TCMALLOC_HEAP_LIMIT_MB", 0),
+             "Limit total size of the process heap to the "
+             "specified number of MiB. "
+             "When we approach the limit the memory is released "
+             "to the system more aggressively (more minor page faults). "
+             "Zero means to allocate as long as system allows.");
+
+namespace tcmalloc {
+
+PageHeap::PageHeap()
+    : pagemap_(MetaDataAlloc),
+      pagemap_cache_(0),
+      scavenge_counter_(0),
+      // Start scavenging at kMaxPages list
+      release_index_(kMaxPages),
+      aggressive_decommit_(false) {
+  COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
+  DLL_Init(&large_.normal);
+  DLL_Init(&large_.returned);
+  for (int i = 0; i < kMaxPages; i++) {
+    DLL_Init(&free_[i].normal);
+    DLL_Init(&free_[i].returned);
+  }
+}
+
+Span* PageHeap::SearchFreeAndLargeLists(Length n) {
+  ASSERT(Check());
+  ASSERT(n > 0);
+
+  // Find first size >= n that has a non-empty list
+  for (Length s = n; s < kMaxPages; s++) {
+    Span* ll = &free_[s].normal;
+    // If we're lucky, ll is non-empty, meaning it has a suitable span.
+    if (!DLL_IsEmpty(ll)) {
+      ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
+      return Carve(ll->next, n);
+    }
+    // Alternatively, maybe there's a usable returned span.
+    ll = &free_[s].returned;
+    if (!DLL_IsEmpty(ll)) {
+      // We did not call EnsureLimit before, to avoid releasing the span
+      // that will be taken immediately back.
+      // Calling EnsureLimit here is not very expensive, as it fails only if
+      // there are no more normal spans (and it fails efficiently)
+      // or SystemRelease does not work (there are probably no returned spans).
+      if (EnsureLimit(n)) {
+        // ll may have become empty due to coalescing
+        if (!DLL_IsEmpty(ll)) {
+          ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
+          return Carve(ll->next, n);
+        }
+      }
+    }
+  }
+  // No luck in free lists, our last chance is in a larger class.
+  return AllocLarge(n); // May be NULL
+}
+
+static const size_t kForcedCoalesceInterval = 128*1024*1024;
+
+Span* PageHeap::New(Length n) {
+  ASSERT(Check());
+  ASSERT(n > 0);
+
+  Span* result = SearchFreeAndLargeLists(n);
+  if (result != NULL)
+    return result;
+
+  if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
+      && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
+      && (stats_.system_bytes / kForcedCoalesceInterval
+          != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
+    // We're about to grow the heap, but there are lots of free pages.
+    // tcmalloc's design decision to keep unmapped and free spans
+    // separate and never coalesce them means that sometimes there
+    // can be a free span of sufficient size, but it consists of
+    // "segments" of different types, so the page heap search cannot
+    // find it. To prevent growing the heap and wasting memory in
+    // such a case, we unmap all free pages, so that all free spans
+    // are maximally coalesced.
+    //
+    // We also limit the rate of taking this path to at most once
+    // per 128 MiB of heap growth. Otherwise programs that grow the
+    // heap frequently (which means by small amounts) could be
+    // penalized with a higher count of minor page faults.
+    //
+    // See also large_heap_fragmentation_unittest.cc and
+    // https://code.google.com/p/gperftools/issues/detail?id=368
+    ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff));
+
+    // Then try again. If we are forced to grow the heap because of
+    // large-span fragmentation and not because of the problem described
+    // above, then at the very least we've just unmapped free but
+    // insufficiently big large spans back to the OS. So in case of really
+    // unlucky memory fragmentation we'll be consuming virtual address
+    // space, but not real memory.
+    result = SearchFreeAndLargeLists(n);
+    if (result != NULL) return result;
+  }
+
+  // Grow the heap and try again.
+  if (!GrowHeap(n)) {
+    ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
+    ASSERT(Check());
+    // The underlying SysAllocator likely set ENOMEM, but we can get
+    // here due to EnsureLimit, so we set it here too.
+    //
+    // Setting errno to ENOMEM here allows us to avoid dealing with it
+    // in the fast path.
+    errno = ENOMEM;
+    return NULL;
+  }
+  return SearchFreeAndLargeLists(n);
+}
+
+Span* PageHeap::AllocLarge(Length n) {
+  // Find the best span (closest to n in size).
+  // The following loops implement address-ordered best-fit.
+  Span *best = NULL;
+
+  // Search through normal list
+  for (Span* span = large_.normal.next;
+       span != &large_.normal;
+       span = span->next) {
+    if (span->length >= n) {
+      if ((best == NULL)
+          || (span->length < best->length)
+          || ((span->length == best->length) && (span->start < best->start))) {
+        best = span;
+        ASSERT(best->location == Span::ON_NORMAL_FREELIST);
+      }
+    }
+  }
+
+  Span *bestNormal = best;
+
+  // Search through released list in case it has a better fit
+  for (Span* span = large_.returned.next;
+       span != &large_.returned;
+       span = span->next) {
+    if (span->length >= n) {
+      if ((best == NULL)
+          || (span->length < best->length)
+          || ((span->length == best->length) && (span->start < best->start))) {
+        best = span;
+        ASSERT(best->location == Span::ON_RETURNED_FREELIST);
+      }
+    }
+  }
+
+  if (best == bestNormal) {
+    return best == NULL ? NULL : Carve(best, n);
+  }
+
+  // best comes from returned list.
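+  //
+  // Carving a span from the returned list recommits its pages, so the
+  // heap limit must be re-checked first. The cheap check below
+  // (EnsureLimit(n, false)) releases nothing and leaves 'best' intact;
+  // the second check (EnsureLimit(n, true)) may release spans, and the
+  // resulting coalescing can destroy 'best', hence the retry through
+  // AllocLarge(n).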
+
+  if (EnsureLimit(n, false)) {
+    return Carve(best, n);
+  }
+
+  if (EnsureLimit(n, true)) {
+    // best could have been destroyed by coalescing.
+    // bestNormal is not a best-fit, and it could be destroyed as well.
+    // We retry, the limit is already ensured:
+    return AllocLarge(n);
+  }
+
+  // If bestNormal existed, EnsureLimit would have succeeded:
+  ASSERT(bestNormal == NULL);
+  // We are not allowed to take best from returned list.
+  return NULL;
+}
+
+Span* PageHeap::Split(Span* span, Length n) {
+  ASSERT(0 < n);
+  ASSERT(n < span->length);
+  ASSERT(span->location == Span::IN_USE);
+  ASSERT(span->sizeclass == 0);
+  Event(span, 'T', n);
+
+  const int extra = span->length - n;
+  Span* leftover = NewSpan(span->start + n, extra);
+  ASSERT(leftover->location == Span::IN_USE);
+  Event(leftover, 'U', extra);
+  RecordSpan(leftover);
+  pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
+  span->length = n;
+
+  return leftover;
+}
+
+void PageHeap::CommitSpan(Span* span) {
+  TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift),
+                        static_cast<size_t>(span->length << kPageShift));
+  stats_.committed_bytes += span->length << kPageShift;
+}
+
+bool PageHeap::DecommitSpan(Span* span) {
+  bool rv = TCMalloc_SystemRelease(reinterpret_cast<void*>(span->start << kPageShift),
+                                   static_cast<size_t>(span->length << kPageShift));
+  if (rv) {
+    stats_.committed_bytes -= span->length << kPageShift;
+  }
+
+  return rv;
+}
+
+Span* PageHeap::Carve(Span* span, Length n) {
+  ASSERT(n > 0);
+  ASSERT(span->location != Span::IN_USE);
+  const int old_location = span->location;
+  RemoveFromFreeList(span);
+  span->location = Span::IN_USE;
+  Event(span, 'A', n);
+
+  const int extra = span->length - n;
+  ASSERT(extra >= 0);
+  if (extra > 0) {
+    Span* leftover = NewSpan(span->start + n, extra);
+    leftover->location = old_location;
+    Event(leftover, 'S', extra);
+    RecordSpan(leftover);
+
+    // The previous span of |leftover| was just split -- no need to
+    // coalesce them. The next span of |leftover| was not previously coalesced
+    // with |span|, i.e. it is NULL or has a location other than |old_location|.
+#ifndef NDEBUG
+    const PageID p = leftover->start;
+    const Length len = leftover->length;
+    Span* next = GetDescriptor(p+len);
+    ASSERT(next == NULL ||
+           next->location == Span::IN_USE ||
+           next->location != leftover->location);
+#endif
+
+    PrependToFreeList(leftover); // Skip coalescing - no candidates possible
+    span->length = n;
+    pagemap_.set(span->start + n - 1, span);
+  }
+  ASSERT(Check());
+  if (old_location == Span::ON_RETURNED_FREELIST) {
+    // We need to recommit this address space.
+    CommitSpan(span);
+  }
+  ASSERT(span->location == Span::IN_USE);
+  ASSERT(span->length == n);
+  ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
+  return span;
+}
+
+void PageHeap::Delete(Span* span) {
+  ASSERT(Check());
+  ASSERT(span->location == Span::IN_USE);
+  ASSERT(span->length > 0);
+  ASSERT(GetDescriptor(span->start) == span);
+  ASSERT(GetDescriptor(span->start + span->length - 1) == span);
+  const Length n = span->length;
+  span->sizeclass = 0;
+  span->sample = 0;
+  span->location = Span::ON_NORMAL_FREELIST;
+  Event(span, 'D', span->length);
+  MergeIntoFreeList(span); // Coalesces if possible
+  IncrementalScavenge(n);
+  ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
+  ASSERT(Check());
+}
+
+bool PageHeap::MayMergeSpans(Span *span, Span *other) {
+  if (aggressive_decommit_) {
+    return other->location != Span::IN_USE;
+  }
+  return span->location == other->location;
+}
+
+void PageHeap::MergeIntoFreeList(Span* span) {
+  ASSERT(span->location != Span::IN_USE);
+
+  // Coalesce -- we guarantee that "p" != 0, so no bounds checking
+  // necessary. We do not bother resetting the stale pagemap
+  // entries for the pieces we are merging together because we only
+  // care about the pagemap entries for the boundaries.
+  //
+  // Note: depending on the aggressive_decommit_ mode, we allow only
+  // similar spans to be coalesced.
+  //
+  // The following applies if aggressive_decommit_ is enabled:
+  //
+  // Note that the adjacent spans we merge into "span" may come out of a
+  // "normal" (committed) list, and cleanly merge with our IN_USE span, which
+  // is implicitly committed. If the adjacent spans are on the "returned"
+  // (decommitted) list, then we must get both spans into the same state before
+  // or after we coalesce them. The current code always decommits. This is
+  // achieved by blindly decommitting the entire coalesced region, which may
+  // include any combination of committed and decommitted spans, at the end of
+  // the method.
+
+  // TODO(jar): "Always decommit" causes some extra calls to commit when we are
+  // called in GrowHeap() during an allocation :-/. We need to eval the cost of
+  // that oscillation, and possibly do something to reduce it.
+
+  // TODO(jar): We need a better strategy for deciding to commit, or decommit,
+  // based on memory usage and free heap sizes.
+
+  uint64_t temp_committed = 0;
+
+  const PageID p = span->start;
+  const Length n = span->length;
+  Span* prev = GetDescriptor(p-1);
+  if (prev != NULL && MayMergeSpans(span, prev)) {
+    // Merge preceding span into this span
+    ASSERT(prev->start + prev->length == p);
+    const Length len = prev->length;
+    if (aggressive_decommit_ && prev->location == Span::ON_RETURNED_FREELIST) {
+      // We're about to put the merged span into the returned freelist and call
+      // DecommitSpan() on it, which will mark the entire span including this
+      // one as released and decrease stats_.committed_bytes by the size of the
+      // merged span. To make the math work out we temporarily increase the
+      // stats_.committed_bytes amount.
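+      //
+      // For illustration (hypothetical sizes, with 4KiB pages, i.e.
+      // kPageShift == 12): if prev is a 3-page returned span merging with
+      // a 2-page span, DecommitSpan() at the end will subtract all 5
+      // pages from stats_.committed_bytes even though only 2 of them
+      // were committed; recording prev's 3 pages (12288 bytes) here and
+      // adding them back after the decommit keeps the counter balanced.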
+ temp_committed = prev->length << kPageShift; + } + RemoveFromFreeList(prev); + DeleteSpan(prev); + span->start -= len; + span->length += len; + pagemap_.set(span->start, span); + Event(span, 'L', len); + } + Span* next = GetDescriptor(p+n); + if (next != NULL && MayMergeSpans(span, next)) { + // Merge next span into this span + ASSERT(next->start == p+n); + const Length len = next->length; + if (aggressive_decommit_ && next->location == Span::ON_RETURNED_FREELIST) { + // See the comment below 'if (prev->location ...' for explanation. + temp_committed += next->length << kPageShift; + } + RemoveFromFreeList(next); + DeleteSpan(next); + span->length += len; + pagemap_.set(span->start + span->length - 1, span); + Event(span, 'R', len); + } + + if (aggressive_decommit_) { + if (DecommitSpan(span)) { + span->location = Span::ON_RETURNED_FREELIST; + stats_.committed_bytes += temp_committed; + } else { + ASSERT(temp_committed == 0); + } + } + PrependToFreeList(span); +} + +void PageHeap::PrependToFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + SpanList* list = (span->length < kMaxPages) ? &free_[span->length] : &large_; + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes += (span->length << kPageShift); + DLL_Prepend(&list->normal, span); + } else { + stats_.unmapped_bytes += (span->length << kPageShift); + DLL_Prepend(&list->returned, span); + } +} + +void PageHeap::RemoveFromFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes -= (span->length << kPageShift); + } else { + stats_.unmapped_bytes -= (span->length << kPageShift); + } + DLL_Remove(span); +} + +void PageHeap::IncrementalScavenge(Length n) { + // Fast path; not yet time to release memory + scavenge_counter_ -= n; + if (scavenge_counter_ >= 0) return; // Not yet time to scavenge + + const double rate = FLAGS_tcmalloc_release_rate; + if (rate <= 1e-6) { + // Tiny release rate means that releasing is disabled. + scavenge_counter_ = kDefaultReleaseDelay; + return; + } + + Length released_pages = ReleaseAtLeastNPages(1); + + if (released_pages == 0) { + // Nothing to scavenge, delay for a while. + scavenge_counter_ = kDefaultReleaseDelay; + } else { + // Compute how long to wait until we return memory. + // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages + // after releasing one page. + const double mult = 1000.0 / rate; + double wait = mult * static_cast<double>(released_pages); + if (wait > kMaxReleaseDelay) { + // Avoid overflow and bound to reasonable range. + wait = kMaxReleaseDelay; + } + scavenge_counter_ = static_cast<int64_t>(wait); + } +} + +Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) { + Span* s = slist->normal.prev; + ASSERT(s->location == Span::ON_NORMAL_FREELIST); + + if (DecommitSpan(s)) { + RemoveFromFreeList(s); + const Length n = s->length; + s->location = Span::ON_RETURNED_FREELIST; + MergeIntoFreeList(s); // Coalesces if possible. + return n; + } + + return 0; +} + +Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { + Length released_pages = 0; + + // Round robin through the lists of free spans, releasing the last + // span in each list. Stop after releasing at least num_pages + // or when there is nothing more to release. 
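+  //
+  // E.g. (hypothetical state, supposing kMaxPages == 128 and
+  // release_index_ == 64): the loop below visits free_[64] ... free_[127],
+  // then large_ (at index kMaxPages), then wraps to free_[0], free_[1], ...,
+  // releasing the last normal span of each non-empty list until num_pages
+  // pages have been released or there is nothing left to release.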
+  while (released_pages < num_pages && stats_.free_bytes > 0) {
+    for (int i = 0; i < kMaxPages+1 && released_pages < num_pages;
+         i++, release_index_++) {
+      if (release_index_ > kMaxPages) release_index_ = 0;
+      SpanList* slist = (release_index_ == kMaxPages) ?
+          &large_ : &free_[release_index_];
+      if (!DLL_IsEmpty(&slist->normal)) {
+        Length released_len = ReleaseLastNormalSpan(slist);
+        // Some systems do not support release
+        if (released_len == 0) return released_pages;
+        released_pages += released_len;
+      }
+    }
+  }
+  return released_pages;
+}
+
+bool PageHeap::EnsureLimit(Length n, bool withRelease)
+{
+  Length limit = (FLAGS_tcmalloc_heap_limit_mb*1024*1024) >> kPageShift;
+  if (limit == 0) return true; // there is no limit
+
+  // We do not use stats_.system_bytes because it does not take
+  // MetaDataAllocs into account.
+  Length takenPages = TCMalloc_SystemTaken >> kPageShift;
+  // XXX takenPages may be slightly bigger than limit for two reasons:
+  // * MetaDataAllocs ignore the limit (it is not easy to handle
+  //   out of memory there)
+  // * sys_alloc may round allocation up to huge page size,
+  //   although a smaller limit was ensured
+
+  ASSERT(takenPages >= stats_.unmapped_bytes >> kPageShift);
+  takenPages -= stats_.unmapped_bytes >> kPageShift;
+
+  if (takenPages + n > limit && withRelease) {
+    takenPages -= ReleaseAtLeastNPages(takenPages + n - limit);
+  }
+
+  return takenPages + n <= limit;
+}
+
+void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
+  // Associate span object with all interior pages as well
+  ASSERT(span->location == Span::IN_USE);
+  ASSERT(GetDescriptor(span->start) == span);
+  ASSERT(GetDescriptor(span->start+span->length-1) == span);
+  Event(span, 'C', sc);
+  span->sizeclass = sc;
+  for (Length i = 1; i < span->length-1; i++) {
+    pagemap_.set(span->start+i, span);
+  }
+}
+
+void PageHeap::GetSmallSpanStats(SmallSpanStats* result) {
+  for (int s = 0; s < kMaxPages; s++) {
+    result->normal_length[s] = DLL_Length(&free_[s].normal);
+    result->returned_length[s] = DLL_Length(&free_[s].returned);
+  }
+}
+
+void PageHeap::GetLargeSpanStats(LargeSpanStats* result) {
+  result->spans = 0;
+  result->normal_pages = 0;
+  result->returned_pages = 0;
+  for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
+    result->normal_pages += s->length;
+    result->spans++;
+  }
+  for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
+    result->returned_pages += s->length;
+    result->spans++;
+  }
+}
+
+bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) {
+  Span* span = reinterpret_cast<Span*>(pagemap_.Next(start));
+  if (span == NULL) {
+    return false;
+  }
+  r->address = span->start << kPageShift;
+  r->length = span->length << kPageShift;
+  r->fraction = 0;
+  switch (span->location) {
+    case Span::IN_USE:
+      r->type = base::MallocRange::INUSE;
+      r->fraction = 1;
+      if (span->sizeclass > 0) {
+        // Only some of the objects in this span may be in use.
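+        // E.g. (hypothetical numbers, with 4KiB pages): a one-page span
+        // of 64-byte objects with refcount == 32 yields
+        // fraction == (1.0 * 64 * 32) / 4096 == 0.5, i.e. half of the
+        // range is considered live.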
+        const size_t osize = Static::sizemap()->class_to_size(span->sizeclass);
+        r->fraction = (1.0 * osize * span->refcount) / r->length;
+      }
+      break;
+    case Span::ON_NORMAL_FREELIST:
+      r->type = base::MallocRange::FREE;
+      break;
+    case Span::ON_RETURNED_FREELIST:
+      r->type = base::MallocRange::UNMAPPED;
+      break;
+    default:
+      r->type = base::MallocRange::UNKNOWN;
+      break;
+  }
+  return true;
+}
+
+static void RecordGrowth(size_t growth) {
+  StackTrace* t = Static::stacktrace_allocator()->New();
+  t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3);
+  t->size = growth;
+  t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(Static::growth_stacks());
+  Static::set_growth_stacks(t);
+}
+
+bool PageHeap::GrowHeap(Length n) {
+  ASSERT(kMaxPages >= kMinSystemAlloc);
+  if (n > kMaxValidPages) return false;
+  Length ask = (n > kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
+  size_t actual_size;
+  void* ptr = NULL;
+  if (EnsureLimit(ask)) {
+    ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
+  }
+  if (ptr == NULL) {
+    if (n < ask) {
+      // Try growing just "n" pages
+      ask = n;
+      if (EnsureLimit(ask)) {
+        ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
+      }
+    }
+    if (ptr == NULL) return false;
+  }
+  ask = actual_size >> kPageShift;
+  RecordGrowth(ask << kPageShift);
+
+  uint64_t old_system_bytes = stats_.system_bytes;
+  stats_.system_bytes += (ask << kPageShift);
+  stats_.committed_bytes += (ask << kPageShift);
+  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  ASSERT(p > 0);
+
+  // If we already have a lot of pages allocated, just preallocate a bunch of
+  // memory for the page map. This prevents fragmentation by pagemap metadata
+  // when a program keeps allocating and freeing large blocks.
+
+  if (old_system_bytes < kPageMapBigAllocationThreshold
+      && stats_.system_bytes >= kPageMapBigAllocationThreshold) {
+    pagemap_.PreallocateMoreMemory();
+  }
+
+  // Make sure pagemap_ has entries for all of the new pages.
+  // Plus ensure one before and one after so coalescing code
+  // does not need bounds-checking.
+  if (pagemap_.Ensure(p-1, ask+2)) {
+    // Pretend the new area is allocated and then Delete() it to cause
+    // any necessary coalescing to occur.
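+    // (Successive system allocations are often adjacent, so routing the
+    // new range through the regular Delete() path lets MergeIntoFreeList()
+    // glue it onto any free span already bordering it, rather than
+    // duplicating that coalescing logic here.)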
+ Span* span = NewSpan(p, ask); + RecordSpan(span); + Delete(span); + ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); + ASSERT(Check()); + return true; + } else { + // We could not allocate memory within "pagemap_" + // TODO: Once we can return memory to the system, return the new span + return false; + } +} + +bool PageHeap::Check() { + ASSERT(free_[0].normal.next == &free_[0].normal); + ASSERT(free_[0].returned.next == &free_[0].returned); + return true; +} + +bool PageHeap::CheckExpensive() { + bool result = Check(); + CheckList(&large_.normal, kMaxPages, 1000000000, Span::ON_NORMAL_FREELIST); + CheckList(&large_.returned, kMaxPages, 1000000000, Span::ON_RETURNED_FREELIST); + for (Length s = 1; s < kMaxPages; s++) { + CheckList(&free_[s].normal, s, s, Span::ON_NORMAL_FREELIST); + CheckList(&free_[s].returned, s, s, Span::ON_RETURNED_FREELIST); + } + return result; +} + +bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, + int freelist) { + for (Span* s = list->next; s != list; s = s->next) { + CHECK_CONDITION(s->location == freelist); // NORMAL or RETURNED + CHECK_CONDITION(s->length >= min_pages); + CHECK_CONDITION(s->length <= max_pages); + CHECK_CONDITION(GetDescriptor(s->start) == s); + CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); + } + return true; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/page_heap.h b/src/third_party/gperftools-2.5/src/page_heap.h new file mode 100644 index 00000000000..18abed1974a --- /dev/null +++ b/src/third_party/gperftools-2.5/src/page_heap.h @@ -0,0 +1,316 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+// ---
+// Author: Sanjay Ghemawat <opensource@google.com>
+
+#ifndef TCMALLOC_PAGE_HEAP_H_
+#define TCMALLOC_PAGE_HEAP_H_
+
+#include <config.h>
+#include <stddef.h>                     // for size_t
+#ifdef HAVE_STDINT_H
+#include <stdint.h>                     // for uint64_t, int64_t, uint16_t
+#endif
+#include <gperftools/malloc_extension.h>
+#include "base/basictypes.h"
+#include "common.h"
+#include "packed-cache-inl.h"
+#include "pagemap.h"
+#include "span.h"
+
+// We need to dllexport PageHeap just for the unittest. MSVC complains
+// that we don't dllexport the PageHeap members, but we don't need to
+// test those, so I just suppress this warning.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4251)
+#endif
+
+// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
+// you're porting to a system where you really can't get a stacktrace.
+// Because we control the definition of GetStackTrace, all clients of
+// GetStackTrace should #include us rather than stacktrace.h.
+#ifdef NO_TCMALLOC_SAMPLES
+  // We use #define so code compiles even if you #include stacktrace.h somehow.
+# define GetStackTrace(stack, depth, skip) (0)
+#else
+# include <gperftools/stacktrace.h>
+#endif
+
+namespace base {
+struct MallocRange;
+}
+
+namespace tcmalloc {
+
+// -------------------------------------------------------------------------
+// Map from page-id to per-page data
+// -------------------------------------------------------------------------
+
+// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
+// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
+// because sometimes the sizeclass is all the information we need.
+
+// Selector class -- general selector uses 3-level map
+template <int BITS> class MapSelector {
+ public:
+  typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
+  typedef PackedCache<BITS-kPageShift, uint64_t> CacheType;
+};
+
+// A two-level map for 32-bit machines
+template <> class MapSelector<32> {
+ public:
+  typedef TCMalloc_PageMap2<32-kPageShift> Type;
+  typedef PackedCache<32-kPageShift, uint16_t> CacheType;
+};
+
+// -------------------------------------------------------------------------
+// Page-level allocator
+//  * Eager coalescing
+//
+// Heap for page-level allocation. We allow allocating and freeing a
+// contiguous run of pages (called a "span").
+// -------------------------------------------------------------------------
+
+class PERFTOOLS_DLL_DECL PageHeap {
+ public:
+  PageHeap();
+
+  // Allocate a run of "n" pages. Returns zero if out of memory.
+  // Caller should not pass "n == 0" -- instead, n should have
+  // been rounded up already.
+  Span* New(Length n);
+
+  // Delete the span "[p, p+n-1]".
+  // REQUIRES: span was returned by earlier call to New() and
+  //           has not yet been deleted.
+  void Delete(Span* span);
+
+  // Mark an allocated span as being used for small objects of the
+  // specified size-class.
+  // REQUIRES: span was returned by an earlier call to New()
+  //           and has not yet been deleted.
+  void RegisterSizeClass(Span* span, size_t sc);
+
+  // Split an allocated span into two spans: one of length "n" pages
+  // followed by another span of length "span->length - n" pages.
+  // Modifies "*span" to point to the first span of length "n" pages.
+  // Returns a pointer to the second span.
+  //
+  // REQUIRES: "0 < n < span->length"
+  // REQUIRES: span->location == IN_USE
+  // REQUIRES: span->sizeclass == 0
+  Span* Split(Span* span, Length n);
+
+  // Return the descriptor for the specified page.
Returns NULL if
+  // this PageID was not allocated previously.
+  inline Span* GetDescriptor(PageID p) const {
+    return reinterpret_cast<Span*>(pagemap_.get(p));
+  }
+
+  // If this page heap is managing a range with starting page # >= start,
+  // store info about the range in *r and return true. Else return false.
+  bool GetNextRange(PageID start, base::MallocRange* r);
+
+  // Page heap statistics
+  struct Stats {
+    Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0), committed_bytes(0) {}
+    uint64_t system_bytes;    // Total bytes allocated from system
+    uint64_t free_bytes;      // Total bytes on normal freelists
+    uint64_t unmapped_bytes;  // Total bytes on returned freelists
+    uint64_t committed_bytes; // Bytes committed, always <= system_bytes.
+  };
+  inline Stats stats() const { return stats_; }
+
+  struct SmallSpanStats {
+    // For each free list of small spans, the length (in spans) of the
+    // normal and returned free lists for that size.
+    int64 normal_length[kMaxPages];
+    int64 returned_length[kMaxPages];
+  };
+  void GetSmallSpanStats(SmallSpanStats* result);
+
+  // Stats for free large spans (i.e., spans with more than kMaxPages pages).
+  struct LargeSpanStats {
+    int64 spans;           // Number of such spans
+    int64 normal_pages;    // Combined page length of normal large spans
+    int64 returned_pages;  // Combined page length of unmapped spans
+  };
+  void GetLargeSpanStats(LargeSpanStats* result);
+
+  bool Check();
+  // Like Check() but does some more comprehensive checking.
+  bool CheckExpensive();
+  bool CheckList(Span* list, Length min_pages, Length max_pages,
+                 int freelist);  // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST
+
+  // Try to release at least num_pages for reuse by the OS. Returns
+  // the actual number of pages released, which may be less than
+  // num_pages if there weren't enough pages to release. The result
+  // may also be larger than num_pages since page_heap might decide to
+  // release one large range instead of fragmenting it into two
+  // smaller released and unreleased ranges.
+  Length ReleaseAtLeastNPages(Length num_pages);
+
+  // Return 0 if we have no information, or else the correct sizeclass for p.
+  // Reads and writes to pagemap_cache_ do not require locking.
+  // The entries are 64 bits on 64-bit hardware and 16 bits on
+  // 32-bit hardware, and we don't mind raciness as long as each read of
+  // an entry yields a valid entry, not a partially updated entry.
+  size_t GetSizeClassIfCached(PageID p) const {
+    return pagemap_cache_.GetOrDefault(p, 0);
+  }
+  void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
+
+  bool GetAggressiveDecommit(void) { return aggressive_decommit_; }
+  void SetAggressiveDecommit(bool aggressive_decommit) {
+    aggressive_decommit_ = aggressive_decommit;
+  }
+
+ private:
+  // Allocates a big block of memory for the pagemap once we reach more than
+  // 128MB.
+  static const size_t kPageMapBigAllocationThreshold = 128 << 20;
+
+  // Minimum number of pages to fetch from system at a time. Must be
+  // significantly bigger than kBlockSize to amortize system-call
+  // overhead, and also to reduce external fragmentation. Also, we
+  // should keep this value big because various incarnations of Linux
+  // have small limits on the number of mmap() regions per
+  // address-space.
+  // REQUIRED: kMinSystemAlloc <= kMaxPages;
+  static const int kMinSystemAlloc = kMaxPages;
+
+  // Never delay scavenging for more than the following number of
+  // deallocated pages. With 4K pages, this comes to 4GB of
+  // deallocation.
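+  // (2^20 pages * 4KiB/page == 4GiB.)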
+ static const int kMaxReleaseDelay = 1 << 20; + + // If there is nothing to release, wait for so many pages before + // scavenging again. With 4K pages, this comes to 1GB of memory. + static const int kDefaultReleaseDelay = 1 << 18; + + // Pick the appropriate map and cache types based on pointer size + typedef MapSelector<kAddressBits>::Type PageMap; + typedef MapSelector<kAddressBits>::CacheType PageMapCache; + PageMap pagemap_; + mutable PageMapCache pagemap_cache_; + + // We segregate spans of a given size into two circular linked + // lists: one for normal spans, and one for spans whose memory + // has been returned to the system. + struct SpanList { + Span normal; + Span returned; + }; + + // List of free spans of length >= kMaxPages + SpanList large_; + + // Array mapping from span length to a doubly linked list of free spans + SpanList free_[kMaxPages]; + + // Statistics on system, free, and unmapped bytes + Stats stats_; + + Span* SearchFreeAndLargeLists(Length n); + + bool GrowHeap(Length n); + + // REQUIRES: span->length >= n + // REQUIRES: span->location != IN_USE + // Remove span from its free list, and move any leftover part of + // span into appropriate free lists. Also update "span" to have + // length exactly "n" and mark it as non-free so it can be returned + // to the client. After all that, decrease free_pages_ by n and + // return span. + Span* Carve(Span* span, Length n); + + void RecordSpan(Span* span) { + pagemap_.set(span->start, span); + if (span->length > 1) { + pagemap_.set(span->start + span->length - 1, span); + } + } + + // Allocate a large span of length == n. If successful, returns a + // span of exactly the specified length. Else, returns NULL. + Span* AllocLarge(Length n); + + // Coalesce span with neighboring spans if possible, prepend to + // appropriate free list, and adjust stats. + void MergeIntoFreeList(Span* span); + + // Commit the span. + void CommitSpan(Span* span); + + // Decommit the span. + bool DecommitSpan(Span* span); + + // Prepends span to appropriate free list, and adjusts stats. + void PrependToFreeList(Span* span); + + // Removes span from its free list, and adjust stats. + void RemoveFromFreeList(Span* span); + + // Incrementally release some memory to the system. + // IncrementalScavenge(n) is called whenever n pages are freed. + void IncrementalScavenge(Length n); + + // Release the last span on the normal portion of this list. + // Return the length of that span or zero if release failed. + Length ReleaseLastNormalSpan(SpanList* slist); + + // Checks if we are allowed to take more memory from the system. + // If limit is reached and allowRelease is true, tries to release + // some unused spans. + bool EnsureLimit(Length n, bool allowRelease = true); + + bool MayMergeSpans(Span *span, Span *other); + + // Number of pages to deallocate before doing more scavenging + int64_t scavenge_counter_; + + // Index of last free list where we released memory to the OS. + int release_index_; + + bool aggressive_decommit_; +}; + +} // namespace tcmalloc + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/src/third_party/gperftools-2.5/src/page_heap_allocator.h b/src/third_party/gperftools-2.5/src/page_heap_allocator.h new file mode 100644 index 00000000000..892d1c1abe3 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/page_heap_allocator.h @@ -0,0 +1,114 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ +#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ + +#include <stddef.h> // for NULL, size_t + +#include "common.h" // for MetaDataAlloc +#include "internal_logging.h" // for ASSERT + +namespace tcmalloc { + +// Simple allocator for objects of a specified type. External locking +// is required before accessing one of these objects. +template <class T> +class PageHeapAllocator { + public: + // We use an explicit Init function because these variables are statically + // allocated and their constructors might not have run by the time some + // other static variable tries to allocate memory. + void Init() { + ASSERT(sizeof(T) <= kAllocIncrement); + inuse_ = 0; + free_area_ = NULL; + free_avail_ = 0; + free_list_ = NULL; + // Reserve some space at the beginning to avoid fragmentation. + Delete(New()); + } + + T* New() { + // Consult free list + void* result; + if (free_list_ != NULL) { + result = free_list_; + free_list_ = *(reinterpret_cast<void**>(result)); + } else { + if (free_avail_ < sizeof(T)) { + // Need more room. We assume that MetaDataAlloc returns + // suitably aligned memory. 
+        free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
+        if (free_area_ == NULL) {
+          Log(kCrash, __FILE__, __LINE__,
+              "FATAL ERROR: Out of memory trying to allocate internal "
+              "tcmalloc data (bytes, object-size)",
+              kAllocIncrement, sizeof(T));
+        }
+        free_avail_ = kAllocIncrement;
+      }
+      result = free_area_;
+      free_area_ += sizeof(T);
+      free_avail_ -= sizeof(T);
+    }
+    inuse_++;
+    return reinterpret_cast<T*>(result);
+  }
+
+  void Delete(T* p) {
+    *(reinterpret_cast<void**>(p)) = free_list_;
+    free_list_ = p;
+    inuse_--;
+  }
+
+  int inuse() const { return inuse_; }
+
+ private:
+  // How much to allocate from system at a time
+  static const int kAllocIncrement = 128 << 10;
+
+  // Free area from which to carve new objects
+  char* free_area_;
+  size_t free_avail_;
+
+  // Free list of already carved objects
+  void* free_list_;
+
+  // Number of allocated but unfreed objects
+  int inuse_;
+};
+
+}  // namespace tcmalloc
+
+#endif  // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_
diff --git a/src/third_party/gperftools-2.5/src/pagemap.h b/src/third_party/gperftools-2.5/src/pagemap.h
new file mode 100644
index 00000000000..dd9442313af
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/pagemap.h
@@ -0,0 +1,324 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat <opensource@google.com>
+//
+// A data structure used by the caching malloc. It maps from page# to
+// a pointer that contains info about that page. We use two
+// representations: one for 32-bit addresses, and another for 64-bit
+// addresses. Both representations provide the same interface. The
+// first representation is implemented as a flat array, the second as
+// a three-level radix tree that strips away approximately 1/3rd of
+// the bits every time.
+//
+// The BITS parameter should be the number of bits required to hold
+// a page number.
E.g., with 32 bit pointers and 4K pages (i.e., +// page offset fits in lower 12 bits), BITS == 20. + +#ifndef TCMALLOC_PAGEMAP_H_ +#define TCMALLOC_PAGEMAP_H_ + +#include "config.h" + +#include <stddef.h> // for NULL, size_t +#include <string.h> // for memset +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include "internal_logging.h" // for ASSERT + +// Single-level array +template <int BITS> +class TCMalloc_PageMap1 { + private: + static const int LENGTH = 1 << BITS; + + void** array_; + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) { + array_ = reinterpret_cast<void**>((*allocator)(sizeof(void*) << BITS)); + memset(array_, 0, sizeof(void*) << BITS); + } + + // Ensure that the map contains initialized entries "x .. x+n-1". + // Returns true if successful, false if we could not allocate memory. + bool Ensure(Number x, size_t n) { + // Nothing to do since flat array was allocated at start. All + // that's left is to check for overflow (that is, we don't want to + // ensure a number y where array_[y] would be an out-of-bounds + // access). + return n <= LENGTH - x; // an overflow-free way to do "x + n <= LENGTH" + } + + void PreallocateMoreMemory() {} + + // Return the current value for KEY. Returns NULL if not yet set, + // or if k is out of range. + void* get(Number k) const { + if ((k >> BITS) > 0) { + return NULL; + } + return array_[k]; + } + + // REQUIRES "k" is in range "[0,2^BITS-1]". + // REQUIRES "k" has been ensured before. + // + // Sets the value 'v' for key 'k'. + void set(Number k, void* v) { + array_[k] = v; + } + + // Return the first non-NULL pointer found in this map for + // a page number >= k. Returns NULL if no such number is found. + void* Next(Number k) const { + while (k < (1 << BITS)) { + if (array_[k] != NULL) return array_[k]; + k++; + } + return NULL; + } +}; + +// Two-level radix tree +template <int BITS> +class TCMalloc_PageMap2 { + private: + // Put 32 entries in the root and (2^BITS)/32 entries in each leaf. 
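+  // E.g. for BITS == 20 (32-bit pointers, 4KiB pages): the root holds
+  // 2^5 == 32 pointers and each leaf holds 2^15 == 32768 values, so one
+  // leaf covers 128MiB of address space.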
+ static const int ROOT_BITS = 5; + static const int ROOT_LENGTH = 1 << ROOT_BITS; + + static const int LEAF_BITS = BITS - ROOT_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Leaf* root_[ROOT_LENGTH]; // Pointers to 32 child nodes + void* (*allocator_)(size_t); // Memory allocator + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) { + allocator_ = allocator; + memset(root_, 0, sizeof(root_)); + } + + void* get(Number k) const { + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH-1); + if ((k >> BITS) > 0 || root_[i1] == NULL) { + return NULL; + } + return root_[i1]->values[i2]; + } + + void set(Number k, void* v) { + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH-1); + ASSERT(i1 < ROOT_LENGTH); + root_[i1]->values[i2] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1; ) { + const Number i1 = key >> LEAF_BITS; + + // Check for overflow + if (i1 >= ROOT_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_[i1] == NULL) { + Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) return false; + memset(leaf, 0, sizeof(*leaf)); + root_[i1] = leaf; + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + void PreallocateMoreMemory() { + // Allocate enough to keep track of all possible pages + Ensure(0, 1 << BITS); + } + + void* Next(Number k) const { + while (k < (1 << BITS)) { + const Number i1 = k >> LEAF_BITS; + Leaf* leaf = root_[i1]; + if (leaf != NULL) { + // Scan forward in leaf + for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) { + if (leaf->values[i2] != NULL) { + return leaf->values[i2]; + } + } + } + // Skip to next top-level entry + k = (i1 + 1) << LEAF_BITS; + } + return NULL; + } +}; + +// Three-level radix tree +template <int BITS> +class TCMalloc_PageMap3 { + private: + // How many bits should we consume at each interior level + static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up + static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS; + + // How many bits should we consume at leaf level + static const int LEAF_BITS = BITS - 2*INTERIOR_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Interior node + struct Node { + Node* ptrs[INTERIOR_LENGTH]; + }; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Node* root_; // Root of radix tree + void* (*allocator_)(size_t); // Memory allocator + + Node* NewNode() { + Node* result = reinterpret_cast<Node*>((*allocator_)(sizeof(Node))); + if (result != NULL) { + memset(result, 0, sizeof(*result)); + } + return result; + } + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) { + allocator_ = allocator; + root_ = NewNode(); + } + + void* get(Number k) const { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & (LEAF_LENGTH-1); + if ((k >> BITS) > 0 || + root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) { + return NULL; + } + return reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3]; + } + + void set(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & 
(LEAF_LENGTH-1); + reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1; ) { + const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1); + + // Check for overflow + if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_->ptrs[i1] == NULL) { + Node* n = NewNode(); + if (n == NULL) return false; + root_->ptrs[i1] = n; + } + + // Make leaf node if necessary + if (root_->ptrs[i1]->ptrs[i2] == NULL) { + Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) return false; + memset(leaf, 0, sizeof(*leaf)); + root_->ptrs[i1]->ptrs[i2] = reinterpret_cast<Node*>(leaf); + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + void PreallocateMoreMemory() { + } + + void* Next(Number k) const { + while (k < (Number(1) << BITS)) { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + if (root_->ptrs[i1] == NULL) { + // Advance to next top-level entry + k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS); + } else { + Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]); + if (leaf != NULL) { + for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) { + if (leaf->values[i3] != NULL) { + return leaf->values[i3]; + } + } + } + // Advance to next interior entry + k = ((k >> LEAF_BITS) + 1) << LEAF_BITS; + } + } + return NULL; + } +}; + +#endif // TCMALLOC_PAGEMAP_H_ diff --git a/src/third_party/gperftools-2.5/src/pprof b/src/third_party/gperftools-2.5/src/pprof new file mode 100755 index 00000000000..bee51bf9985 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/pprof @@ -0,0 +1,5590 @@ +#! /usr/bin/env perl + +# Copyright (c) 1998-2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# <count> <stack trace> +# This program parses the profile, and generates user-readable +# output. +# +# Examples: +# +# % tools/pprof "program" "profile" +# Enters "interactive" mode +# +# % tools/pprof --text "program" "profile" +# Generates one line per procedure +# +# % tools/pprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/pprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? + +use strict; +use warnings; +use Getopt::Long; +use Cwd; +use POSIX; + +my $PPROF_VERSION = "2.0"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the PPROF_TOOLS +# environment variable, or from the environment. +my %obj_tool_map = ( + "objdump" => "objdump", + "nm" => "nm", + "addr2line" => "addr2line", + "c++filt" => "c++filt", + ## ConfigureObjTools may add architecture-specific entries: + #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables + #"addr2line_pdb" => "addr2line-pdb", # ditto + #"otool" => "otool", # equivalent of objdump on OS X +); +# NOTE: these are lists, so you can put in commandline flags if you want. +my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local +my @GV = ("gv"); +my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread +my @KCACHEGRIND = ("kcachegrind"); +my @PS2PDF = ("ps2pdf"); +# These are used for dynamic profiles +my @URL_FETCHER = ("curl", "-s"); + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param + # ?seconds=#&event=x&period=n +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter +my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param + # "?seconds=#", + # "?tags_regexp=#" and + # "?type=#". +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . + "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . 
+ "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; + +# default binary name +my $UNKNOWN_BINARY = "(unknown)"; + +# There is a pervasive dependency on the length (in hex characters, +# i.e., nibbles) of an address, distinguishing between 32-bit and +# 64-bit profiles. To err on the safe size, default to 64-bit here: +my $address_length = 16; + +my $dev_null = "/dev/null"; +if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + +# A list of paths to search for shared object files +my @prefix_list = (); + +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. +my $sep_symbol = '_fini'; +my $sep_address = undef; + +my @stackTraces; + +##### Argument parsing ##### + +sub usage_string { + return <<EOF; +Usage: +$0 [options] <program> <profiles> + <profiles> is a space separated list of profile names. +$0 [options] <symbolized-profiles> + <symbolized-profiles> is a list of profile files where each file contains + the necessary symbol mappings as well as profile data (likely generated + with --raw). +$0 [options] <profile> + <profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each name can be: + /path/to/profile - a path to a profile file + host:port[/<service>] - a location of a service to get profile from + + The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, + $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, + $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. + For instance: + $0 http://myserver.com:80$HEAP_PAGE + If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). +$0 --symbols <program> + Maps addresses to symbol names. In this mode, stdin should be a + list of library mappings, in the same format as is found in the heap- + and cpu-profile files (this loosely matches that of /proc/self/maps + on linux), followed by a list of hex addresses to map, one per line. 
+ + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html + +Options: + --cum Sort by cumulative data + --base=<base> Subtract <base> from <profile> before display + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds=<n> Length of time for dynamic profiles [default=30 secs] + --add_lib=<file> Read additional symbols and line info from the given library + --lib_prefix=<dir> Comma separated list of library path prefixes + --no_strip_temp Do not strip template arguments from function names + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report + --stacks Generate stack traces similar to the heap profiler (requires --text) + --callgrind Generate callgrind format to stdout + --gv Generate Postscript and display + --evince Generate PDF and display + --web Generate SVG and display + --list=<regexp> Generate source listing of matching routines + --disasm=<regexp> Generate disassembly of matching routines + --symbols Print demangled symbol names found at given addresses + --dot Generate DOT file to stdout + --ps Generate Postscript to stdout + --pdf Generate PDF to stdout + --svg Generate SVG to stdout + --gif Generate GIF to stdout + --raw Generate symbolized pprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negative differences + +Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + +Call-graph Options: + --nodecount=<n> Show at most <n> nodes [default=80] + --nodefraction=<f> Hide nodes below <f>*total [default=.005] + --edgefraction=<f> Hide edges below <f>*total [default=.001] + --maxdegree=<n> Max incoming/outgoing edges per node [default=8] + --focus=<regexp> Focus on nodes matching <regexp> + --ignore=<regexp> Ignore nodes matching <regexp> + --scale=<n> Set GV scaling [default=0] + --heapcheck Make nodes with non-zero object counts + (i.e. direct leak generators) more visible + +Miscellaneous: + --no-auto-signal-frm Automatically drop the 2nd frame when it is always the same (cpu-only) + (assuming that it is an artifact of bad stack captures + which include signal handler frames) + --show_addresses Always show addresses when applicable + --tools=<prefix or binary:fullpath>[,...] \$PATH for object tool pathnames + --test Run unit tests + --help This message + --version Version information + +Environment Variables: + PPROF_TMPDIR Profiles directory. 
Defaults to \$HOME/pprof + PPROF_TOOLS Prefix for object tools pathnames + +Examples: + +$0 /bin/ls ls.prof + Enters "interactive" mode +$0 --text /bin/ls ls.prof + Outputs one line per procedure +$0 --web /bin/ls ls.prof + Displays annotated call-graph in web browser +$0 --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +$0 --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +$0 --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +$0 --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +$0 --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() + +$0 http://localhost:1234/ + Enters "interactive" mode +$0 --text localhost:1234 + Outputs one line per procedure for localhost:1234 +$0 --raw localhost:1234 > ./local.raw +$0 --text ./local.raw + Fetches a remote profile for later analysis and then + analyzes it in text mode. +EOF +} + +sub version_string { + return <<EOF +pprof (part of gperftools $PPROF_VERSION) + +Copyright 1998-2007 Google Inc. + +This is BSD licensed software; see the source for copying conditions +and license information. +There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. +EOF +} + +sub usage { + my $msg = shift; + print STDERR "$msg\n\n"; + print STDERR usage_string(); + exit(1); +} + +sub Init() { + # Set up the tmp-file name and handler to clean it up. + # We do this in the very beginning so that we can use + # the error() and cleanup() functions at any point hereafter. + $main::tmpfile_sym = "/tmp/pprof$$.sym"; + $main::tmpfile_ps = "/tmp/pprof$$"; + $main::next_tmpfile = 0; + $SIG{'INT'} = \&sighandler; + + # Cache from filename/linenumber to source code + %main::source_cache = (); + + $main::opt_help = 0; + $main::opt_version = 0; + $main::opt_show_addresses = 0; + $main::opt_no_auto_signal_frames = 0; + + $main::opt_cum = 0; + $main::opt_base = ''; + $main::opt_addresses = 0; + $main::opt_lines = 0; + $main::opt_functions = 0; + $main::opt_files = 0; + $main::opt_lib_prefix = ""; + + $main::opt_text = 0; + $main::opt_stacks = 0; + $main::opt_callgrind = 0; + $main::opt_list = ""; + $main::opt_disasm = ""; + $main::opt_symbols = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + $main::opt_dot = 0; + $main::opt_ps = 0; + $main::opt_pdf = 0; + $main::opt_gif = 0; + $main::opt_svg = 0; + $main::opt_raw = 0; + $main::opt_collapsed = 0; + + $main::opt_nodecount = 80; + $main::opt_nodefraction = 0.005; + $main::opt_edgefraction = 0.001; + $main::opt_maxdegree = 8; + $main::opt_focus = ''; + $main::opt_ignore = ''; + $main::opt_scale = 0; + $main::opt_heapcheck = 0; + $main::opt_seconds = 30; + $main::opt_lib = ""; + + $main::opt_inuse_space = 0; + $main::opt_inuse_objects = 0; + $main::opt_alloc_space = 0; + $main::opt_alloc_objects = 0; + $main::opt_show_bytes = 0; + $main::opt_drop_negative = 0; + $main::opt_interactive = 0; + + $main::opt_total_delay = 0; + $main::opt_contentions = 0; + $main::opt_mean_delay = 0; + + $main::opt_tools = ""; + $main::opt_debug = 0; + $main::opt_test = 0; + + # Do not strip template arguments in function names + $main::opt_no_strip_temp = 0; + + # These are undocumented flags used only by unittests. + $main::opt_test_stride = 0; + + # Are we using $SYMBOL_PAGE? + $main::use_symbol_page = 0; + + # Files returned by TempName. 
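+ # (Keys are the temp-file paths registered by TempName(); cleanup() is + # expected to unlink each one at exit, so files that must outlive pprof -- + # e.g. an SVG handed off to a browser -- are deleted from this hash first.)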
+ %main::tempnames = (); + + # Type of profile we are dealing with + # Supported types: + # cpu + # heap + # growth + # contention + $main::profile_type = ''; # Empty type means "unknown" + + GetOptions("help!" => \$main::opt_help, + "version!" => \$main::opt_version, + "show_addresses!"=> \$main::opt_show_addresses, + "no-auto-signal-frm!"=> \$main::opt_no_auto_signal_frames, + "cum!" => \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "add_lib=s" => \$main::opt_lib, + "lib_prefix=s" => \$main::opt_lib_prefix, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "stacks!" => \$main::opt_stacks, + "callgrind!" => \$main::opt_callgrind, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "symbols!" => \$main::opt_symbols, + "gv!" => \$main::opt_gv, + "evince!" => \$main::opt_evince, + "web!" => \$main::opt_web, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "svg!" => \$main::opt_svg, + "gif!" => \$main::opt_gif, + "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "maxdegree=i" => \$main::opt_maxdegree, + "focus=s" => \$main::opt_focus, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "heapcheck" => \$main::opt_heapcheck, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" => \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "no_strip_temp!" => \$main::opt_no_strip_temp, + "test!" => \$main::opt_test, + "debug!" => \$main::opt_debug, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, + ) || usage("Invalid option(s)"); + + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); + } + + if ($main::opt_version) { + print version_string(); + exit(0); + } + + # Disassembly/listing/symbols mode requires address-level info + if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; + } + + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most one of --inuse/--alloc options"); + } + + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } + + # Check output modes + my $modes = + $main::opt_text + + $main::opt_callgrind + + ($main::opt_list eq '' ? 0 : 1) + + ($main::opt_disasm eq '' ? 0 : 1) + + ($main::opt_symbols == 0 ? 
0 : 1) + + $main::opt_gv + + $main::opt_evince + + $main::opt_web + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_svg + + $main::opt_gif + + $main::opt_raw + + $main::opt_collapsed + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; + } else { + $main::opt_text = 1; + } + } + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (@ARGV > 0) { + if (IsProfileURL($ARGV[0])) { + printf STDERR "Using remote profile at $ARGV[0].\n"; + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } + } + + if ($main::use_symbol_page || $main::use_symbolized_profile) { + # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... + scalar(@ARGV) || usage("Did not specify profile file"); + } elsif ($main::opt_symbols) { + # --symbols needs a binary-name (to run nm on, etc) but not profiles + $main::prog = shift(@ARGV) || usage("Did not specify program"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { + my $machine = $1; + my $num_machines = $2; + my $path = $3; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine$path"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } elsif (!$main::use_symbolized_profile) { # may not need objtools! + ConfigureObjTools($main::prog) + } + + # Break the opt_lib_prefix into the prefix_list array + @prefix_list = split (',', $main::opt_lib_prefix); + + # Remove trailing / from the prefixes in the list, to prevent + # searching things like /my/path//lib/mylib.so + foreach (@prefix_list) { + s|/+$||; + } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info than most. + if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. 
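+ # (AddProfile merges by summing counts for identical stacks -- roughly + # $merged->{$stack} = ($a->{$stack} || 0) + ($b->{$stack} || 0) for every + # stack key in either profile; a sketch only, the authoritative code is + # in AddProfile itself.)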
+ if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Get total data in profile + my $total = TotalProfile($profile); + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + # Remove uninteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? + if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); + } elsif ($main::opt_text) { + # Make sure the output is empty when we have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total: %s %s\n", Unparse($total), Units()); + } + if ($main::opt_stacks) { + printf("Stacks:\n\n"); + PrintStacksForText($symbols, $profile); + } + PrintText($symbols, $flat, $cumulative, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. 
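+ # (Removing $tmp from %main::tempnames keeps cleanup() from unlinking it + # right away; the forked child below takes over deleting it.)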
+ delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. +# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} + +sub RunGV { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { + # Options using double dash are supported by this gv version. + # Also, turn on noantialias to better handle bug in gv for + # postscript files with large dimensions. + # TODO: Maybe we should not pass the --noantialias flag + # if the gv version is known to work properly without the flag. + system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) + . $bg); + } else { + # Old gv version - only supports options that use single dash. + print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; + system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); + } +} + +sub RunEvince { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + system(ShellEscape(@EVINCE, $fname) . $bg); +} + +sub RunWeb { + my $fname = shift; + print STDERR "Loading web page file:///$fname\n"; + + if (`uname` =~ /Darwin/) { + # OS X: open will use standard preference for SVG files. + system("/usr/bin/open", $fname); + return; + } + + if (`uname` =~ /MINGW/) { + # Windows(MinGW): open will use standard preference for SVG files. + system("cmd", "/c", "start", $fname); + return; + } + + # Some kind of Unix; try generic symlinks, then specific browsers. + # (Stop once we find one.) + # Works best if the browser is already running. + my @alt = ( + "/etc/alternatives/gnome-www-browser", + "/etc/alternatives/x-www-browser", + "google-chrome", + "firefox", + ); + foreach my $b (@alt) { + if (system($b, $fname) == 0) { + return; + } + } + + print STDERR "Could not load web browser.\n"; +} + +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; + system(ShellEscape(@KCACHEGRIND, $fname) . $bg); +} + + +##### Interactive helper routines ##### + +sub InteractiveMode { + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; + + print STDERR "Welcome to pprof! For help, type 'help'.\n"; + + # Use ReadLine if it's installed and input comes from a console. + if ( -t STDIN && + !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { + my $term = new Term::ReadLine 'pprof'; + while ( defined ($_ = $term->readline('(pprof) '))) { + $term->addhistory($_) if /\S/; + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + } + } else { # don't have readline + while (1) { + print STDERR "(pprof) "; + $_ = <STDIN>; + last if ! 
defined $_ ; + s/\r//g; # turn windows-looking lines into unix-looking lines + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + + # Restore flags + $main::opt_lines = $save_opt_lines; + } + } +} + +# Takes the original profile, its symbols and libs, the total count, and +# the command to run. +# Returns 1 if we should keep going, or 0 if we were asked to quit +sub InteractiveCommand { + my($orig_profile, $symbols, $libs, $total, $command) = @_; + $_ = $command; # just to make future m//'s easier + if (!defined($_)) { + print STDERR "\n"; + return 0; + } + if (m/^\s*quit/) { + return 0; + } + if (m/^\s*help/) { + InteractiveHelpMessage(); + return 1; + } + # Clear all the mode options -- mode is controlled by "$command" + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_disasm = 0; + $main::opt_list = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_cum = 0; + + if (m/^\s*(text|top)(\d*)\s*(.*)/) { + $main::opt_text = 1; + + my $line_limit = ($2 ne "") ? int($2) : 10; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($3); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintText($symbols, $flat, $cumulative, $line_limit); + return 1; + } + if (m/^\s*callgrind\s*([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = TempName($main::next_tmpfile, "callgrind"); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } + if (m/^\s*(web)?list\s*(.+)/) { + my $html = (defined($1) && ($1 eq "web")); + $main::opt_list = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($2); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintListing($total, $libs, $flat, $cumulative, $routine, $html); + return 1; + } + if (m/^\s*disasm\s*(.+)/) { + $main::opt_disasm = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintDisassembly($libs, $flat, $cumulative, $routine); + return 1; + } + if (m/^\s*(gv|web|evince)\s*(.*)/) { + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + if ($1 eq "gv") { + $main::opt_gv = 1; + } elsif ($1 eq "evince") { + $main::opt_evince = 1; + } elsif ($1 eq "web") { + $main::opt_web = 1; + } + + my $focus; + my $ignore; + ($focus, $ignore) = ParseInteractiveArgs($2); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, + $focus, $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = 
FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); + } elsif ($main::opt_web) { + RunWeb(TempName($main::next_tmpfile, "svg")); + } + $main::next_tmpfile++; + } + return 1; + } + if (m/^\s*$/) { + return 1; + } + print STDERR "Unknown command: try 'help'.\n"; + return 1; +} + + +sub ProcessProfile { + my $total_count = shift; + my $orig_profile = shift; + my $symbols = shift; + my $focus = shift; + my $ignore = shift; + + # Process current profile to account for various settings + my $profile = $orig_profile; + printf("Total: %s %s\n", Unparse($total_count), Units()); + if ($focus ne '') { + $profile = FocusProfile($symbols, $profile, $focus); + my $focus_count = TotalProfile($profile); + printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); + } + if ($ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $ignore); + my $ignore_count = TotalProfile($profile); + printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); + } + + return $profile; +} + +sub InteractiveHelpMessage { + print STDERR <<ENDOFHELP; +Interactive pprof mode + +Commands: + gv + gv [focus] [-ignore1] [-ignore2] + Show graphical hierarchical display of current profile. Without + any arguments, shows all samples in the profile. With the optional + "focus" argument, restricts the samples shown to just those where + the "focus" regular expression matches a routine name on the stack + trace. + + web + web [focus] [-ignore1] [-ignore2] + Like GV, but displays profile in your web browser instead of using + Ghostview. Works best if your web browser is already running. + To change the browser that gets used: + On Linux, set the /etc/alternatives/gnome-www-browser symlink. + On OS X, change the Finder association for SVG files. + + list [routine_regexp] [-ignore1] [-ignore2] + Show source listing of routines whose names match "routine_regexp" + + weblist [routine_regexp] [-ignore1] [-ignore2] + Displays a source listing of routines whose names match "routine_regexp" + in a web browser. You can click on source lines to view the + corresponding disassembly. + + top [--cum] [-ignore1] [-ignore2] + top20 [--cum] [-ignore1] [-ignore2] + top37 [--cum] [-ignore1] [-ignore2] + Show top lines ordered by flat profile count, or cumulative count + if --cum is specified. If a number is present after 'top', the + top K routines will be shown (defaults to showing the top 10) + + disasm [routine_regexp] [-ignore1] [-ignore2] + Show disassembly of routines whose names match "routine_regexp", + annotated with sample counts. + + callgrind + callgrind [filename] + Generates callgrind file. If no filename is given, kcachegrind is called. + + help - This listing + quit or ^D - End pprof + +For commands that accept optional -ignore tags, samples where any routine in +the stack trace matches the regular expression in any of the -ignore +parameters will be ignored. 
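+ +For example, "gv Mutex -unlock" shows only stacks that match Mutex while +dropping those that also match unlock.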
+ +Further pprof details are available at this location (or one similar): + + /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html + /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html + +ENDOFHELP +} +sub ParseInteractiveArgs { + my $args = shift; + my $focus = ""; + my $ignore = ""; + my @x = split(/ +/, $args); + foreach $a (@x) { + if ($a =~ m/^(--|-)lines$/) { + $main::opt_lines = 1; + } elsif ($a =~ m/^(--|-)cum$/) { + $main::opt_cum = 1; + } elsif ($a =~ m/^-(.*)/) { + $ignore .= (($ignore ne "") ? "|" : "" ) . $1; + } else { + $focus .= (($focus ne "") ? "|" : "" ) . $a; + } + } + if ($ignore ne "") { + print STDERR "Ignoring samples in call stacks that match '$ignore'\n"; + } + return ($focus, $ignore); +} + +##### Output code ##### + +sub TempName { + my $fnum = shift; + my $ext = shift; + my $file = "$main::tmpfile_ps.$fnum.$ext"; + $main::tempnames{$file} = 1; + return $file; +} + +# Print profile data in packed binary format (64-bit) to standard out +sub PrintProfileData { + my $profile = shift; + my $big_endian = pack("L", 1) eq pack("N", 1); + # print header (64-bit style) + # (zero) (header-size) (version) (sample-period) (zero) + if ($big_endian) { + print pack('L*', 0, 0, 0, 3, 0, 0, 0, 1, 0, 0); + } + else { + print pack('L*', 0, 0, 3, 0, 0, 0, 1, 0, 0, 0); + } + + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + my $depth = $#addrs + 1; + # int(foo / 2**32) is the only reliable way to get rid of bottom + # 32 bits on both 32- and 64-bit systems. + if ($big_endian) { + print pack('L*', int($count / 2**32), $count & 0xFFFFFFFF); + print pack('L*', int($depth / 2**32), $depth & 0xFFFFFFFF); + } + else { + print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); + print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); + } + + foreach my $full_addr (@addrs) { + my $addr = $full_addr; + $addr =~ s/0x0*//; # strip off leading 0x, zeroes + if (length($addr) > 16) { + print STDERR "Invalid address in profile: $full_addr\n"; + next; + } + my $low_addr = substr($addr, -8); # get last 8 hex chars + my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars + if ($big_endian) { + print pack('L*', hex('0x' . $high_addr), hex('0x' . $low_addr)); + } + else { + print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); + } + } + } + } +} + +# Print symbols and profile data +sub PrintSymbolizedProfile { + my $symbols = shift; + my $profile = shift; + my $prog = shift; + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + + print '--- ', $symbol_marker, "\n"; + if (defined($prog)) { + print 'binary=', $prog, "\n"; + } + while (my ($pc, $name) = each(%{$symbols})) { + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. + for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; + } + print '---', "\n"; + + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + print '--- ', $profile_marker, "\n"; + if (defined($main::collected_profile)) { + # if used with remote fetch, simply dump the collected profile to output. 
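+ # (By this point the "--- symbol" section and the "--- profile" marker + # have already been printed; what follows is the raw profile payload, + # either copied verbatim from the fetched file or generated by + # PrintProfileData below.)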
+ open(SRC, "<$main::collected_profile"); + while (<SRC>) { + print $_; + } + close(SRC); + } else { + # dump a cpu-format profile to standard out + PrintProfileData($profile); + } +} + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $line_limit = shift; + + if ($main::opt_stacks && @stackTraces) { + foreach (sort { (split " ", $b)[1] <=> (split " ", $a)[1]; } @stackTraces) { + print "$_\n" if $main::opt_debug; + my ($n1, $s1, $n2, $s2, @addrs) = split; + print "Leak of $s1 bytes in $n1 objects allocated from:\n"; + foreach my $pcstr (@addrs) { + $pcstr =~ s/^0x//; + my $sym; + if (! defined $symbols->{$pcstr}) { + $sym = "unknown"; + } else { + $sym = "$symbols->{$pcstr}[0] $symbols->{$pcstr}[1]"; + } + print "\t@ $pcstr $sym\n"; + } + } + print "\n"; + } + + my $total = TotalProfile($flat); + + # Which profile to sort by? + my $s = $main::opt_cum ? $cumulative : $flat; + + my $running_sum = 0; + my $lines = 0; + foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + $lines++; + last if ($line_limit >= 0 && $lines >= $line_limit); + } +} + +# Callgrind format has a compression for repeated function and file +# names. You show the name the first time, and just use its number +# subsequently. This can cut down the file to about a third or a +# quarter of its uncompressed size. $key and $val are the key/value +# pair that would normally be printed by callgrind; $map is a map from +# value to number. +sub CompressedCGName { + my($key, $val, $map) = @_; + my $idx = $map->{$val}; + # For very short values, providing an index hurts rather than helps. + if (length($val) <= 3) { + return "$key=$val\n"; + } elsif (defined($idx)) { + return "$key=($idx)\n"; + } else { + # scalar(keys(%{$map})) gives the number of items in the map. + $idx = scalar(keys(%{$map})) + 1; + $map->{$val} = $idx; + return "$key=($idx) $val\n"; + } +} + +# Print the call graph in a way that's suitable for callgrind. +sub PrintCallgrind { + my $calls = shift; + my $filename; + my %filename_to_index_map; + my %fnname_to_index_map; + + if ($main::opt_interactive) { + $filename = shift; + print STDERR "Writing callgrind file to '$filename'.\n" + } else { + $filename = "&STDOUT"; + } + open(CG, ">$filename"); + print CG ("events: Hits\n\n"); + foreach my $call ( map { $_->[0] } + sort { $a->[1] cmp $b ->[1] || + $a->[2] <=> $b->[2] } + map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + [$_, $1, $2] } + keys %$calls ) { + my $count = int($calls->{$call}); + $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + my ( $caller_file, $caller_line, $caller_function, + $callee_file, $callee_line, $callee_function ) = + ( $1, $2, $3, $5, $6, $7 ); + + # TODO(csilvers): for better compression, collect all the + # caller/callee_files and functions first, before printing + # anything, and only compress those referenced more than once. 
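+ # (CompressedCGName prints e.g. "fl=(1) /src/foo.cc" on first occurrence + # and just "fl=(1)" on repeats; values of three characters or fewer are + # always printed verbatim. "/src/foo.cc" is illustrative only.)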
+ print CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); + print CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); + if (defined $6) { + print CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); + print CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); + print CG ("calls=$count $callee_line\n"); + } + print CG ("$caller_line $count\n\n"); + } +} + +# Print disassembly for all routines that match $main::opt_disasm +sub PrintDisassembly { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $disasm_opts = shift; + + my $total = TotalProfile($flat); + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + # See if there are any samples in this routine + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + PrintDisassembledFunction($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, $total); + last; + } + $addr = AddressInc($addr); + } + } + } +} + +# Return reference to array of tuples of the form: +# [start_address, filename, linenumber, instruction, limit_address] +# E.g., +# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] +sub Disassemble { + my $prog = shift; + my $offset = shift; + my $start_addr = shift; + my $end_addr = shift; + + my $objdump = $obj_tool_map{"objdump"}; + my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", + "--start-address=0x$start_addr", + "--stop-address=0x$end_addr", $prog); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + my @result = (); + my $filename = ""; + my $linenumber = -1; + my $last = ["", "", "", ""]; + while (<OBJDUMP>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + chop; + if (m|\s*([^:\s]+):(\d+)\s*$|) { + # Location line of the form: + # <filename>:<linenumber> + $filename = $1; + $linenumber = $2; + } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { + # Disassembly line -- zero-extend address to full length + my $addr = HexExtend($1); + my $k = AddressAdd($addr, $offset); + $last->[4] = $k; # Store ending address for previous instruction + $last = [$k, $filename, $linenumber, $2, $end_addr]; + push(@result, $last); + } + } + close(OBJDUMP); + return @result; +} + +# The input file should contain lines that look like /proc/maps output +# (the same format as expected from the profiles) or lines that look +# like hex addresses (like "0xDEADBEEF"). We will parse all +# /proc/maps output, and for all the hex addresses, we will output +# "short" symbol names, one per line, in the same order as the input. +sub PrintSymbols { + my $maps_and_symbols_file = shift; + + # ParseLibraries expects pcs to be in a set. Fine by us... 
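+ # (For example, a maps line for /bin/ls followed by the line "0x40120f" + # prints the short symbol name found at that address, or "??" if none; + # the address is illustrative only.)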
+ my @pclist = (); # pcs in sorted order + my $pcs = {}; + my $map = ""; + foreach my $line (<$maps_and_symbols_file>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /\b(0x[0-9a-f]+)\b/i) { + push(@pclist, HexExtend($1)); + $pcs->{$pclist[-1]} = 1; + } else { + $map .= $line; + } + } + + my $libs = ParseLibraries($main::prog, $map, $pcs); + my $symbols = ExtractSymbols($libs, $pcs); + + foreach my $pc (@pclist) { + # ->[0] is the shortname, ->[2] is the full name + print(($symbols->{$pc}->[0] || "??") . "\n"); + } +} + + +# For sorting functions by name +sub ByName { + return ShortFunctionName($a) cmp ShortFunctionName($b); +} + +# Print source-listing for all routines that match $list_opts +sub PrintListing { + my $total = shift; + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $list_opts = shift; + my $html = shift; + + my $output = \*STDOUT; + my $fname = ""; + + if ($html) { + # Arrange to write the output to a temporary file + $fname = TempName($main::next_tmpfile, "html"); + $main::next_tmpfile++; + if (!open(TEMP, ">$fname")) { + print STDERR "$fname: $!\n"; + return; + } + $output = \*TEMP; + print $output HtmlListingHeader(); + printf $output ("<div class=\"legend\">%s<br>Total: %s %s</div>\n", + $main::prog, Unparse($total), Units()); + } + + my $listed = 0; + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + # Print if there are any samples in this routine + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + $listed += PrintSource( + $lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, + $html, + $output); + last; + } + $addr = AddressInc($addr); + } + } + } + + if ($html) { + if ($listed > 0) { + print $output HtmlListingFooter(); + close($output); + RunWeb($fname); + } else { + close($output); + unlink($fname); + } + } +} + +sub HtmlListingHeader { + return <<'EOF'; +<!DOCTYPE html> +<html> +<head> +<title>Pprof listing</title> +<style type="text/css"> +body { + font-family: sans-serif; +} +h1 { + font-size: 1.5em; + margin-bottom: 4px; +} +.legend { + font-size: 1.25em; +} +.line { + color: #aaaaaa; +} +.nop { + color: #aaaaaa; +} +.unimportant { + color: #cccccc; +} +.disasmloc { + color: #000000; +} +.deadsrc { + cursor: pointer; +} +.deadsrc:hover { + background-color: #eeeeee; +} +.livesrc { + color: #0000ff; + cursor: pointer; +} +.livesrc:hover { + background-color: #eeeeee; +} +.asm { + color: #008800; + display: none; +} +</style> +<script type="text/javascript"> +function pprof_toggle_asm(e) { + var target; + if (!e) e = window.event; + if (e.target) target = e.target; + else if (e.srcElement) target = e.srcElement; + + if (target) { + var asm = target.nextSibling; + if (asm && asm.className == "asm") { + asm.style.display = (asm.style.display == "block" ? 
"" : "block"); + e.preventDefault(); + return false; + } + } +} +</script> +</head> +<body> +EOF +} + +sub HtmlListingFooter { + return <<'EOF'; +</body> +</html> +EOF +} + +sub HtmlEscape { + my $text = shift; + $text =~ s/&/&/g; + $text =~ s/</</g; + $text =~ s/>/>/g; + return $text; +} + +# Returns the indentation of the line, if it has any non-whitespace +# characters. Otherwise, returns -1. +sub Indentation { + my $line = shift; + if (m/^(\s*)\S/) { + return length($1); + } else { + return -1; + } +} + +# If the symbol table contains inlining info, Disassemble() may tag an +# instruction with a location inside an inlined function. But for +# source listings, we prefer to use the location in the function we +# are listing. So use MapToSymbols() to fetch full location +# information for each instruction and then pick out the first +# location from a location list (location list contains callers before +# callees in case of inlining). +# +# After this routine has run, each entry in $instructions contains: +# [0] start address +# [1] filename for function we are listing +# [2] line number for function we are listing +# [3] disassembly +# [4] limit address +# [5] most specific filename (may be different from [1] due to inlining) +# [6] most specific line number (may be different from [2] due to inlining) +sub GetTopLevelLineNumbers { + my ($lib, $offset, $instructions) = @_; + my $pcs = []; + for (my $i = 0; $i <= $#{$instructions}; $i++) { + push(@{$pcs}, $instructions->[$i]->[0]); + } + my $symbols = {}; + MapToSymbols($lib, $offset, $pcs, $symbols); + for (my $i = 0; $i <= $#{$instructions}; $i++) { + my $e = $instructions->[$i]; + push(@{$e}, $e->[1]); + push(@{$e}, $e->[2]); + my $addr = $e->[0]; + my $sym = $symbols->{$addr}; + if (defined($sym)) { + if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { + $e->[1] = $1; # File name + $e->[2] = $2; # Line number + } + } + } +} + +# Print source-listing for one routine +sub PrintSource { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $html = shift; + my $output = shift; + + # Disassemble all instructions (just to get line numbers) + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + GetTopLevelLineNumbers($prog, $offset, \@instructions); + + # Hack 1: assume that the first source file encountered in the + # disassembly contains the routine + my $filename = undef; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[2] >= 0) { + $filename = $instructions[$i]->[1]; + last; + } + } + if (!defined($filename)) { + print STDERR "no filename found in $routine\n"; + return 0; + } + + # Hack 2: assume that the largest line number from $filename is the + # end of the procedure. This is typically safe since if P1 contains + # an inlined call to P2, then P2 usually occurs earlier in the + # source file. If this does not work, we might have to compute a + # density profile or just print all regions we find. + my $lastline = 0; + for (my $i = 0; $i <= $#instructions; $i++) { + my $f = $instructions[$i]->[1]; + my $l = $instructions[$i]->[2]; + if (($f eq $filename) && ($l > $lastline)) { + $lastline = $l; + } + } + + # Hack 3: assume the first source location from "filename" is the start of + # the source code. 
+ my $firstline = 1; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[1] eq $filename) { + $firstline = $instructions[$i]->[2]; + last; + } + } + + # Hack 4: Extend last line forward until its indentation is less than + # the indentation we saw on $firstline + my $oldlastline = $lastline; + { + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return 0; + } + my $l = 0; + my $first_indentation = -1; + while (<FILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $l++; + my $indent = Indentation($_); + if ($l >= $firstline) { + if ($first_indentation < 0 && $indent >= 0) { + $first_indentation = $indent; + last if ($first_indentation == 0); + } + } + if ($l >= $lastline && $indent >= 0) { + if ($indent >= $first_indentation) { + $lastline = $l+1; + } else { + last; + } + } + } + close(FILE); + } + + # Assign all samples to the range $firstline,$lastline. + # Hack 5: If an instruction does not occur in the range, its samples + # are moved to the next instruction that occurs in the range. + my $samples1 = {}; # Map from line number to flat count + my $samples2 = {}; # Map from line number to cumulative count + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + my %disasm = (); # Map from line number to disassembly + my $running_disasm = ""; # Unassigned disassembly + my $skip_marker = "---\n"; + if ($html) { + $skip_marker = ""; + for (my $l = $firstline; $l <= $lastline; $l++) { + $disasm{$l} = ""; + } + } + my $last_dis_filename = ''; + my $last_dis_linenum = -1; + my $last_touched_line = -1; # To detect gaps in disassembly for a line + foreach my $e (@instructions) { + # Add up counts for all addresses that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + + if ($html) { + my $dis = sprintf(" %6s %6s \t\t%8s: %s ", + HtmlPrintNumber($c1), + HtmlPrintNumber($c2), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + + # Append the most specific source line associated with this instruction + if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; + $dis = HtmlEscape($dis); + my $f = $e->[5]; + my $l = $e->[6]; + if ($f ne $last_dis_filename) { + $dis .= sprintf("<span class=disasmloc>%s:%d</span>", + HtmlEscape(CleanFileName($f)), $l); + } elsif ($l ne $last_dis_linenum) { + # De-emphasize the unchanged file name portion + $dis .= sprintf("<span class=unimportant>%s</span>" . 
+ "<span class=disasmloc>:%d</span>", + HtmlEscape(CleanFileName($f)), $l); + } else { + # De-emphasize the entire location + $dis .= sprintf("<span class=unimportant>%s:%d</span>", + HtmlEscape(CleanFileName($f)), $l); + } + $last_dis_filename = $f; + $last_dis_linenum = $l; + $running_disasm .= $dis; + $running_disasm .= "\n"; + } + + $running1 += $c1; + $running2 += $c2; + $total1 += $c1; + $total2 += $c2; + my $file = $e->[1]; + my $line = $e->[2]; + if (($file eq $filename) && + ($line >= $firstline) && + ($line <= $lastline)) { + # Assign all accumulated samples to this line + AddEntry($samples1, $line, $running1); + AddEntry($samples2, $line, $running2); + $running1 = 0; + $running2 = 0; + if ($html) { + if ($line != $last_touched_line && $disasm{$line} ne '') { + $disasm{$line} .= "\n"; + } + $disasm{$line} .= $running_disasm; + $running_disasm = ''; + $last_touched_line = $line; + } + } + } + + # Assign any leftover samples to $lastline + AddEntry($samples1, $lastline, $running1); + AddEntry($samples2, $lastline, $running2); + if ($html) { + if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { + $disasm{$lastline} .= "\n"; + } + $disasm{$lastline} .= $running_disasm; + } + + if ($html) { + printf $output ( + "<h1>%s</h1>%s\n<pre onClick=\"pprof_toggle_asm()\">\n" . + "Total:%6s %6s (flat / cumulative %s)\n", + HtmlEscape(ShortFunctionName($routine)), + HtmlEscape(CleanFileName($filename)), + Unparse($total1), + Unparse($total2), + Units()); + } else { + printf $output ( + "ROUTINE ====================== %s in %s\n" . + "%6s %6s Total %s (flat / cumulative)\n", + ShortFunctionName($routine), + CleanFileName($filename), + Unparse($total1), + Unparse($total2), + Units()); + } + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return 0; + } + my $l = 0; + while (<FILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $l++; + if ($l >= $firstline - 5 && + (($l <= $oldlastline + 5) || ($l <= $lastline))) { + chop; + my $text = $_; + if ($l == $firstline) { print $output $skip_marker; } + my $n1 = GetEntry($samples1, $l); + my $n2 = GetEntry($samples2, $l); + if ($html) { + # Emit a span that has one of the following classes: + # livesrc -- has samples + # deadsrc -- has disassembly, but with no samples + # nop -- has no matching disasembly + # Also emit an optional span containing disassembly. + my $dis = $disasm{$l}; + my $asm = ""; + if (defined($dis) && $dis ne '') { + $asm = "<span class=\"asm\">" . $dis . "</span>"; + } + my $source_class = (($n1 + $n2 > 0) + ? "livesrc" + : (($asm ne "") ? "deadsrc" : "nop")); + printf $output ( + "<span class=\"line\">%5d</span> " . + "<span class=\"%s\">%6s %6s %s</span>%s\n", + $l, $source_class, + HtmlPrintNumber($n1), + HtmlPrintNumber($n2), + HtmlEscape($text), + $asm); + } else { + printf $output( + "%6s %6s %4d: %s\n", + UnparseAlt($n1), + UnparseAlt($n2), + $l, + $text); + } + if ($l == $lastline) { print $output $skip_marker; } + }; + } + close(FILE); + if ($html) { + print $output "</pre>\n"; + } + return 1; +} + +# Return the source line for the specified file/linenumber. +# Returns undef if not found. 
+sub SourceLine { + my $file = shift; + my $line = shift; + + # Look in cache + if (!defined($main::source_cache{$file})) { + if (100 < scalar keys(%main::source_cache)) { + # Clear the cache when it gets too big + %main::source_cache = (); + } + + # Read all lines from the file + if (!open(FILE, "<$file")) { + print STDERR "$file: $!\n"; + $main::source_cache{$file} = []; # Cache the negative result + return undef; + } + my $lines = []; + push(@{$lines}, ""); # So we can use 1-based line numbers as indices + while (<FILE>) { + push(@{$lines}, $_); + } + close(FILE); + + # Save the lines in the cache + $main::source_cache{$file} = $lines; + } + + my $lines = $main::source_cache{$file}; + if (($line < 0) || ($line > $#{$lines})) { + return undef; + } else { + return $lines->[$line]; + } +} + +# Print disassembly for one routine with interspersed source if available +sub PrintDisassembledFunction { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $total = shift; + + # Disassemble all instructions + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Make array of counts per instruction + my @flat_count = (); + my @cum_count = (); + my $flat_total = 0; + my $cum_total = 0; + foreach my $e (@instructions) { + # Add up counts for all addresses that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + push(@flat_count, $c1); + push(@cum_count, $c2); + $flat_total += $c1; + $cum_total += $c2; + } + + # Print header with total counts + printf("ROUTINE ====================== %s\n" . + "%6s %6s %s (flat, cumulative) %.1f%% of total\n", + ShortFunctionName($routine), + Unparse($flat_total), + Unparse($cum_total), + Units(), + ($cum_total * 100.0) / $total); + + # Process instructions in order + my $current_file = ""; + for (my $i = 0; $i <= $#instructions; ) { + my $e = $instructions[$i]; + + # Print the new file name whenever we switch files + if ($e->[1] ne $current_file) { + $current_file = $e->[1]; + my $fname = $current_file; + $fname =~ s|^\./||; # Trim leading "./" + + # Shorten long file names + if (length($fname) >= 58) { + $fname = "..." . substr($fname, -55); + } + printf("-------------------- %s\n", $fname); + } + + # TODO: Compute range of lines to print together to deal with + # small reorderings. 
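+ # (For now each group is the single source line of the current + # instruction: $last_line is initialized to $first_line just below, so + # the range covers exactly one line.)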
+ my $first_line = $e->[2]; + my $last_line = $first_line; + my %flat_sum = (); + my %cum_sum = (); + for (my $l = $first_line; $l <= $last_line; $l++) { + $flat_sum{$l} = 0; + $cum_sum{$l} = 0; + } + + # Find run of instructions for this range of source lines + my $first_inst = $i; + while (($i <= $#instructions) && + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { + $e = $instructions[$i]; + $flat_sum{$e->[2]} += $flat_count[$i]; + $cum_sum{$e->[2]} += $cum_count[$i]; + $i++; + } + my $last_inst = $i - 1; + + # Print source lines + for (my $l = $first_line; $l <= $last_line; $l++) { + my $line = SourceLine($current_file, $l); + if (!defined($line)) { + $line = "?\n"; + next; + } else { + $line =~ s/^\s+//; + } + printf("%6s %6s %5d: %s", + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); + } + + # Print disassembly + for (my $x = $first_inst; $x <= $last_inst; $x++) { + my $e = $instructions[$x]; + printf("%6s %6s %8s: %6s\n", + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + } + } +} + +# Print DOT graph +sub PrintDot { + my $prog = shift; + my $symbols = shift; + my $raw = shift; + my $flat = shift; + my $cumulative = shift; + my $overall_total = shift; + + # Get total + my $local_total = TotalProfile($flat); + my $nodelimit = int($main::opt_nodefraction * $local_total); + my $edgelimit = int($main::opt_edgefraction * $local_total); + my $nodecount = $main::opt_nodecount; + + # Find nodes to include + my @list = (sort { abs(GetEntry($cumulative, $b)) <=> + abs(GetEntry($cumulative, $a)) + || $a cmp $b } + keys(%{$cumulative})); + my $last = $nodecount - 1; + if ($last > $#list) { + $last = $#list; + } + while (($last >= 0) && + (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { + $last--; + } + if ($last < 0) { + print STDERR "No nodes to print\n"; + return 0; + } + + if ($nodelimit > 0 || $edgelimit > 0) { + printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); + } + + # Open DOT output file + my $output; + my $escaped_dot = ShellEscape(@DOT); + my $escaped_ps2pdf = ShellEscape(@PS2PDF); + if ($main::opt_gv) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); + $output = "| $escaped_dot -Tps2 >$escaped_outfile"; + } elsif ($main::opt_evince) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; + } elsif ($main::opt_ps) { + $output = "| $escaped_dot -Tps2"; + } elsif ($main::opt_pdf) { + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; + } elsif ($main::opt_web || $main::opt_svg) { + # We need to post-process the SVG, so write to a temporary file always. + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); + $output = "| $escaped_dot -Tsvg >$escaped_outfile"; + } elsif ($main::opt_gif) { + $output = "| $escaped_dot -Tgif"; + } else { + $output = ">&STDOUT"; + } + open(DOT, $output) || error("$output: $!\n"); + + # Title + printf DOT ("digraph \"%s; %s %s\" {\n", + $prog, + Unparse($overall_total), + Units()); + if ($main::opt_pdf) { + # The output is more printable if we set the page size for dot. + printf DOT ("size=\"8,11\"\n"); + } + printf DOT ("node [width=0.375,height=0.25];\n"); + + # Print legend + printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . 
+ "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", + $prog, + sprintf("Total %s: %s", Units(), Unparse($overall_total)), + sprintf("Focusing on: %s", Unparse($local_total)), + sprintf("Dropped nodes with <= %s abs(%s)", + Unparse($nodelimit), Units()), + sprintf("Dropped edges with <= %s %s", + Unparse($edgelimit), Units()) + ); + + # Print nodes + my %node = (); + my $nextnode = 1; + foreach my $a (@list[0..$last]) { + # Pick font size + my $f = GetEntry($flat, $a); + my $c = GetEntry($cumulative, $a); + + my $fs = 8; + if ($local_total > 0) { + $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); + } + + $node{$a} = $nextnode++; + my $sym = $a; + $sym =~ s/\s+/\\n/g; + $sym =~ s/::/\\n/g; + + # Extra cumulative info to print for non-leaves + my $extra = ""; + if ($f != $c) { + $extra = sprintf("\\rof %s (%s)", + Unparse($c), + Percent($c, $local_total)); + } + my $style = ""; + if ($main::opt_heapcheck) { + if ($f > 0) { + # make leak-causing nodes more visible (add a background) + $style = ",style=filled,fillcolor=gray" + } elsif ($f < 0) { + # make anti-leak-causing nodes (which almost never occur) + # stand out as well (triple border) + $style = ",peripheries=3" + } + } + + printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . + "\",shape=box,fontsize=%.1f%s];\n", + $node{$a}, + $sym, + Unparse($f), + Percent($f, $local_total), + $extra, + $fs, + $style, + ); + } + + # Get edges and counts per edge + my %edge = (); + my $n; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$raw})) { + # TODO: omit low %age edges + $n = $raw->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; + #next if ($src eq $dst); # Avoid self-edges? + if (exists($node{$src}) && exists($node{$dst})) { + my $edge_label = "$src\001$dst"; + if (!exists($edge{$edge_label})) { + $edge{$edge_label} = 0; + } + $edge{$edge_label} += $n; + } + } + } + + # Print edges (process in order of decreasing counts) + my %indegree = (); # Number of incoming edges added per node so far + my %outdegree = (); # Number of outgoing edges added per node so far + foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { + my @x = split(/\001/, $e); + $n = $edge{$e}; + + # Initialize degree of kept incoming and outgoing edges if necessary + my $src = $x[0]; + my $dst = $x[1]; + if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } + if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } + + my $keep; + if ($indegree{$dst} == 0) { + # Keep edge if needed for reachability + $keep = 1; + } elsif (abs($n) <= $edgelimit) { + # Drop if we are below --edgefraction + $keep = 0; + } elsif ($outdegree{$src} >= $main::opt_maxdegree || + $indegree{$dst} >= $main::opt_maxdegree) { + # Keep limited number of in/out edges per node + $keep = 0; + } else { + $keep = 1; + } + + if ($keep) { + $outdegree{$src}++; + $indegree{$dst}++; + + # Compute line width based on edge count + my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); + if ($fraction > 1) { $fraction = 1; } + my $w = $fraction * 2; + if ($w < 1 && ($main::opt_web || $main::opt_svg)) { + # SVG output treats line widths < 1 poorly. 
+ $w = 1; + } + + # Dot sometimes segfaults if given edge weights that are too large, so + # we cap the weights at a large value + my $edgeweight = abs($n) ** 0.7; + if ($edgeweight > 100000) { $edgeweight = 100000; } + $edgeweight = int($edgeweight); + + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + + # Use a slightly squashed function of the edge count as the weight + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", + $node{$x[0]}, + $node{$x[1]}, + Unparse($n), + $edgeweight, + $style); + } + } + + print DOT ("}\n"); + close(DOT); + + if ($main::opt_web || $main::opt_svg) { + # Rewrite SVG to be more usable inside web browser. + RewriteSvg(TempName($main::next_tmpfile, "svg")); + } + + return 1; +} + +sub RewriteSvg { + my $svgfile = shift; + + open(SVG, $svgfile) || die "open temp svg: $!"; + my @svg = <SVG>; + close(SVG); + unlink $svgfile; + my $svg = join('', @svg); + + # Dot's SVG output is + # + # <svg width="___" height="___" + # viewBox="___" xmlns=...> + # <g id="graph0" transform="..."> + # ... + # </g> + # </svg> + # + # Change it to + # + # <svg width="100%" height="100%" + # xmlns=...> + # $svg_javascript + # <g id="viewport" transform="translate(0,0)"> + # <g id="graph0" transform="..."> + # ... + # </g> + # </g> + # </svg> + + # Fix width, height; drop viewBox. + $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/; + + # Insert script, viewport <g> above first <g> + my $svg_javascript = SvgJavascript(); + my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n"; + $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/; + + # Insert final </g> above </svg>. + $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; + $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/; + + if ($main::opt_svg) { + # --svg: write to standard output. + print $svg; + } else { + # Write back to temporary file. + open(SVG, ">$svgfile") || die "open $svgfile: $!"; + print SVG $svg; + close(SVG); + } +} + +sub SvgJavascript { + return <<'EOF'; +<script type="text/ecmascript"><![CDATA[ +// SVGPan +// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/ +// Local modification: if(true || ...) below to force panning, never moving. + +/** + * SVGPan library 1.2 + * ==================== + * + * Given an unique existing element with id "viewport", including the + * the library into any SVG adds the following capabilities: + * + * - Mouse panning + * - Mouse zooming (using the wheel) + * - Object dargging + * + * Known issues: + * + * - Zooming (while panning) on Safari has still some issues + * + * Releases: + * + * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui + * Fixed a bug with browser mouse handler interaction + * + * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui + * Updated the zoom code to support the mouse wheel on Safari/Chrome + * + * 1.0, Andrea Leofreddi + * First release + * + * This code is licensed under the following BSD license: + * + * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are those of the + * authors and should not be interpreted as representing official policies, either expressed + * or implied, of Andrea Leofreddi. + */ + +var root = document.documentElement; + +var state = 'none', stateTarget, stateOrigin, stateTf; + +setupHandlers(root); + +/** + * Register handlers + */ +function setupHandlers(root){ + setAttributes(root, { + "onmouseup" : "add(evt)", + "onmousedown" : "handleMouseDown(evt)", + "onmousemove" : "handleMouseMove(evt)", + "onmouseup" : "handleMouseUp(evt)", + //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element + }); + + if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0) + window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari + else + window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others + + var g = svgDoc.getElementById("svg"); + g.width = "100%"; + g.height = "100%"; +} + +/** + * Instance an SVGPoint object with given event coordinates. + */ +function getEventPoint(evt) { + var p = root.createSVGPoint(); + + p.x = evt.clientX; + p.y = evt.clientY; + + return p; +} + +/** + * Sets the current transform matrix of an element. + */ +function setCTM(element, matrix) { + var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")"; + + element.setAttribute("transform", s); +} + +/** + * Dumps a matrix to a string (useful for debug). + */ +function dumpMatrix(matrix) { + var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]"; + + return s; +} + +/** + * Sets attributes of an element. + */ +function setAttributes(element, attributes){ + for (i in attributes) + element.setAttributeNS(null, i, attributes[i]); +} + +/** + * Handle mouse move event. 
+ */ +function handleMouseWheel(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + var delta; + + if(evt.wheelDelta) + delta = evt.wheelDelta / 3600; // Chrome/Safari + else + delta = evt.detail / -90; // Mozilla + + var z = 1 + delta; // Zoom factor: 0.9/1.1 + + var g = svgDoc.getElementById("viewport"); + + var p = getEventPoint(evt); + + p = p.matrixTransform(g.getCTM().inverse()); + + // Compute new scale matrix in current mouse position + var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y); + + setCTM(g, g.getCTM().multiply(k)); + + stateTf = stateTf.multiply(k.inverse()); +} + +/** + * Handle mouse move event. + */ +function handleMouseMove(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + var g = svgDoc.getElementById("viewport"); + + if(state == 'pan') { + // Pan mode + var p = getEventPoint(evt).matrixTransform(stateTf); + + setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y)); + } else if(state == 'move') { + // Move mode + var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse()); + + setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM())); + + stateOrigin = p; + } +} + +/** + * Handle click event. + */ +function handleMouseDown(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + var g = svgDoc.getElementById("viewport"); + + if(true || evt.target.tagName == "svg") { + // Pan mode + state = 'pan'; + + stateTf = g.getCTM().inverse(); + + stateOrigin = getEventPoint(evt).matrixTransform(stateTf); + } else { + // Move mode + state = 'move'; + + stateTarget = evt.target; + + stateTf = g.getCTM().inverse(); + + stateOrigin = getEventPoint(evt).matrixTransform(stateTf); + } +} + +/** + * Handle mouse button release event. + */ +function handleMouseUp(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + if(state == 'pan' || state == 'move') { + // Quit pan mode + state = ''; + } +} + +]]></script> +EOF +} + +# Provides a map from fullname to shortname for cases where the +# shortname is ambiguous. The symlist has both the fullname and +# shortname for all symbols, which is usually fine, but sometimes -- +# such as overloaded functions -- two different fullnames can map to +# the same shortname. In that case, we use the address of the +# function to disambiguate the two. This function fills in a map that +# maps fullnames to modified shortnames in such cases. If a fullname +# is not present in the map, the 'normal' shortname provided by the +# symlist is the appropriate one to use. +sub FillFullnameToShortnameMap { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $shortnames_seen_once = {}; + my $shortnames_seen_more_than_once = {}; + + foreach my $symlist (values(%{$symbols})) { + # TODO(csilvers): deal with inlined symbols too. 
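+    # Illustration (hypothetical symbols): if two overloads with fullnames
+    # "Foo::bar(int)<00401000>" and "Foo::bar(double)<00402000>" both carry
+    # the shortname "Foo::bar", this pass flags the collision, and the
+    # second pass below maps the fullnames to "Foo::bar@401000" and
+    # "Foo::bar@402000" respectively.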
+ my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address + next; # the only collisions we care about are when addresses differ + } + if (defined($shortnames_seen_once->{$shortname}) && + $shortnames_seen_once->{$shortname} ne $fullname) { + $shortnames_seen_more_than_once->{$shortname} = 1; + } else { + $shortnames_seen_once->{$shortname} = $fullname; + } + } + + foreach my $symlist (values(%{$symbols})) { + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + # TODO(csilvers): take in a list of addresses we care about, and only + # store in the map if $symlist->[1] is in that list. Saves space. + next if defined($fullname_to_shortname_map->{$fullname}); + if (defined($shortnames_seen_more_than_once->{$shortname})) { + if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it + $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; + } + } + } +} + +# Return a small number that identifies the argument. +# Multiple calls with the same argument will return the same number. +# Calls with different arguments will return different numbers. +sub ShortIdFor { + my $key = shift; + my $id = $main::uniqueid{$key}; + if (!defined($id)) { + $id = keys(%main::uniqueid) + 1; + $main::uniqueid{$key} = $id; + } + return $id; +} + +# Translate a stack of addresses into a stack of symbols +sub TranslateStack { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $k = shift; + + my @addrs = split(/\n/, $k); + my @result = (); + for (my $i = 0; $i <= $#addrs; $i++) { + my $a = $addrs[$i]; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (length($a) > 8 && $a gt "7fffffffffffffff") { + next; + } + + if ($main::opt_disasm || $main::opt_list) { + # We want just the address for the key + push(@result, $a); + next; + } + + my $symlist = $symbols->{$a}; + if (!defined($symlist)) { + $symlist = [$a, "", $a]; + } + + # We can have a sequence of symbols for a particular entry + # (more than one symbol in the case of inlining). Callers + # come before callees in symlist, so walk backwards since + # the translated stack should contain callees before callers. + for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { + my $func = $symlist->[$j-2]; + my $fileline = $symlist->[$j-1]; + my $fullfunc = $symlist->[$j]; + if (defined($fullname_to_shortname_map->{$fullfunc})) { + $func = $fullname_to_shortname_map->{$fullfunc}; + } + if ($j > 2) { + $func = "$func (inline)"; + } + + # Do not merge nodes corresponding to Callback::Run since that + # causes confusing cycles in dot display. Instead, we synthesize + # a unique name for this frame per caller. + if ($func =~ m/Callback.*::Run$/) { + my $caller = ($i > 0) ? $addrs[$i-1] : 0; + $func = "Run#" . ShortIdFor($caller); + } + + if ($main::opt_addresses) { + push(@result, "$a $func $fileline"); + } elsif ($main::opt_lines) { + if ($func eq '??' 
&& $fileline eq '??:0') { + push(@result, "$a"); + } elsif (!$main::opt_show_addresses) { + push(@result, "$func $fileline"); + } else { + push(@result, "$func $fileline ($a)"); + } + } elsif ($main::opt_functions) { + if ($func eq '??') { + push(@result, "$a"); + } elsif (!$main::opt_show_addresses) { + push(@result, $func); + } else { + push(@result, "$func ($a)"); + } + } elsif ($main::opt_files) { + if ($fileline eq '??:0' || $fileline eq '') { + push(@result, "$a"); + } else { + my $f = $fileline; + $f =~ s/:\d+$//; + push(@result, $f); + } + } else { + push(@result, $a); + last; # Do not print inlined info + } + } + } + + # print join(",", @addrs), " => ", join(",", @result), "\n"; + return @result; +} + +# Generate percent string for a number and a total +sub Percent { + my $num = shift; + my $tot = shift; + if ($tot != 0) { + return sprintf("%.1f%%", $num * 100.0 / $tot); + } else { + return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); + } +} + +# Generate pretty-printed form of number +sub Unparse { + my $num = shift; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return sprintf("%d", $num); + } else { + if ($main::opt_show_bytes) { + return sprintf("%d", $num); + } else { + return sprintf("%.1f", $num / 1048576.0); + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + } else { + return sprintf("%d", $num); + } +} + +# Alternate pretty-printed form: 0 maps to "." +sub UnparseAlt { + my $num = shift; + if ($num == 0) { + return "."; + } else { + return Unparse($num); + } +} + +# Alternate pretty-printed form: 0 maps to "" +sub HtmlPrintNumber { + my $num = shift; + if ($num == 0) { + return ""; + } else { + return Unparse($num); + } +} + +# Return output units +sub Units { + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return "objects"; + } else { + if ($main::opt_show_bytes) { + return "B"; + } else { + return "MB"; + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return "seconds"; + } else { + return "samples"; + } +} + +##### Profile manipulation code ##### + +# Generate flattened profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a] +sub FlatProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + AddEntry($result, $addrs[0], $count); + } + } + return $result; +} + +# Generate cumulative profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a], [b], [c], [d] +sub CumulativeProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + AddEntry($result, $a, $count); + } + } + return $result; +} + +# If the second-youngest PC on the stack is always the same, returns +# that pc. Otherwise, returns undef. 
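+# Example (hypothetical addresses; profile keys are newline-joined stacks):
+#   { "4006e8\n4005d0\n400321" => 7,
+#     "4007aa\n4005d0\n400321" => 3 }
+# yields "4005d0"; a one-entry stack, or differing second entries, yields
+# undef.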
+sub IsSecondPcAlwaysTheSame { + my $profile = shift; + + my $second_pc = undef; + foreach my $k (keys(%{$profile})) { + my @addrs = split(/\n/, $k); + if ($#addrs < 1) { + return undef; + } + if (not defined $second_pc) { + $second_pc = $addrs[1]; + } else { + if ($second_pc ne $addrs[1]) { + return undef; + } + } + } + return $second_pc; +} + +sub ExtractSymbolLocationInlineStack { + my $symbols = shift; + my $address = shift; + my $stack = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-2]; + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@$stack, $file.":".$fn.$suffix); + } + } + else { + push (@$stack, "??:0:unknown"); + } +} + +sub ExtractSymbolNameInlineStack { + my $symbols = shift; + my $address = shift; + + my @stack = (); + + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-0]; + + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + if ($fn eq '??') { + # If we can't get the symbol name, at least use the file information. + $fn = $file; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@stack, $fn.$suffix); + } + } + else { + # If we can't get a symbol name, at least fill in the address. + push (@stack, $address); + } + + return @stack; +} + +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. + my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?" || $file eq ":0") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + +# Extracts a graph of calls. 
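+# Example (hypothetical locations): the returned map is keyed either by a
+# leaf location such as "work.cc:12:Work", or by a caller -> callee edge
+# such as "main.cc:30:main -> work.cc:12:Work", each mapped to a sample
+# count.  Frames that cannot be symbolized appear as "??:0:unknown".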
+sub ExtractCalls { + my $symbols = shift; + my $profile = shift; + my $calls = {}; + while( my ($stack_trace, $count) = each %$profile ) { + my @address = split(/\n/, $stack_trace); + my @stack = (); + ExtractSymbolLocationInlineStack($symbols, $address[0], \@stack); + for (my $i = 1; $i <= $#address; $i++) { + ExtractSymbolLocationInlineStack($symbols, $address[$i], \@stack); + } + AddEntry($calls, $stack[0], $count); + for (my $i = 1; $i < $#address; $i++) { + AddEntry($calls, "$stack[$i] -> $stack[$i-1]", $count); + } + } + return $calls; +} + +sub PrintStacksForText { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + for (my $i = 0; $i <= $#address; $i++) { + $address[$i] = sprintf("(%s) %s", $address[$i], ExtractSymbolLocation($symbols, $address[$i])); + } + printf("%-8d %s\n\n", $count, join("\n ", @address)); + } +} + +sub PrintCollapsedStacks { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address ); + printf("%s %d\n", join(";", @names), $count); + } +} + +sub RemoveUninterestingFrames { + my $symbols = shift; + my $profile = shift; + + # List of function names to skip + my %skip = (); + my $skip_regexp = 'NOMATCH'; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + foreach my $name ('calloc', + 'cfree', + 'malloc', + 'free', + 'memalign', + 'posix_memalign', + 'pvalloc', + 'valloc', + 'realloc', + 'tc_calloc', + 'tc_cfree', + 'tc_malloc', + 'tc_free', + 'tc_memalign', + 'tc_posix_memalign', + 'tc_pvalloc', + 'tc_valloc', + 'tc_realloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', + '::do_malloc', # new name -- got moved to an unnamed ns + '::do_malloc_or_cpp_alloc', + 'DoSampledAllocation', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new', + 'operator new', + 'operator new[]', + # The entry to our memory-allocation routines on OS X + 'malloc_zone_malloc', + 'malloc_zone_calloc', + 'malloc_zone_valloc', + 'malloc_zone_realloc', + 'malloc_zone_memalign', + 'malloc_zone_free', + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { + $skip{$name} = 1; + $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything + } + # TODO: Remove TCMalloc once everything has been + # moved into the tcmalloc:: namespace and we have flushed + # old code out of the system. + $skip_regexp = "TCMalloc|^tcmalloc::"; + } elsif ($main::profile_type eq 'contention') { + foreach my $vname ('base::RecordLockProfileData', + 'base::SubmitMutexProfileData', + 'base::SubmitSpinLockProfileData', + 'Mutex::Unlock', + 'Mutex::UnlockSlow', + 'Mutex::ReaderUnlock', + 'MutexLock::~MutexLock', + 'SpinLock::Unlock', + 'SpinLock::SlowUnlock', + 'SpinLockHolder::~SpinLockHolder') { + $skip{$vname} = 1; + } + } elsif ($main::profile_type eq 'cpu' && !$main::opt_no_auto_signal_frames) { + # Drop signal handlers used for CPU profile collection + # TODO(dpeng): this should not be necessary; it's taken + # care of by the general 2nd-pc mechanism below. 
+ foreach my $name ('ProfileData::Add', # historical + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', + '__FRAME_END__', + '__pthread_sighandler', + '__restore') { + $skip{$name} = 1; + } + } else { + # Nothing skipped for unknown types + } + + if ($main::profile_type eq 'cpu') { + # If all the second-youngest program counters are the same, + # this STRONGLY suggests that it is an artifact of measurement, + # i.e., stack frames pushed by the CPU profiler signal handler. + # Hence, we delete them. + # (The topmost PC is read from the signal structure, not from + # the stack, so it does not get involved.) + while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { + my $result = {}; + my $func = ''; + if (exists($symbols->{$second_pc})) { + $second_pc = $symbols->{$second_pc}->[0]; + } + if ($main::opt_no_auto_signal_frames) { + print STDERR "All second stack frames are same: `$second_pc'.\nMight be stack trace capturing bug.\n"; + last; + } + print STDERR "Removing $second_pc from all stack traces.\n"; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $topaddr = POSIX::strtoul($addrs[0], 16); + splice @addrs, 1, 1; + if ($#addrs > 1) { + my $subtopaddr = POSIX::strtoul($addrs[1], 16); + if ($subtopaddr + 1 == $topaddr) { + splice @addrs, 1, 1; + } + } + my $reduced_path = join("\n", @addrs); + AddEntry($result, $reduced_path, $count); + } + $profile = $result; + } + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + if (exists($symbols->{$a})) { + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + next; + } + } + push(@path, $a); + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Reduce profile to granularity given by user +sub ReduceProfile { + my $symbols = shift; + my $profile = shift; + my $result = {}; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + my @path = (); + my %seen = (); + $seen{''} = 1; # So that empty keys are skipped + foreach my $e (@translated) { + # To avoid double-counting due to recursion, skip a stack-trace + # entry if it has already been seen + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); + } + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Does the specified symbol array match the regexp? 
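+# A symbol array holds (shortname, fileline, fullname) triples, one triple
+# per inline frame; only the shortname and fileline are tested.  Example
+# (hypothetical symbol):
+#   my $sym = ["Foo::bar", "foo.cc:12", "Foo::bar(int)"];
+#   SymbolMatches($sym, "bar");       # 1 -- matches the shortname
+#   SymbolMatches($sym, 'foo\.cc');   # 1 -- matches the fileline
+#   SymbolMatches($sym, "baz");       # 0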
+sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + +# Focus only on paths involving specified regexps +sub FocusProfile { + my $symbols = shift; + my $profile = shift; + my $focus = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { + AddEntry($result, $k, $count); + last; + } + } + } + return $result; +} + +# Focus only on paths not involving specified regexps +sub IgnoreProfile { + my $symbols = shift; + my $profile = shift; + my $ignore = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $matched = 0; + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { + $matched = 1; + last; + } + } + if (!$matched) { + AddEntry($result, $k, $count); + } + } + return $result; +} + +# Get total count in profile +sub TotalProfile { + my $profile = shift; + my $result = 0; + foreach my $k (keys(%{$profile})) { + $result += $profile->{$k}; + } + return $result; +} + +# Add A to B +sub AddProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + my $v = $A->{$k}; + AddEntry($R, $k, $v); + } + # add all keys in B + foreach my $k (keys(%{$B})) { + my $v = $B->{$k}; + AddEntry($R, $k, $v); + } + return $R; +} + +# Merges symbol maps +sub MergeSymbols { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + $R->{$k} = $A->{$k}; + } + if (defined($B)) { + foreach my $k (keys(%{$B})) { + $R->{$k} = $B->{$k}; + } + } + return $R; +} + + +# Add A to B +sub AddPcs { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + $R->{$k} = 1 + } + # add all keys in B + foreach my $k (keys(%{$B})) { + $R->{$k} = 1 + } + return $R; +} + +# Subtract B from A +sub SubtractProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + my $v = $A->{$k} - GetEntry($B, $k); + if ($v < 0 && $main::opt_drop_negative) { + $v = 0; + } + AddEntry($R, $k, $v); + } + if (!$main::opt_drop_negative) { + # Take care of when subtracted profile has more entries + foreach my $k (keys(%{$B})) { + if (!exists($A->{$k})) { + AddEntry($R, $k, 0 - $B->{$k}); + } + } + } + return $R; +} + +# Get entry from profile; zero if not present +sub GetEntry { + my $profile = shift; + my $k = shift; + if (exists($profile->{$k})) { + return $profile->{$k}; + } else { + return 0; + } +} + +# Add entry to specified profile +sub AddEntry { + my $profile = shift; + my $k = shift; + my $n = shift; + if (!exists($profile->{$k})) { + $profile->{$k} = 0; + } + $profile->{$k} += $n; +} + +# Add a stack of entries to specified profile, and add them to the $pcs +# list. 
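+# Example (hypothetical pcs, 64-bit profile): the stack is a whitespace-
+# separated list of hex pcs, leaf first; HexExtend zero-pads each one:
+#   AddEntries($profile, $pcs, "4006e8 4005d0", 12);
+#   # $profile->{"00000000004006e8\n00000000004005d0"} += 12
+#   # $pcs->{"00000000004006e8"} = 1;  $pcs->{"00000000004005d0"} = 1;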
+sub AddEntries {
+  my $profile = shift;
+  my $pcs = shift;
+  my $stack = shift;
+  my $count = shift;
+  my @k = ();
+
+  foreach my $e (split(/\s+/, $stack)) {
+    my $pc = HexExtend($e);
+    $pcs->{$pc} = 1;
+    push @k, $pc;
+  }
+  AddEntry($profile, (join "\n", @k), $count);
+}
+
+##### Code to profile a server dynamically #####
+
+sub CheckSymbolPage {
+  my $url = SymbolPageURL();
+  my $command = ShellEscape(@URL_FETCHER, $url);
+  open(SYMBOL, "$command |") or error($command);
+  my $line = <SYMBOL>;
+  $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines
+  close(SYMBOL);
+  unless (defined($line)) {
+    error("$url doesn't exist\n");
+  }
+
+  if ($line =~ /^num_symbols:\s+(\d+)$/) {
+    if ($1 == 0) {
+      error("Stripped binary. No symbols available.\n");
+    }
+  } else {
+    error("Failed to get the number of symbols from $url\n");
+  }
+}
+
+sub IsProfileURL {
+  my $profile_name = shift;
+  if (-f $profile_name) {
+    printf STDERR "Using local file $profile_name.\n";
+    return 0;
+  }
+  return 1;
+}
+
+sub ParseProfileURL {
+  my $profile_name = shift;
+
+  if (!defined($profile_name) || $profile_name eq "") {
+    return ();
+  }
+
+  # Split profile URL - matches all non-empty strings, so no test.
+  $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;
+
+  my $proto = $1 || "http://";
+  my $hostport = $2;
+  my $prefix = $3;
+  my $profile = $4 || "/";
+
+  my $host = $hostport;
+  $host =~ s/:.*//;
+
+  my $baseurl = "$proto$hostport$prefix";
+  return ($host, $baseurl, $profile);
+}
+
+# We fetch symbols from the first profile argument.
+sub SymbolPageURL {
+  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
+  return "$baseURL$SYMBOL_PAGE";
+}
+
+sub FetchProgramName() {
+  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
+  my $url = "$baseURL$PROGRAM_NAME_PAGE";
+  my $command_line = ShellEscape(@URL_FETCHER, $url);
+  open(CMDLINE, "$command_line |") or error($command_line);
+  my $cmdline = <CMDLINE>;
+  $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
+  close(CMDLINE);
+  error("Failed to get program name from $url\n") unless defined($cmdline);
+  $cmdline =~ s/\x00.+//;  # Remove argv[1] and later arguments.
+  $cmdline =~ s!\n!!g;     # Remove LFs.
+  return $cmdline;
+}
+
+# Gee, curl's -L (--location) option isn't reliable at least
+# with its 7.12.3 version. Curl will forget to post data if
+# there is a redirection. This function is a workaround for
+# curl. Redirection happens on borg hosts.
+sub ResolveRedirectionForCurl {
+  my $url = shift;
+  my $command_line = ShellEscape(@URL_FETCHER, "--head", $url);
+  open(CMDLINE, "$command_line |") or error($command_line);
+  while (<CMDLINE>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    if (/^Location: (.*)/) {
+      $url = $1;
+    }
+  }
+  close(CMDLINE);
+  return $url;
+}
+
+# Add a timeout flag to URL_FETCHER. Returns a new list.
+sub AddFetchTimeout {
+  my $timeout = shift;
+  my @fetcher = @_;
+  if (defined($timeout)) {
+    if (join(" ", @fetcher) =~ m/\bcurl -s/) {
+      push(@fetcher, "--max-time", sprintf("%d", $timeout));
+    } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) {
+      push(@fetcher, sprintf("--deadline=%d", $timeout));
+    }
+  }
+  return @fetcher;
+}
+
+# Reads a symbol map from the file handle given as the first argument,
+# returning the resulting symbol map.  Also processes variables relating
+# to symbols.  Currently, the only variable processed is 'binary=<value>'
+# which updates $main::prog to have the correct program name.
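+# Example input (hypothetical addresses and names); leading zeroes are
+# stripped from each address, and "---" ends the symbol section:
+#   binary=/usr/local/bin/myserver
+#   0x00000000004006e8 MyClass::Run
+#   0x00000000004005d0 main
+#   ---
+# This yields $map->{"4006e8"} eq "MyClass::Run" and $map->{"4005d0"} eq
+# "main", and records "/usr/local/bin/myserver" in $main::prog.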
+sub ReadSymbols { + my $in = shift; + my $map = {}; + while (<$in>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Removes all the leading zeroes from the symbols, see comment below. + if (m/^0x0*([0-9a-f]+)\s+(.+)/) { + $map->{$1} = $2; + } elsif (m/^---/) { + last; + } elsif (m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1, $2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "binary") { + if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { + printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", + $main::prog, $value); + } + $main::prog = $value; + } else { + printf STDERR ("Ignoring unknown variable in symbols list: " . + "'%s' = '%s'\n", $variable, $value); + } + } + } + return $map; +} + +# Fetches and processes symbols to prepare them for use in the profile output +# code. If the optional 'symbol_map' arg is not given, fetches symbols from +# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols +# are assumed to have already been fetched into 'symbol_map' and are simply +# extracted and processed. +sub FetchSymbols { + my $pcset = shift; + my $symbol_map = shift; + + my %seen = (); + my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq + + if (!defined($symbol_map)) { + my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); + + open(POSTFILE, ">$main::tmpfile_sym"); + print POSTFILE $post_data; + close(POSTFILE); + + my $url = SymbolPageURL(); + + my $command_line; + if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { + $url = ResolveRedirectionForCurl($url); + $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", + $url); + } else { + $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) + . " < " . ShellEscape($main::tmpfile_sym)); + } + # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. + my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); + open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); + $symbol_map = ReadSymbols(*SYMBOL{IO}); + close(SYMBOL); + } + + my $symbols = {}; + foreach my $pc (@pcs) { + my $fullname; + # For 64 bits binaries, symbols are extracted with 8 leading zeroes. + # Then /symbol reads the long symbols in as uint64, and outputs + # the result with a "0x%08llx" format which get rid of the zeroes. + # By removing all the leading zeroes in both $pc and the symbols from + # /symbol, the symbols match and are retrievable from the map. + my $shortpc = $pc; + $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated (in + # PrintSymbolizedProfile), by --, which is illegal in function names. + my $fullnames; + if (defined($symbol_map->{$shortpc})) { + $fullnames = $symbol_map->{$shortpc}; + } else { + $fullnames = "0x" . 
$pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); + } + } + return $symbols; +} + +sub BaseName { + my $file_name = shift; + $file_name =~ s!^.*/!!; # Remove directory name + return $file_name; +} + +sub MakeProfileBaseName { + my ($binary_name, $profile_name) = @_; + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my $binary_shortname = BaseName($binary_name); + return sprintf("%s.%s.%s", + $binary_shortname, $main::op_time, $host); +} + +sub FetchDynamicProfile { + my $binary_name = shift; + my $profile_name = shift; + my $fetch_name_only = shift; + my $encourage_patience = shift; + + if (!IsProfileURL($profile_name)) { + return $profile_name; + } else { + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + if ($path eq "" || $path eq "/") { + # Missing type specifier defaults to cpu-profile + $path = $PROFILE_PAGE; + } + + my $profile_file = MakeProfileBaseName($binary_name, $profile_name); + + my $url = "$baseURL$path"; + my $fetch_timeout = undef; + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { + if ($path =~ m/[?]/) { + $url .= "&"; + } else { + $url .= "?"; + } + $url .= sprintf("seconds=%d", $main::opt_seconds); + $fetch_timeout = $main::opt_seconds * 1.01 + 60; + } else { + # For non-CPU profiles, we add a type-extension to + # the target profile file name. + my $suffix = $path; + $suffix =~ s,/,.,g; + $profile_file .= $suffix; + } + + my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); + if (! -d $profile_dir) { + mkdir($profile_dir) + || die("Unable to create profile directory $profile_dir: $!\n"); + } + my $tmp_profile = "$profile_dir/.tmp.$profile_file"; + my $real_profile = "$profile_dir/$profile_file"; + + if ($fetch_name_only > 0) { + return $real_profile; + } + + my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); + my $cmd = ShellEscape(@fetcher, $url) . " > " . 
ShellEscape($tmp_profile); + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ + print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; + if ($encourage_patience) { + print STDERR "Be patient...\n"; + } + } else { + print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; + } + + (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); + (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); + print STDERR "Wrote profile to $real_profile\n"; + $main::collected_profile = $real_profile; + return $main::collected_profile; + } +} + +# Collect profiles in parallel +sub FetchDynamicProfiles { + my $items = scalar(@main::pfile_args); + my $levels = log($items) / log(2); + + if ($items == 1) { + $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); + } else { + # math rounding issues + if ((2 ** $levels) < $items) { + $levels++; + } + my $count = scalar(@main::pfile_args); + for (my $i = 0; $i < $count; $i++) { + $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); + } + print STDERR "Fetching $count profiles, Be patient...\n"; + FetchDynamicProfilesRecurse($levels, 0, 0); + $main::collected_profile = join(" \\\n ", @main::profile_files); + } +} + +# Recursively fork a process to get enough processes +# collecting profiles +sub FetchDynamicProfilesRecurse { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if (my $pid = fork()) { + $position = 0 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + wait; + } else { + $position = 1 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + cleanup(); + exit(0); + } +} + +# Collect a single profile +sub TryCollectProfile { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if ($level >= ($maxlevel - 1)) { + if ($position < scalar(@main::pfile_args)) { + FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); + } + } else { + FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); + } +} + +##### Parsing code ##### + +# Provide a small streaming-read module to handle very large +# cpu-profile files. Stream in chunks along a sliding window. +# Provides an interface to get one 'slot', correctly handling +# endian-ness differences. A slot is one 32-bit or 64-bit word +# (depending on the input profile). We tell endianness and bit-size +# for the profile by looking at the first 8 bytes: in cpu profiles, +# the second slot is always 3 (we'll accept anything that's not 0). +BEGIN { + package CpuProfileStream; + + sub new { + my ($class, $file, $fname) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of bitsize/8 + slots => [], + unpack_code => "", # N for big-endian, V for little + perl_is_64bit => 1, # matters if profile is 64-bit + }; + bless $self, $class; + # Let unittests adjust the stride + if ($main::opt_test_stride > 0) { + $self->{stride} = $main::opt_test_stride; + } + # Read the first two slots to figure out bitsize and endianness. + my $slots = $self->{slots}; + my $str; + read($self->{file}, $str, 8); + # Set the global $address_length based on what we see here. + # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). + $address_length = ($str eq (chr(0)x8)) ? 16 : 8; + if ($address_length == 8) { + if (substr($str, 6, 2) eq chr(0)x2) { + $self->{unpack_code} = 'V'; # Little-endian. 
+ } elsif (substr($str, 4, 2) eq chr(0)x2) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**16\n"); + } + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # If we're a 64-bit profile, check if we're a 64-bit-capable + # perl. Otherwise, each slot will be represented as a float + # instead of an int64, losing precision and making all the + # 64-bit addresses wrong. We won't complain yet, but will + # later if we ever see a value that doesn't fit in 32 bits. + my $has_q = 0; + eval { $has_q = pack("Q", "1") ? 1 : 1; }; + if (!$has_q) { + $self->{perl_is_64bit} = 0; + } + read($self->{file}, $str, 8); + if (substr($str, 4, 4) eq chr(0)x4) { + # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 0, 4) eq chr(0)x4) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**32\n"); + } + my @pair = unpack($self->{unpack_code} . "*", $str); + # Since we know one of the pair is 0, it's fine to just add them. + @$slots = (0, $pair[0] + $pair[1]); + } + return $self; + } + + # Load more data when we access slots->get(X) which is not yet in memory. + sub overflow { + my ($self) = @_; + my $slots = $self->{slots}; + $self->{base} += $#$slots + 1; # skip over data we're replacing + my $str; + read($self->{file}, $str, $self->{stride}); + if ($address_length == 8) { # the 32-bit case + # This is the easy case: unpack provides 32-bit unpacking primitives. + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # We need to unpack 32 bits at a time and combine. + my @b32_values = unpack($self->{unpack_code} . "*", $str); + my @b64_values = (); + for (my $i = 0; $i < $#b32_values; $i += 2) { + # TODO(csilvers): if this is a 32-bit perl, the math below + # could end up in a too-large int, which perl will promote + # to a double, losing necessary precision. Deal with that. + # Right now, we just die. + my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + if ($self->{unpack_code} eq 'N') { # big-endian + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); + } + @$slots = @b64_values; + } + } + + # Access the i-th long in the file (logically), or -1 at EOF. + sub get { + my ($self, $idx) = @_; + my $slots = $self->{slots}; + while ($#$slots >= 0) { + if ($idx < $self->{base}) { + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return + } elsif ($idx > $self->{base} + $#$slots) { + $self->overflow(); + } else { + return $slots->[$idx - $self->{base}]; + } + } + # If we get here, $slots is [], which means we've reached EOF + return -1; # unique since slots is supposed to hold unsigned numbers + } +} + +# Reads the top, 'header' section of a profile, and returns the last +# line of the header, commonly called a 'header line'. 
The header +# section of a profile consists of zero or more 'command' lines that +# are instructions to pprof, which pprof executes when reading the +# header. All 'command' lines start with a %. After the command +# lines is the 'header line', which is a profile-specific line that +# indicates what type of profile it is, and perhaps other global +# information about the profile. For instance, here's a header line +# for a heap profile: +# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile +# For historical reasons, the CPU profile does not contain a text- +# readable header line. If the profile looks like a CPU profile, +# this function returns "". If no header line could be found, this +# function returns undef. +# +# The following commands are recognized: +# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' +# +# The input file should be in binmode. +sub ReadProfileHeader { + local *PROFILE = shift; + my $firstchar = ""; + my $line = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar !~ /[[:print:]]/) { # is not a text character + return ""; + } + while (defined($line = <PROFILE>)) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /^%warn\s+(.*)/) { # 'warn' command + # Note this matches both '%warn blah\n' and '%warn\n'. + print STDERR "WARNING: $1\n"; # print the rest of the line + } elsif ($line =~ /^%/) { + print STDERR "Ignoring unknown command from profile header: $line"; + } else { + # End of commands, must be the header line. + return $line; + } + } + return undef; # got to EOF without seeing a header line +} + +sub IsSymbolizedProfileFile { + my $file_name = shift; + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + # Check if the file contains a symbol-section marker. + open(TFILE, "<$file_name"); + binmode TFILE; + my $firstline = ReadProfileHeader(*TFILE); + close(TFILE); + if (!$firstline) { + return 0; + } + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + return $firstline =~ /^--- *$symbol_marker/; +} + +# Parse profile generated by common/profiler.cc and return a reference +# to a map: +# $result->{version} Version number of profile file +# $result->{period} Sampling period (in microseconds) +# $result->{profile} Profile object +# $result->{map} Memory map info from profile +# $result->{pcs} Hash of all PC values seen, key is hex address +sub ReadProfile { + my $prog = shift; + my $fname = shift; + my $result; # return value + + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $contention_marker = $&; + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $growth_marker = $&; + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + + # Look at first line to see if it is a heap or a CPU profile. + # CPU profile may start with no header at all, and just binary data + # (starting with \0\0\0\0) -- in that case, don't try to read the + # whole firstline, since it may be gigabytes(!) of data. 
+ open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + my $header = ReadProfileHeader(*PROFILE); + if (!defined($header)) { # means "at EOF" + error("Profile is empty.\n"); + } + + my $symbols; + if ($header =~ m/^--- *$symbol_marker/o) { + # Verify that the user asked for a symbolized profile + if (!$main::use_symbolized_profile) { + # we have both a binary and symbolized profiles, abort + error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . + "a binary arg. Try again without passing\n $prog\n"); + } + # Read the symbol section of the symbolized profile file. + $symbols = ReadSymbols(*PROFILE{IO}); + # Read the next line to get the header for the remaining profile. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + $main::profile_type = ''; + if ($header =~ m/^heap profile:.*$growth_marker/o) { + $main::profile_type = 'growth'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap profile:/) { + $main::profile_type = 'heap'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^--- *$contention_marker/o) { + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *Stacks:/) { + print STDERR + "Old format contention profile: mistakenly reports " . + "condition variable signals as lock contentions.\n"; + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *$profile_marker/) { + # the binary cpu profile data starts immediately after this line + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } else { + if (defined($symbols)) { + # a symbolized profile contains a format we don't recognize, bail out + error("$fname: Cannot recognize profile section after symbols.\n"); + } + # no ascii header present -- must be a CPU profile + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } + + close(PROFILE); + + # if we got symbols along with the profile, return those as well + if (defined($symbols)) { + $result->{symbols} = $symbols; + } + + return $result; +} + +# Subtract one from caller pc so we map back to call instr. +# However, don't do this if we're reading a symbolized profile +# file, in which case the subtract-one was done when the file +# was written. +# +# We apply the same logic to all readers, though ReadCPUProfile uses an +# independent implementation. +sub FixCallerAddresses { + my $stack = shift; + if ($main::use_symbolized_profile) { + return $stack; + } else { + $stack =~ /(\s)/; + my $delimiter = $1; + my @addrs = split(' ', $stack); + my @fixedaddrs; + $#fixedaddrs = $#addrs; + if ($#addrs >= 0) { + $fixedaddrs[0] = $addrs[0]; + } + for (my $i = 1; $i <= $#addrs; $i++) { + $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); + } + return join $delimiter, @fixedaddrs; + } +} + +# CPU profile reader +sub ReadCPUProfile { + my $prog = shift; + my $fname = shift; # just used for logging + local *PROFILE = shift; + my $version; + my $period; + my $i; + my $profile = {}; + my $pcs = {}; + + # Parse string into array of slots. + my $slots = CpuProfileStream->new(*PROFILE, $fname); + + # Read header. 
The current header version is a 5-element structure + # containing: + # 0: header count (always 0) + # 1: header "words" (after this one: 3) + # 2: format version (0) + # 3: sampling period (usec) + # 4: unused padding (always 0) + if ($slots->get(0) != 0 ) { + error("$fname: not a profile file, or old format profile file\n"); + } + $i = 2 + $slots->get(1); + $version = $slots->get(2); + $period = $slots->get(3); + # Do some sanity checking on these header values. + if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { + error("$fname: not a profile file, or corrupted profile file\n"); + } + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? + my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); + print STDERR "At index $i (address $addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots->get($i+$j); + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. + if ($j > 0 && !$main::use_symbolized_profile) { + $pc--; + } + $pc = sprintf("%0*x", $address_length, $pc); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; + } + + # Parse map + my $map = ''; + seek(PROFILE, $i * ($address_length / 2), 0); + read(PROFILE, $map, (stat PROFILE)[7]); + + my $r = {}; + $r->{version} = $version; + $r->{period} = $period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + + return $r; +} + +sub ReadHeapProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $index = 1; + if ($main::opt_inuse_space) { + $index = 1; + } elsif ($main::opt_inuse_objects) { + $index = 0; + } elsif ($main::opt_alloc_space) { + $index = 3; + } elsif ($main::opt_alloc_objects) { + $index = 2; + } + + # Find the type of this profile. The header line looks like: + # heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053 + # There are two pairs <count: size>, the first inuse objects/space, and the + # second allocated objects/space. This is followed optionally by a profile + # type, and if that is present, optionally by a sampling frequency. + # For remote heap profiles (v1): + # The interpretation of the sampling frequency is that the profiler, for + # each sample, calculates a uniformly distributed random integer less than + # the given value, and records the next sample after that many bytes have + # been allocated. Therefore, the expected sample interval is half of the + # given frequency. By default, if not specified, the expected sample + # interval is 128KB. Only remote-heap-page profiles are adjusted for + # sample size. + # For remote heap profiles (v2): + # The sampling frequency is the rate of a Poisson process. This means that + # the probability of sampling an allocation of size X with sampling rate Y + # is 1 - exp(-X/Y) + # For version 2, a typical header line might look like this: + # heap profile: 1922: 127792360 [ 1922: 127792360] @ <heap-url>_v2/524288 + # the trailing number (524288) is the sampling rate. 
(Version 1 showed + # double the 'rate' here) + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { + if (defined($6) && ($6 ne '')) { + $type = $6; + my $sample_period = $8; + # $type is "heapprofile" for profiles generated by the + # heap-profiler, and either "heap" or "heap_v2" for profiles + # generated by sampling directly within tcmalloc. It can also + # be "growth" for heap-growth profiles. The first is typically + # found for profiles generated locally, and the others for + # remote profiles. + if (($type eq "heapprofile") || ($type !~ /heap/) ) { + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; + } elsif ($type =~ /_v2/) { + $sampling_algorithm = 2; # version 2 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period); + } + } else { + $sampling_algorithm = 1; # version 1 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period)/2; + } + } + } else { + # We detect whether or not this is a remote-heap profile by checking + # that the total-allocated stats ($n2,$s2) are exactly the + # same as the in-use stats ($n1,$s1). It is remotely conceivable + # that a non-remote-heap profile may pass this check, but it is hard + # to imagine how that could happen. + # In this case it's so old it's guaranteed to be remote-heap version 1. + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if (($n1 == $n2) && ($s1 == $s2)) { + # This is likely to be a remote-heap based sample profile + $sampling_algorithm = 1; + } + } + } + + if ($sampling_algorithm > 0) { + # For remote-heap generated profiles, adjust the counts and sizes to + # account for the sample rate (we sample once every 128KB by default). + if ($sample_adjustment == 0) { + # Turn on profile adjustment. + $sample_adjustment = 128*1024; + print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; + } else { + printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", + $sample_adjustment); + } + if ($sampling_algorithm > 1) { + # We don't bother printing anything for the original version (version 1) + printf STDERR "Heap version $sampling_algorithm\n"; + } + } + + my $profile = {}; + my $pcs = {}; + my $map = ""; + + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^MAPPED_LIBRARIES:/) { + # Read the /proc/self/maps data + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $map .= $_; + } + last; + } + + if (/^--- Memory map:/) { + # Read /proc/self/maps data as formatted by DumpAddressMap() + my $buildvar = ""; + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Parse "build=<dir>" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } + + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; + + $map .= $_; + } + last; + } + + # Read entry of the form: + # <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an + s/^\s*//; + s/\s*$//; + if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { + my $stack = $5; + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + + if ($sample_adjustment) { + if ($sampling_algorithm == 2) { + # Remote-heap version 2 + # The sampling frequency is the rate of a Poisson process. 
+          # In general, the probability of sampling an allocation of
+          # size X with sampling rate Y is 1 - exp(-X/Y)
+          if ($n1 != 0) {
+            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n1 *= $scale_factor;
+            $s1 *= $scale_factor;
+          }
+          if ($n2 != 0) {
+            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n2 *= $scale_factor;
+            $s2 *= $scale_factor;
+          }
+        } else {
+          # Remote-heap version 1
+          my $ratio;
+          if ($n1 > 0) {   # guard against empty buckets (division by zero)
+            $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+            if ($ratio < 1) {
+              $n1 /= $ratio;
+              $s1 /= $ratio;
+            }
+          }
+          if ($n2 > 0) {
+            $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+            if ($ratio < 1) {
+              $n2 /= $ratio;
+              $s2 /= $ratio;
+            }
+          }
+        }
+      }
+
+      my @counts = ($n1, $s1, $n2, $s2);
+      $stack = FixCallerAddresses($stack);
+      push @stackTraces, "$n1 $s1 $n2 $s2 $stack";
+      AddEntries($profile, $pcs, $stack, $counts[$index]);
+    }
+  }
+
+  my $r = {};
+  $r->{version} = "heap";
+  $r->{period} = 1;
+  $r->{profile} = $profile;
+  $r->{libs} = ParseLibraries($prog, $map, $pcs);
+  $r->{pcs} = $pcs;
+  return $r;
+}
+
+sub ReadSynchProfile {
+  my $prog = shift;
+  local *PROFILE = shift;
+  my $header = shift;
+
+  my $map = '';
+  my $profile = {};
+  my $pcs = {};
+  my $sampling_period = 1;
+  my $cyclespernanosec = 2.8;   # Default assumption for old binaries
+  my $seen_clockrate = 0;
+  my $line;
+
+  my $index = 0;
+  if ($main::opt_total_delay) {
+    $index = 0;
+  } elsif ($main::opt_contentions) {
+    $index = 1;
+  } elsif ($main::opt_mean_delay) {
+    $index = 2;
+  }
+
+  while ( $line = <PROFILE> ) {
+    $line =~ s/\r//g;    # turn windows-looking lines into unix-looking lines
+    if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
+      my ($cycles, $count, $stack) = ($1, $2, $3);
+
+      # Convert cycles to nanoseconds
+      $cycles /= $cyclespernanosec;
+
+      # Adjust for sampling done by application
+      $cycles *= $sampling_period;
+      $count *= $sampling_period;
+
+      my @values = ($cycles, $count, $cycles / $count);
+      AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]);
+
+    } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
+              $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
+      my ($cycles, $stack) = ($1, $2);
+      if ($cycles !~ /^\d+$/) {
+        next;
+      }
+
+      # Convert cycles to nanoseconds
+      $cycles /= $cyclespernanosec;
+
+      # Adjust for sampling done by application
+      $cycles *= $sampling_period;
+
+      AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles);
+
+    } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) {
+      my ($variable, $value) = ($1,$2);
+      for ($variable, $value) {
+        s/^\s+//;
+        s/\s+$//;
+      }
+      if ($variable eq "cycles/second") {
+        $cyclespernanosec = $value / 1e9;
+        $seen_clockrate = 1;
+      } elsif ($variable eq "sampling period") {
+        $sampling_period = $value;
+      } elsif ($variable eq "ms since reset") {
+        # Currently nothing is done with this value in pprof
+        # So we just silently ignore it for now
+      } elsif ($variable eq "discarded samples") {
+        # Currently nothing is done with this value in pprof
+        # So we just silently ignore it for now
+      } else {
+        printf STDERR ("Ignoring unknown variable in /contention output: " .
+ "'%s' = '%s'\n",$variable,$value); + } + } else { + # Memory map entry + $map .= $line; + } + } + + if (!$seen_clockrate) { + printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", + $cyclespernanosec); + } + + my $r = {}; + $r->{version} = 0; + $r->{period} = $sampling_period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +# Given a hex value in the form "0x1abcd" or "1abcd", return either +# "0001abcd" or "000000000001abcd", depending on the current (global) +# address length. +sub HexExtend { + my $addr = shift; + + $addr =~ s/^(0x)?0*//; + my $zeros_needed = $address_length - length($addr); + if ($zeros_needed < 0) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + return $addr; + } + return ("0" x $zeros_needed) . $addr; +} + +##### Symbol extraction ##### + +# Aggressively search the lib_prefix values for the given library +# If all else fails, just return the name of the library unmodified. +# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" +# it will search the following locations in this order, until it finds a file: +# /my/path/lib/dir/mylib.so +# /other/path/lib/dir/mylib.so +# /my/path/dir/mylib.so +# /other/path/dir/mylib.so +# /my/path/mylib.so +# /other/path/mylib.so +# /lib/dir/mylib.so (returned as last resort) +sub FindLibrary { + my $file = shift; + my $suffix = $file; + + # Search for the library as described above + do { + foreach my $prefix (@prefix_list) { + my $fullpath = $prefix . $suffix; + if (-e $fullpath) { + return $fullpath; + } + } + } while ($suffix =~ s|^/[^/]+/|/|); + return $file; +} + +# Return path to library with debugging symbols. +# For libc libraries, the copy in /usr/lib/debug contains debugging symbols +sub DebuggingLibrary { + my $file = shift; + if ($file =~ m|^/| && -f "/usr/lib/debug$file") { + return "/usr/lib/debug$file"; + } + if ($file =~ m|^/| && -f "/usr/lib/debug$file.debug") { + return "/usr/lib/debug$file.debug"; + } + return undef; +} + +# Parse text section header of a library using objdump +sub ParseTextSectionHeaderFromObjdump { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get objdump output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + while (<OBJDUMP>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Idx Name Size VMA LMA File off Algn + # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 + # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file + # offset may still be 8. But AddressSub below will still handle that. + my @x = split; + if (($#x >= 6) && ($x[1] eq '.text')) { + $size = $x[2]; + $vma = $x[3]; + $file_offset = $x[5]; + last; + } + } + close(OBJDUMP); + + if (!defined($size)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +# Parse text section header of a library using otool (on OS X) +sub ParseTextSectionHeaderFromOtool { + my $lib = shift; + + my $size = undef; + my $vma = undef; + my $file_offset = undef; + # Get otool output from the library file to figure out how to + # map between mapped addresses and addresses in the library. 
+ my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); + open(OTOOL, "$command |") || error("$command: $!\n"); + my $cmd = ""; + my $sectname = ""; + my $segname = ""; + foreach my $line (<OTOOL>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + # Load command <#> + # cmd LC_SEGMENT + # [...] + # Section + # sectname __text + # segname __TEXT + # addr 0x000009f8 + # size 0x00018b9e + # offset 2552 + # align 2^2 (4) + # We will need to strip off the leading 0x from the hex addresses, + # and convert the offset into hex. + if ($line =~ /Load command/) { + $cmd = ""; + $sectname = ""; + $segname = ""; + } elsif ($line =~ /Section/) { + $sectname = ""; + $segname = ""; + } elsif ($line =~ /cmd (\w+)/) { + $cmd = $1; + } elsif ($line =~ /sectname (\w+)/) { + $sectname = $1; + } elsif ($line =~ /segname (\w+)/) { + $segname = $1; + } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && + $sectname eq "__text" && + $segname eq "__TEXT")) { + next; + } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { + $vma = $1; + } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { + $size = $1; + } elsif ($line =~ /\boffset ([0-9]+)/) { + $file_offset = sprintf("%016x", $1); + } + if (defined($vma) && defined($size) && defined($file_offset)) { + last; + } + } + close(OTOOL); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +sub ParseTextSectionHeader { + # obj_tool_map("otool") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"otool"})) { + my $r = ParseTextSectionHeaderFromOtool(@_); + if (defined($r)){ + return $r; + } + } + # If otool doesn't work, or we don't have it, fall back to objdump + return ParseTextSectionHeaderFromObjdump(@_); +} + +# Split /proc/pid/maps dump into a list of libraries +sub ParseLibraries { + return if $main::use_symbol_page; # We don't need libraries info. + my $prog = Cwd::abs_path(shift); + my $map = shift; + my $pcs = shift; + + my $result = []; + my $h = "[a-f0-9]+"; + my $zero_offset = HexExtend("0"); + + my $buildvar = ""; + my $priorlib = ""; + foreach my $l (split("\n", $map)) { + if ($l =~ m/^\s*build=(.*)$/) { + $buildvar = $1; + } + + my $start; + my $finish; + my $offset; + my $lib; + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(.+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { + # Full line from /proc/self/maps. Example: + # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + # Cooked line from DumpAddressMap. Example: + # 40000000-40015000: /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; + } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable in + # /proc/self/maps as well. 
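+      # For example, a PIE main binary might show up as (hypothetical line):
+      #   55e32c43f000-55e32c447000 r-xp 00000000 08:01 912  /usr/bin/prog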
+ $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } else { + next; + } + + # Expand "$build" variable if available + $lib =~ s/\$build\b/$buildvar/g; + + $lib = FindLibrary($lib); + + # Check for pre-relocated libraries, which use pre-relocated symbol tables + # and thus require adjusting the offset that we'll use to translate + # VM addresses into symbol table addresses. + # Only do this if we're not going to fetch the symbol table from a + # debugging copy of the library. + if (!DebuggingLibrary($lib)) { + my $text = ParseTextSectionHeader($lib); + if (defined($text)) { + my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); + $offset = AddressAdd($offset, $vma_offset); + } + } + + # If we find multiple executable segments for a single library, merge them + # into a single entry that spans the complete address range. + if ($lib eq $priorlib) { + my $prior = pop(@{$result}); + $start = @$prior[1]; + # TODO $offset may be wrong if .text is not in the final segment. + } + + push(@{$result}, [$lib, $start, $finish, $offset]); + $priorlib = $lib; + } + + # Append special entry for additional library (not relocated) + if ($main::opt_lib ne "") { + my $text = ParseTextSectionHeader($main::opt_lib); + if (defined($text)) { + my $start = $text->{vma}; + my $finish = AddressAdd($start, $text->{size}); + + push(@{$result}, [$main::opt_lib, $start, $finish, $start]); + } + } + + # Append special entry for the main program. This covers + # 0..max_pc_value_seen, so that we assume pc values not found in one + # of the library ranges will be treated as coming from the main + # program binary. + my $min_pc = HexExtend("0"); + my $max_pc = $min_pc; # find the maximal PC value in any sample + foreach my $pc (keys(%{$pcs})) { + if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } + } + push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); + + return $result; +} + +# Add two hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. +sub AddressAdd { + my $addr1 = shift; + my $addr2 = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. + + if ($main::opt_debug and $main::opt_test) { + print STDERR "AddressAdd $addr1 + $addr2 = "; + } + + my $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + my $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2); + my $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + my $r = sprintf("%07x", $sum); + + $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2) + $c; + $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + $r = sprintf("%07x", $sum) . $r; + + $sum = hex($addr1) + hex($addr2) + $c; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } + + return $r; + } +} + + +# Subtract two hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. 
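+# For example, with $address_length == 16 the result wraps modulo 2**64,
+# matching the unit-test vectors near the end of this script:
+#   AddressSub("0000000000000001", "ffffffffffffffff") eq "0000000000000002"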
+sub AddressSub {
+  my $addr1 = shift;
+  my $addr2 = shift;
+  my $diff;
+
+  if ($address_length == 8) {
+    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+    $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16);
+    return sprintf("%08x", $diff);
+
+  } else {
+    # Do the subtraction in 7-nibble chunks to trivialize borrow handling.
+    # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; }
+
+    my $a1 = hex(substr($addr1,-7));
+    $addr1 = substr($addr1,0,-7);
+    my $a2 = hex(substr($addr2,-7));
+    $addr2 = substr($addr2,0,-7);
+    my $b = 0;
+    if ($a2 > $a1) {
+      $b = 1;
+      $a1 += 0x10000000;
+    }
+    $diff = $a1 - $a2;
+    my $r = sprintf("%07x", $diff);
+
+    $a1 = hex(substr($addr1,-7));
+    $addr1 = substr($addr1,0,-7);
+    $a2 = hex(substr($addr2,-7)) + $b;
+    $addr2 = substr($addr2,0,-7);
+    $b = 0;
+    if ($a2 > $a1) {
+      $b = 1;
+      $a1 += 0x10000000;
+    }
+    $diff = $a1 - $a2;
+    $r = sprintf("%07x", $diff) . $r;
+
+    $a1 = hex($addr1);
+    $a2 = hex($addr2) + $b;
+    if ($a2 > $a1) { $a1 += 0x100; }
+    $diff = $a1 - $a2;
+    $r = sprintf("%02x", $diff) . $r;
+
+    # if ($main::opt_debug) { print STDERR "$r\n"; }
+
+    return $r;
+  }
+}
+
+# Increment a hex address of length $address_length.
+# Run pprof --test for unit test if this is changed.
+sub AddressInc {
+  my $addr = shift;
+  my $sum;
+
+  if ($address_length == 8) {
+    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+    $sum = (hex($addr)+1) % (0x10000000 * 16);
+    return sprintf("%08x", $sum);
+
+  } else {
+    # Do the addition in 7-nibble chunks to trivialize carry handling.
+    # We are always doing this to step through the addresses in a function,
+    # and will almost never overflow the first chunk, so we check for this
+    # case and exit early.
+
+    # if ($main::opt_debug) { print STDERR "AddressInc $addr = "; }
+
+    my $a1 = substr($addr,-7);
+    $addr = substr($addr,0,-7);
+    $sum = hex($a1) + 1;
+    my $r = sprintf("%07x", $sum);
+    if ($sum <= 0xfffffff) {
+      $r = $addr . $r;
+      # if ($main::opt_debug) { print STDERR "$r\n"; }
+      return HexExtend($r);
+    } else {
+      $r = "0000000";
+    }
+
+    $a1 = substr($addr,-7);
+    $addr = substr($addr,0,-7);
+    $sum = hex($a1) + 1;
+    $r = sprintf("%07x", $sum) . $r;
+    if ($sum <= 0xfffffff) {
+      $r = $addr . $r;
+      # if ($main::opt_debug) { print STDERR "$r\n"; }
+      return HexExtend($r);
+    } else {
+      $r = "00000000000000";
+    }
+
+    $sum = hex($addr) + 1;
+    if ($sum > 0xff) { $sum -= 0x100; }
+    $r = sprintf("%02x", $sum) . $r;
+
+    # if ($main::opt_debug) { print STDERR "$r\n"; }
+    return $r;
+  }
+}
+
+# Extract symbols for all PC values found in profile
+sub ExtractSymbols {
+  my $libs = shift;
+  my $pcset = shift;
+
+  my $symbols = {};
+
+  # Map each PC value to the containing library.  To make this faster,
+  # we sort libraries by their starting pc value (highest first), and
+  # advance through the libraries as we advance the pc.  Sometimes the
+  # addresses of libraries may overlap with the addresses of the main
+  # binary, so to make sure the libraries 'win', we iterate over the
+  # libraries in reverse order (which assumes the binary doesn't start
+  # in the middle of a library, which seems a fair assumption).
+  my @pcs = (sort { $a cmp $b } keys(%{$pcset}));  # pcset is 0-extended strings
+  foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) {
+    my $libname = $lib->[0];
+    my $start = $lib->[1];
+    my $finish = $lib->[2];
+    my $offset = $lib->[3];
+
+    # Get list of pcs that belong in this library.
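+    # (Hypothetical sketch: with sorted @pcs = ("03f0", "0400", "0470",
+    # "0900") and a library spanning start "0400" to finish "0800", the
+    # splice below pulls out "0400" and "0470" and leaves the rest in
+    # @pcs for other mappings.)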
+ my $contained = []; + my ($start_pc_index, $finish_pc_index); + # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. + for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; + $finish_pc_index--) { + last if $pcs[$finish_pc_index - 1] le $finish; + } + # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. + for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; + $start_pc_index--) { + last if $pcs[$start_pc_index - 1] lt $start; + } + # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, + # in case there are overlaps in libraries and the main binary. + @{$contained} = splice(@pcs, $start_pc_index, + $finish_pc_index - $start_pc_index); + # Map to symbols + MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); + } + + return $symbols; +} + +# Map list of PC values to symbols for a given image +sub MapToSymbols { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + my $debug = 0; + + # For libc (and other) libraries, the copy in /usr/lib/debug contains debugging symbols + my $debugging = DebuggingLibrary($image); + if ($debugging) { + $image = $debugging; + } + + # Ignore empty binaries + if ($#{$pclist} < 0) { return; } + + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); + } + + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. + if (system("$cmd -i --help >$dev_null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. + if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat", $main::tmpfile_sym); + print("---- $cmd ---\n"); + system("$cmd < " . ShellEscape($main::tmpfile_sym)); + print("----\n"); + } + + open(SYMBOLS, "$cmd <" . 
ShellEscape($main::tmpfile_sym) . " |")
+      || error("$cmd: $!\n");
+  my $count = 0;   # Index in pclist
+  while (<SYMBOLS>) {
+    # Read fullfunction and filelineinfo from next pair of lines
+    s/\r?\n$//g;
+    my $fullfunction = $_;
+    $_ = <SYMBOLS>;
+    s/\r?\n$//g;
+    my $filelinenum = $_;
+
+    if (defined($sep_address) && $fullfunction eq $sep_symbol) {
+      # Terminating marker for data for this address
+      $count++;
+      next;
+    }
+
+    $filelinenum =~ s|\\|/|g;  # turn windows-style paths into unix-style paths
+
+    # Remove discriminator markers as this comes after the line number and
+    # confuses the rest of this script.
+    $filelinenum =~ s/ \(discriminator \d+\)$//;
+    # Convert unknown line numbers into line 0.
+    $filelinenum =~ s/:\?$/:0/;
+
+    my $pcstr = $pclist->[$count];
+    my $function = ShortFunctionName($fullfunction);
+    my $nms = $nm_symbols->{$pcstr};
+    if (defined($nms)) {
+      if ($fullfunction eq '??') {
+        # nm found a symbol for us.
+        $function = $nms->[0];
+        $fullfunction = $nms->[2];
+      } else {
+        # MapSymbolsWithNM tags each routine with its starting address,
+        # useful in case the image has multiple occurrences of this
+        # routine.  (It uses a syntax that resembles template parameters,
+        # which are automatically stripped out by ShortFunctionName().)
+        # addr2line does not provide the same information.  So we check
+        # if nm disambiguated our symbol, and if so take the annotated
+        # (nm) version of the routine-name.  TODO(csilvers): this won't
+        # catch overloaded, inlined symbols, which nm doesn't see.
+        # Better would be to do a check similar to nm's, in this fn.
+        if ($nms->[2] =~ m/^\Q$function\E/) {  # sanity check it's the right fn
+          $function = $nms->[0];
+          $fullfunction = $nms->[2];
+        }
+      }
+    }
+
+    # Prepend to accumulated symbols for pcstr
+    # (so that caller comes before callee)
+    my $sym = $symbols->{$pcstr};
+    if (!defined($sym)) {
+      $sym = [];
+      $symbols->{$pcstr} = $sym;
+    }
+    unshift(@{$sym}, $function, $filelinenum, $fullfunction);
+    if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
+    if (!defined($sep_address)) {
+      # Inlining is off, so this entry ends immediately
+      $count++;
+    }
+  }
+  close(SYMBOLS);
+}
+
+# Use nm to map the list of referenced PCs to symbols.  Return true iff we
+# are able to read procedure information via nm.
+sub MapSymbolsWithNM {
+  my $image = shift;
+  my $offset = shift;
+  my $pclist = shift;
+  my $symbols = shift;
+
+  # Get nm output sorted by increasing address
+  my $symbol_table = GetProcedureBoundaries($image, ".");
+  if (!%{$symbol_table}) {
+    return 0;
+  }
+  # Start addresses are already the right length (8 or 16 hex digits).
+  my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] }
+    keys(%{$symbol_table});
+
+  if ($#names < 0) {
+    # No symbols: just use addresses
+    foreach my $pc (@{$pclist}) {
+      my $pcstr = "0x" . $pc;
+      $symbols->{$pc} = [$pcstr, "?", $pcstr];
+    }
+    return 0;
+  }
+
+  # Sort addresses so we can do a join against nm output
+  my $index = 0;
+  my $fullname = $names[0];
+  my $name = ShortFunctionName($fullname);
+  foreach my $pc (sort { $a cmp $b } @{$pclist}) {
+    # Adjust for mapped offset
+    my $mpc = AddressSub($pc, $offset);
+    while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){
+      $index++;
+      $fullname = $names[$index];
+      $name = ShortFunctionName($fullname);
+    }
+    if ($mpc lt $symbol_table->{$fullname}->[1]) {
+      $symbols->{$pc} = [$name, "?", $fullname];
+    } else {
+      my $pcstr = "0x" .
$pc;
+      $symbols->{$pc} = [$pcstr, "?", $pcstr];
+    }
+  }
+  return 1;
+}
+
+sub ShortFunctionName {
+  my $function = shift;
+  while ($function =~ s/\([^()]*\)(\s*const)?//g) { }   # Argument types
+  $function =~ s/<[0-9a-f]*>$//g;                       # Remove Address
+  if (!$main::opt_no_strip_temp) {
+    while ($function =~ s/<[^<>]*>//g) { }              # Remove template arguments
+  }
+  $function =~ s/^.*\s+(\w+::)/$1/;                     # Remove leading type
+  return $function;
+}
+
+# Trim overly long symbols found in disassembler output
+sub CleanDisassembly {
+  my $d = shift;
+  while ($d =~ s/\([^()%]*\)(\s*const)?//g) { }  # Argument types, not (%rax)
+  while ($d =~ s/(\w+)<[^<>]*>/$1/g)  { }        # Remove template arguments
+  return $d;
+}
+
+# Clean file name for display
+sub CleanFileName {
+  my ($f) = @_;
+  $f =~ s|^/proc/self/cwd/||;
+  $f =~ s|^\./||;
+  return $f;
+}
+
+# Make address relative to section and clean up for display
+sub UnparseAddress {
+  my ($offset, $address) = @_;
+  $address = AddressSub($address, $offset);
+  $address =~ s/^0x//;
+  $address =~ s/^0*//;
+  return $address;
+}
+
+##### Miscellaneous #####
+
+# Find the right versions of the above object tools to use.  The
+# argument is the program file being analyzed, and should be an ELF
+# 32-bit or ELF 64-bit executable file.  The location of the tools
+# is determined by considering the following options in this order:
+#   1) --tools option, if set
+#   2) PPROF_TOOLS environment variable, if set
+#   3) the directory containing pprof, then the PATH
+sub ConfigureObjTools {
+  my $prog_file = shift;
+
+  # Check for the existence of $prog_file because /usr/bin/file does not
+  # predictably return error status in prod.
+  (-e $prog_file)  || error("$prog_file does not exist.\n");
+
+  my $file_type = undef;
+  if (-e "/usr/bin/file") {
+    # Follow symlinks (at least for systems where "file" supports that).
+    my $escaped_prog_file = ShellEscape($prog_file);
+    $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null ||
+                  /usr/bin/file $escaped_prog_file`;
+  } elsif ($^O eq "MSWin32") {
+    $file_type = "MS Windows";
+  } else {
+    print STDERR "WARNING: Can't determine the file type of $prog_file\n";
+  }
+
+  if ($file_type =~ /64-bit/) {
+    # Change $address_length to 16 if the program file is ELF 64-bit.
+    # We can't detect this from many (most?) heap or lock contention
+    # profiles, since the actual addresses referenced are generally in low
+    # memory even for 64-bit programs.
+    $address_length = 16;
+  }
+
+  if ($file_type =~ /MS Windows/) {
+    # For windows, we provide a version of nm and addr2line as part of
+    # the opensource release, which is capable of parsing
+    # Windows-style PDB executables.  It should live in the path, or
+    # in the same directory as pprof.
+    $obj_tool_map{"nm_pdb"} = "nm-pdb";
+    $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb";
+  }
+
+  if ($file_type =~ /Mach-O/) {
+    # OS X uses otool to examine Mach-O files, rather than objdump.
+    $obj_tool_map{"otool"} = "otool";
+    $obj_tool_map{"addr2line"} = "false";  # no addr2line
+    $obj_tool_map{"objdump"} = "false";    # no objdump
+  }
+
+  # Go fill in %obj_tool_map with the pathnames to use:
+  foreach my $tool (keys %obj_tool_map) {
+    $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool});
+  }
+}
+
+# Returns the path of a caller-specified object tool.  If --tools or
+# PPROF_TOOLS are specified, then returns the full path to the tool
+# with that prefix.  Otherwise, returns the path unmodified (which
+# means we will look for it on PATH).
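+# For example (hypothetical paths):
+#   --tools=/usr/local/cross/bin/
+#     uses /usr/local/cross/bin/nm, /usr/local/cross/bin/addr2line, etc.
+#   --tools=nm:/opt/llvm/bin/llvm-nm,addr2line:/opt/llvm/bin/llvm-addr2line
+#     maps individual tools to explicit paths.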
+sub ConfigureTool {
+  my $tool = shift;
+  my $path;
+
+  # --tools (or $PPROF_TOOLS) is a comma separated list, where each
+  # item is either a) a pathname prefix, or b) a map of the form
+  # <tool>:<path>.  First we look for an entry of type (b) for our
+  # tool.  If one is found, we use it.  Otherwise, we consider all the
+  # pathname prefixes in turn, until one yields an existing file.  If
+  # none does, we use a default path.
+  my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
+  if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
+    $path = $2;
+    # TODO(csilvers): sanity-check that $path exists?  Hard if it's relative.
+  } elsif ($tools ne '') {
+    foreach my $prefix (split(',', $tools)) {
+      next if ($prefix =~ /:/);    # ignore "tool:fullpath" entries in the list
+      if (-x $prefix . $tool) {
+        $path = $prefix . $tool;
+        last;
+      }
+    }
+    if (!$path) {
+      error("No '$tool' found with prefix specified by " .
+            "--tools (or \$PPROF_TOOLS) '$tools'\n");
+    }
+  } else {
+    # ... otherwise use the version that exists in the same directory as
+    # pprof.  If there's nothing there, use $PATH.
+    $0 =~ m,[^/]*$,;     # this is everything after the last slash
+    my $dirname = $`;    # this is everything up to and including the last slash
+    if (-x "$dirname$tool") {
+      $path = "$dirname$tool";
+    } else {
+      $path = $tool;
+    }
+  }
+  if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; }
+  return $path;
+}
+
+sub ShellEscape {
+  my @escaped_words = ();
+  foreach my $word (@_) {
+    my $escaped_word = $word;
+    if ($word =~ m![^a-zA-Z0-9/.,_=-]!) {  # check for anything not in whitelist
+      $escaped_word =~ s/'/'\\''/g;        # escape every quote, not just the first
+      $escaped_word = "'$escaped_word'";
+    }
+    push(@escaped_words, $escaped_word);
+  }
+  return join(" ", @escaped_words);
+}
+
+sub cleanup {
+  unlink($main::tmpfile_sym);
+  unlink(keys %main::tempnames);
+
+  # We leave any collected profiles in $HOME/pprof in case the user wants
+  # to look at them later.  We print a message informing them of this.
+  if ((scalar(@main::profile_files) > 0) &&
+      defined($main::collected_profile)) {
+    if (scalar(@main::profile_files) == 1) {
+      print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
+    }
+    print STDERR "If you want to investigate this profile further, you can do:\n";
+    print STDERR "\n";
+    print STDERR "  $0 \\\n";
+    print STDERR "  $main::prog \\\n";
+    print STDERR "  $main::collected_profile\n";
+    print STDERR "\n";
+  }
+}
+
+sub sighandler {
+  cleanup();
+  exit(1);
+}
+
+sub error {
+  my $msg = shift;
+  print STDERR $msg;
+  cleanup();
+  exit(1);
+}
+
+
+# Run $nm_command and get all the resulting procedure boundaries whose
+# names match "$regexp", and return them in a hashtable mapping from
+# procedure name to a two-element vector of [start address, end address].
+sub GetProcedureBoundariesViaNm {
+  my $escaped_nm_command = shift;    # shell-escaped
+  my $regexp = shift;
+  my $image = shift;
+
+  my $symbol_table = {};
+  open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n");
+  my $last_start = "0";
+  my $routine = "";
+  while (<NM>) {
+    s/\r//g;   # turn windows-looking lines into unix-looking lines
+    if (m/^\s*([0-9a-f]+) (.) (..*)/) {
+      my $start_val = $1;
+      my $type = $2;
+      my $this_routine = $3;
+
+      # It's possible for two symbols to share the same address, if
+      # one is a zero-length variable (like __start_google_malloc) or
+      # one symbol is a weak alias to another (like __libc_malloc).
+      # In such cases, we want to ignore all values except for the
+      # actual symbol, which in nm-speak has type "T".  The logic
+      # below does this, though it's a bit tricky: what happens when
+      # we have a series of lines with the same address, is the first
+      # one gets queued up to be processed.  However, it won't
+      # *actually* be processed until later, when we read a line with
+      # a different address.  That means that as long as we're reading
+      # lines with the same address, we have a chance to replace that
+      # item in the queue, which we do whenever we see a 'T' entry --
+      # that is, a line with type 'T'.  If we never see a 'T' entry,
+      # we'll just go ahead and process the first entry (which never
+      # got touched in the queue), and ignore the others.
+      if ($start_val eq $last_start && $type =~ /t/i) {
+        # We are the 'T' symbol at this address, replace previous symbol.
+        $routine = $this_routine;
+        next;
+      } elsif ($start_val eq $last_start) {
+        # We're not the 'T' symbol at this address, so ignore us.
+        next;
+      }
+
+      if ($this_routine eq $sep_symbol) {
+        $sep_address = HexExtend($start_val);
+      }
+
+      # Tag this routine with the starting address in case the image
+      # has multiple occurrences of this routine.  We use a syntax
+      # that resembles template parameters that are automatically
+      # stripped out by ShortFunctionName()
+      $this_routine .= "<$start_val>";
+
+      if (defined($routine) && $routine =~ m/$regexp/) {
+        $symbol_table->{$routine} = [HexExtend($last_start),
+                                     HexExtend($start_val)];
+      }
+      $last_start = $start_val;
+      $routine = $this_routine;
+    } elsif (m/^Loaded image name: (.+)/) {
+      # The win32 nm workalike emits information about the binary it is using.
+      if ($main::opt_debug) { print STDERR "Using Image $1\n"; }
+    } elsif (m/^PDB file name: (.+)/) {
+      # The win32 nm workalike emits information about the pdb it is using.
+      if ($main::opt_debug) { print STDERR "Using PDB $1\n"; }
+    }
+  }
+  close(NM);
+  # Handle the last line in the nm output.  Unfortunately, we don't know
+  # how big this last symbol is, because we don't know how big the file
+  # is.  For now, we just give it a size of 0.
+  # TODO(csilvers): do better here.
+  if (defined($routine) && $routine =~ m/$regexp/) {
+    $symbol_table->{$routine} = [HexExtend($last_start),
+                                 HexExtend($last_start)];
+  }
+
+  # Verify that addr2line can find the $sep_symbol.  If not, use objdump
+  # to find an address in the code section for $sep_symbol that addr2line
+  # can resolve.
+  if (defined($sep_address)){
+    my $start_val = $sep_address;
+    my $addr2line = $obj_tool_map{"addr2line"};
+    my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image, "-i");
+    open(FINI, "echo $start_val | $cmd |")
+         || error("echo $start_val | $cmd: $!\n");
+    $_ = <FINI>;
+    s/\r?\n$//g;
+    my $fini = $_;
+    close(FINI);
+    if ($fini ne $sep_symbol){
+      my $objdump = $obj_tool_map{"objdump"};
+      $cmd = ShellEscape($objdump, "-d", $image);
+      my $grep = ShellEscape("grep", $sep_symbol);
+      my $tail = ShellEscape("tail", "-n", "1");
+      open(FINI, "$cmd | $grep | $tail |")
+           || error("$cmd | $grep | $tail: $!\n");
+      my $data = <FINI>;
+      if (defined($data)){
+        $data =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
+        ($start_val, $fini) = split(/ </,$data);
+      }
+      close(FINI);
+    }
+    $sep_address = HexExtend($start_val);
+  }
+
+  return $symbol_table;
+}
+
+# Gets the procedure boundaries for all routines in "$image" whose names
+# match "$regexp" and returns them in a hashtable mapping from procedure
+# name to a two-element vector of [start address, end address].
+# Will return an empty map if nm is not installed or not working properly.
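+# For example, one entry might look like (hypothetical addresses):
+#   $symbol_table->{"main<0000000000400b60>"} =
+#       ["0000000000400b60", "0000000000400c10"];
+# where the <start-address> suffix is the disambiguating tag added in
+# GetProcedureBoundariesViaNm above.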
+sub GetProcedureBoundaries {
+  my $image = shift;
+  my $regexp = shift;
+
+  # If $image doesn't start with /, then put ./ in front of it.  This works
+  # around an obnoxious bug in our probing of nm -f behavior.
+  # "nm -f $image" is supposed to fail on GNU nm, but if:
+  #
+  # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND
+  # b. you have a.out in your current directory (a not uncommon occurrence)
+  #
+  # then "nm -f $image" succeeds because -f only looks at the first letter of
+  # the argument, which looks valid because it's [BbSsPp], and then since
+  # there's no image provided, it looks for a.out and finds it.
+  #
+  # This regex makes sure that $image starts with . or /, forcing the -f
+  # parsing to fail since . and / are not valid formats.
+  $image =~ s#^[^/]#./$&#;
+
+  # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
+  my $debugging = DebuggingLibrary($image);
+  if ($debugging) {
+    $image = $debugging;
+  }
+
+  my $nm = $obj_tool_map{"nm"};
+  my $cppfilt = $obj_tool_map{"c++filt"};
+
+  # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm
+  # binary doesn't support --demangle.  In addition, for OS X we need
+  # to use the -f flag to get 'flat' nm output (otherwise we don't sort
+  # properly and get incorrect results).  Unfortunately, GNU nm uses -f
+  # in an incompatible way.  So first we test whether our nm supports
+  # --demangle and -f.
+  my $demangle_flag = "";
+  my $cppfilt_flag = "";
+  my $to_devnull = ">$dev_null 2>&1";
+  if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) {
+    # In this mode, we do "nm --demangle <foo>"
+    $demangle_flag = "--demangle";
+    $cppfilt_flag = "";
+  } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) {
+    # In this mode, we do "nm <foo> | c++filt"
+    $cppfilt_flag = " | " . ShellEscape($cppfilt);
+  }
+  my $flatten_flag = "";
+  if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) {
+    $flatten_flag = "-f";
+  }
+
+  # Finally, in the case $image isn't a debug library, we try again with
+  # -D to at least get *exported* symbols.  If we can't use --demangle,
+  # we use c++filt instead, if it exists on this system.
+  my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag,
+                                 $image) . " 2>$dev_null $cppfilt_flag",
+                     ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag,
+                                 $image) . " 2>$dev_null $cppfilt_flag",
+                     # 6nm is for Go binaries
+                     ShellEscape("6nm", "$image") . " 2>$dev_null | sort",
+                     );
+
+  # If the executable is an MS Windows PDB-format executable, we'll
+  # have set up obj_tool_map("nm_pdb").  In this case, we actually
+  # want to use both unix nm and windows-specific nm_pdb, since
+  # PDB-format executables can apparently include dwarf .o files.
+  if (exists $obj_tool_map{"nm_pdb"}) {
+    push(@nm_commands,
+         ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image)
+         . " 2>$dev_null");
+  }
+
+  foreach my $nm_command (@nm_commands) {
+    my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp, $image);
+    return $symbol_table if (%{$symbol_table});
+  }
+  my $symbol_table = {};
+  return $symbol_table;
+}
+
+
+# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings.
+# To make them more readable, we add underscores at interesting places.
+# This routine removes the underscores, producing the canonical representation
+# used by pprof to represent addresses, particularly in the tested routines.
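+# For example: CanonicalHex("00_000000f_afafafa") eq "00000000fafafafa".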
+sub CanonicalHex { + my $arg = shift; + return join '', (split '_',$arg); +} + + +# Unit test for AddressAdd: +sub AddressAddUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd ($row->[0], $row->[1]); + if ($sum ne $row->[2]) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + my $expected = join '', (split '_',$row->[2]); + if ($sum ne CanonicalHex($row->[2])) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressSub: +sub AddressSubUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub ($row->[0], $row->[1]); + if ($sum ne $row->[3]) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + if ($sum ne CanonicalHex($row->[3])) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressInc: +sub AddressIncUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc ($row->[0]); + if ($sum ne $row->[4]) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc (CanonicalHex($row->[0])); + if ($sum ne CanonicalHex($row->[4])) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Driver for unit tests. +# Currently just the address add/subtract/increment routines for 64-bit. +sub RunUnitTests { + my $error_count = 0; + + # This is a list of tuples [a, b, a+b, a-b, a+1] + my $unit_test_data_8 = [ + [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], + [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], + [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], + [qw(00000001 ffffffff 00000000 00000002 00000002)], + [qw(00000001 fffffff0 fffffff1 00000011 00000002)], + ]; + my $unit_test_data_16 = [ + # The implementation handles data in 7-nibble chunks, so those are the + # interesting boundaries. + [qw(aaaaaaaa 50505050 + 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], + [qw(50505050 aaaaaaaa + 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], + [qw(ffffffff aaaaaaaa + 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], + [qw(00000001 ffffffff + 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], + [qw(00000001 fffffff0 + 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], + + [qw(00_a00000a_aaaaaaa 50505050 + 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], + [qw(0f_fff0005_0505050 aaaaaaaa + 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], + [qw(00_000000f_fffffff 01_800000a_aaaaaaa + 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], + [qw(00_0000000_0000001 ff_fffffff_fffffff + 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], + [qw(00_0000000_0000001 ff_fffffff_ffffff0 + ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], + ]; + + $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); + if ($error_count > 0) { + print STDERR $error_count, " errors: FAILED\n"; + } else { + print STDERR "PASS\n"; + } + exit ($error_count); +} diff --git a/src/third_party/gperftools-2.5/src/profile-handler.cc b/src/third_party/gperftools-2.5/src/profile-handler.cc new file mode 100644 index 00000000000..7fdcb69333f --- /dev/null +++ b/src/third_party/gperftools-2.5/src/profile-handler.cc @@ -0,0 +1,584 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//         Nabeel Mian
+//
+// Implements management of profile timers and the corresponding signal handler.
+
+#include "config.h"
+#include "profile-handler.h"
+
+#if !(defined(__CYGWIN__) || defined(__CYGWIN32__))
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/time.h>
+
+#include <list>
+#include <string>
+
+#if HAVE_LINUX_SIGEV_THREAD_ID
+// for timer_{create,settime} and associated typedefs & constants
+#include <time.h>
+// for sys_gettid
+#include "base/linux_syscall_support.h"
+// for perftools_pthread_key_create
+#include "maybe_threads.h"
+#endif
+
+#include "base/dynamic_annotations.h"
+#include "base/googleinit.h"
+#include "base/logging.h"
+#include "base/spinlock.h"
+#include "maybe_threads.h"
+
+using std::list;
+using std::string;
+
+// This structure is used by ProfileHandlerRegisterCallback and
+// ProfileHandlerUnregisterCallback as a handle to a registered callback.
+struct ProfileHandlerToken {
+  // Sets the callback and associated arg.
+  ProfileHandlerToken(ProfileHandlerCallback cb, void* cb_arg)
+      : callback(cb),
+        callback_arg(cb_arg) {
+  }
+
+  // Callback function to be invoked on receiving a profile timer interrupt.
+  ProfileHandlerCallback callback;
+  // Argument for the callback function.
+  void* callback_arg;
+};
+
+// Blocks a signal from being delivered to the current thread while the object
+// is alive.  Unblocks it upon destruction.
+class ScopedSignalBlocker {
+ public:
+  ScopedSignalBlocker(int signo) {
+    sigemptyset(&sig_set_);
+    sigaddset(&sig_set_, signo);
+    RAW_CHECK(sigprocmask(SIG_BLOCK, &sig_set_, NULL) == 0,
+              "sigprocmask (block)");
+  }
+  ~ScopedSignalBlocker() {
+    RAW_CHECK(sigprocmask(SIG_UNBLOCK, &sig_set_, NULL) == 0,
+              "sigprocmask (unblock)");
+  }
+
+ private:
+  sigset_t sig_set_;
+};
+
+// This class manages profile timers and associated signal handler.  This is
+// a singleton.
+class ProfileHandler {
+ public:
+  // Registers the current thread with the profile handler.
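+  // (Under CPUPROFILE_PER_THREAD_TIMERS this is also the point where the
+  // per-thread POSIX timer for the calling thread gets created; see
+  // StartLinuxThreadTimer below.)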
+ void RegisterThread(); + + // Registers a callback routine to receive profile timer ticks. The returned + // token is to be used when unregistering this callback and must not be + // deleted by the caller. + ProfileHandlerToken* RegisterCallback(ProfileHandlerCallback callback, + void* callback_arg); + + // Unregisters a previously registered callback. Expects the token returned + // by the corresponding RegisterCallback routine. + void UnregisterCallback(ProfileHandlerToken* token) + NO_THREAD_SAFETY_ANALYSIS; + + // Unregisters all the callbacks and stops the timer(s). + void Reset(); + + // Gets the current state of profile handler. + void GetState(ProfileHandlerState* state); + + // Initializes and returns the ProfileHandler singleton. + static ProfileHandler* Instance(); + + private: + ProfileHandler(); + ~ProfileHandler(); + + // Largest allowed frequency. + static const int32 kMaxFrequency = 4000; + // Default frequency. + static const int32 kDefaultFrequency = 100; + + // ProfileHandler singleton. + static ProfileHandler* instance_; + + // pthread_once_t for one time initialization of ProfileHandler singleton. + static pthread_once_t once_; + + // Initializes the ProfileHandler singleton via GoogleOnceInit. + static void Init(); + + // Timer state as configured previously. + bool timer_running_; + + // The number of profiling signal interrupts received. + int64 interrupts_ GUARDED_BY(signal_lock_); + + // Profiling signal interrupt frequency, read-only after construction. + int32 frequency_; + + // ITIMER_PROF (which uses SIGPROF), or ITIMER_REAL (which uses SIGALRM). + // Translated into an equivalent choice of clock if per_thread_timer_enabled_ + // is true. + int timer_type_; + + // Signal number for timer signal. + int signal_number_; + + // Counts the number of callbacks registered. + int32 callback_count_ GUARDED_BY(control_lock_); + + // Is profiling allowed at all? + bool allowed_; + + // Must be false if HAVE_LINUX_SIGEV_THREAD_ID is not defined. + bool per_thread_timer_enabled_; + +#ifdef HAVE_LINUX_SIGEV_THREAD_ID + // this is used to destroy per-thread profiling timers on thread + // termination + pthread_key_t thread_timer_key; +#endif + + // This lock serializes the registration of threads and protects the + // callbacks_ list below. + // Locking order: + // In the context of a signal handler, acquire signal_lock_ to walk the + // callback list. Otherwise, acquire control_lock_, disable the signal + // handler and then acquire signal_lock_. + SpinLock control_lock_ ACQUIRED_BEFORE(signal_lock_); + SpinLock signal_lock_; + + // Holds the list of registered callbacks. We expect the list to be pretty + // small. Currently, the cpu profiler (base/profiler) and thread module + // (base/thread.h) are the only two components registering callbacks. + // Following are the locking requirements for callbacks_: + // For read-write access outside the SIGPROF handler: + // - Acquire control_lock_ + // - Disable SIGPROF handler. + // - Acquire signal_lock_ + // For read-only access in the context of SIGPROF handler + // (Read-write access is *not allowed* in the SIGPROF handler) + // - Acquire signal_lock_ + // For read-only access outside SIGPROF handler: + // - Acquire control_lock_ + typedef list<ProfileHandlerToken*> CallbackList; + typedef CallbackList::iterator CallbackIterator; + CallbackList callbacks_ GUARDED_BY(signal_lock_); + + // Starts or stops the interval timer. + // Will ignore any requests to enable or disable when + // per_thread_timer_enabled_ is true. 
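+  // (Callers must already hold signal_lock_, per the annotation below:
+  // e.g. RegisterCallback blocks the profiling signal, takes the lock,
+  // and only then calls UpdateTimer(true).)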
+ void UpdateTimer(bool enable) EXCLUSIVE_LOCKS_REQUIRED(signal_lock_); + + // Returns true if the handler is not being used by something else. + // This checks the kernel's signal handler table. + bool IsSignalHandlerAvailable(); + + // Signal handler. Iterates over and calls all the registered callbacks. + static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext); + + DISALLOW_COPY_AND_ASSIGN(ProfileHandler); +}; + +ProfileHandler* ProfileHandler::instance_ = NULL; +pthread_once_t ProfileHandler::once_ = PTHREAD_ONCE_INIT; + +const int32 ProfileHandler::kMaxFrequency; +const int32 ProfileHandler::kDefaultFrequency; + +// If we are LD_PRELOAD-ed against a non-pthreads app, then these functions +// won't be defined. We declare them here, for that case (with weak linkage) +// which will cause the non-definition to resolve to NULL. We can then check +// for NULL or not in Instance. +extern "C" { +int pthread_once(pthread_once_t *, void (*)(void)) ATTRIBUTE_WEAK; +int pthread_kill(pthread_t thread_id, int signo) ATTRIBUTE_WEAK; + +#if HAVE_LINUX_SIGEV_THREAD_ID +int timer_create(clockid_t clockid, struct sigevent* evp, + timer_t* timerid) ATTRIBUTE_WEAK; +int timer_delete(timer_t timerid) ATTRIBUTE_WEAK; +int timer_settime(timer_t timerid, int flags, const struct itimerspec* value, + struct itimerspec* ovalue) ATTRIBUTE_WEAK; +#endif +} + +#if HAVE_LINUX_SIGEV_THREAD_ID + +struct timer_id_holder { + timer_t timerid; + timer_id_holder(timer_t _timerid) : timerid(_timerid) {} +}; + +extern "C" { + static void ThreadTimerDestructor(void *arg) { + if (!arg) { + return; + } + timer_id_holder *holder = static_cast<timer_id_holder *>(arg); + timer_delete(holder->timerid); + delete holder; + } +} + +static void CreateThreadTimerKey(pthread_key_t *pkey) { + int rv = perftools_pthread_key_create(pkey, ThreadTimerDestructor); + if (rv) { + RAW_LOG(FATAL, "aborting due to pthread_key_create error: %s", strerror(rv)); + } +} + +static void StartLinuxThreadTimer(int timer_type, int signal_number, + int32 frequency, pthread_key_t timer_key) { + int rv; + struct sigevent sevp; + timer_t timerid; + struct itimerspec its; + memset(&sevp, 0, sizeof(sevp)); + sevp.sigev_notify = SIGEV_THREAD_ID; + sevp._sigev_un._tid = sys_gettid(); + sevp.sigev_signo = signal_number; + clockid_t clock = CLOCK_THREAD_CPUTIME_ID; + if (timer_type == ITIMER_REAL) { + clock = CLOCK_MONOTONIC; + } + rv = timer_create(clock, &sevp, &timerid); + if (rv) { + RAW_LOG(FATAL, "aborting due to timer_create error: %s", strerror(errno)); + } + + timer_id_holder *holder = new timer_id_holder(timerid); + rv = perftools_pthread_setspecific(timer_key, holder); + if (rv) { + RAW_LOG(FATAL, "aborting due to pthread_setspecific error: %s", strerror(rv)); + } + + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 1000000000 / frequency; + its.it_value = its.it_interval; + rv = timer_settime(timerid, 0, &its, 0); + if (rv) { + RAW_LOG(FATAL, "aborting due to timer_settime error: %s", strerror(errno)); + } +} +#endif + +void ProfileHandler::Init() { + instance_ = new ProfileHandler(); +} + +ProfileHandler* ProfileHandler::Instance() { + if (pthread_once) { + pthread_once(&once_, Init); + } + if (instance_ == NULL) { + // This will be true on systems that don't link in pthreads, + // including on FreeBSD where pthread_once has a non-zero address + // (but doesn't do anything) even when pthreads isn't linked in. 
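+    // (That is, the weak pthread_once above may have returned "success"
+    // without ever running Init, so fall back to initializing directly.)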
+ Init(); + assert(instance_ != NULL); + } + return instance_; +} + +ProfileHandler::ProfileHandler() + : timer_running_(false), + interrupts_(0), + callback_count_(0), + allowed_(true), + per_thread_timer_enabled_(false) { + SpinLockHolder cl(&control_lock_); + + timer_type_ = (getenv("CPUPROFILE_REALTIME") ? ITIMER_REAL : ITIMER_PROF); + signal_number_ = (timer_type_ == ITIMER_PROF ? SIGPROF : SIGALRM); + + // Get frequency of interrupts (if specified) + char junk; + const char* fr = getenv("CPUPROFILE_FREQUENCY"); + if (fr != NULL && (sscanf(fr, "%u%c", &frequency_, &junk) == 1) && + (frequency_ > 0)) { + // Limit to kMaxFrequency + frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_; + } else { + frequency_ = kDefaultFrequency; + } + + if (!allowed_) { + return; + } + +#if HAVE_LINUX_SIGEV_THREAD_ID + // Do this early because we might be overriding signal number. + + const char *per_thread = getenv("CPUPROFILE_PER_THREAD_TIMERS"); + const char *signal_number = getenv("CPUPROFILE_TIMER_SIGNAL"); + + if (per_thread || signal_number) { + if (timer_create && pthread_once) { + CreateThreadTimerKey(&thread_timer_key); + per_thread_timer_enabled_ = true; + // Override signal number if requested. + if (signal_number) { + signal_number_ = strtol(signal_number, NULL, 0); + } + } else { + RAW_LOG(INFO, + "Ignoring CPUPROFILE_PER_THREAD_TIMERS and\n" + " CPUPROFILE_TIMER_SIGNAL due to lack of timer_create().\n" + " Preload or link to librt.so for this to work"); + } + } +#endif + + // If something else is using the signal handler, + // assume it has priority over us and stop. + if (!IsSignalHandlerAvailable()) { + RAW_LOG(INFO, "Disabling profiler because signal %d handler is already in use.", + signal_number_); + allowed_ = false; + return; + } + + // Install the signal handler. + struct sigaction sa; + sa.sa_sigaction = SignalHandler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(signal_number_, &sa, NULL) == 0, "sigprof (enable)"); +} + +ProfileHandler::~ProfileHandler() { + Reset(); +#ifdef HAVE_LINUX_SIGEV_THREAD_ID + if (per_thread_timer_enabled_) { + perftools_pthread_key_delete(thread_timer_key); + } +#endif +} + +void ProfileHandler::RegisterThread() { + SpinLockHolder cl(&control_lock_); + + if (!allowed_) { + return; + } + + // Record the thread identifier and start the timer if profiling is on. 
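+  // A typical caller is a thread entry wrapper that invokes the C-style
+  // ProfileHandlerRegisterThread() declared in profile-handler.h. The sketch
+  // below is hypothetical (RealThreadMain is a placeholder, not part of this
+  // module):
+  //   void* ThreadMain(void* arg) {
+  //     ProfileHandlerRegisterThread();  // arm profiling for this thread
+  //     return RealThreadMain(arg);
+  //   }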
+ ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); +#if HAVE_LINUX_SIGEV_THREAD_ID + if (per_thread_timer_enabled_) { + StartLinuxThreadTimer(timer_type_, signal_number_, frequency_, + thread_timer_key); + return; + } +#endif + UpdateTimer(callback_count_ > 0); +} + +ProfileHandlerToken* ProfileHandler::RegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + + ProfileHandlerToken* token = new ProfileHandlerToken(callback, callback_arg); + + SpinLockHolder cl(&control_lock_); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); + callbacks_.push_back(token); + ++callback_count_; + UpdateTimer(true); + } + return token; +} + +void ProfileHandler::UnregisterCallback(ProfileHandlerToken* token) { + SpinLockHolder cl(&control_lock_); + for (CallbackIterator it = callbacks_.begin(); it != callbacks_.end(); + ++it) { + if ((*it) == token) { + RAW_CHECK(callback_count_ > 0, "Invalid callback count"); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); + delete *it; + callbacks_.erase(it); + --callback_count_; + if (callback_count_ == 0) + UpdateTimer(false); + } + return; + } + } + // Unknown token. + RAW_LOG(FATAL, "Invalid token"); +} + +void ProfileHandler::Reset() { + SpinLockHolder cl(&control_lock_); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); + CallbackIterator it = callbacks_.begin(); + while (it != callbacks_.end()) { + CallbackIterator tmp = it; + ++it; + delete *tmp; + callbacks_.erase(tmp); + } + callback_count_ = 0; + UpdateTimer(false); + } +} + +void ProfileHandler::GetState(ProfileHandlerState* state) { + SpinLockHolder cl(&control_lock_); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); // Protects interrupts_. + state->interrupts = interrupts_; + } + state->frequency = frequency_; + state->callback_count = callback_count_; + state->allowed = allowed_; +} + +void ProfileHandler::UpdateTimer(bool enable) { + if (per_thread_timer_enabled_) { + // Ignore any attempts to disable it because that's not supported, and it's + // always enabled so enabling is always a NOP. + return; + } + + if (enable == timer_running_) { + return; + } + timer_running_ = enable; + + struct itimerval timer; + static const int kMillion = 1000000; + int interval_usec = enable ? kMillion / frequency_ : 0; + timer.it_interval.tv_sec = interval_usec / kMillion; + timer.it_interval.tv_usec = interval_usec % kMillion; + timer.it_value = timer.it_interval; + setitimer(timer_type_, &timer, 0); +} + +bool ProfileHandler::IsSignalHandlerAvailable() { + struct sigaction sa; + RAW_CHECK(sigaction(signal_number_, NULL, &sa) == 0, "is-signal-handler avail"); + + // We only take over the handler if the current one is unset. + // It must be SIG_IGN or SIG_DFL, not some other function. + // SIG_IGN must be allowed because when profiling is allowed but + // not actively in use, this code keeps the handler set to SIG_IGN. + // That setting will be inherited across fork+exec. In order for + // any child to be able to use profiling, SIG_IGN must be treated + // as available. + return sa.sa_handler == SIG_IGN || sa.sa_handler == SIG_DFL; +} + +void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) { + int saved_errno = errno; + // At this moment, instance_ must be initialized because the handler is + // enabled in RegisterThread or RegisterCallback only after + // ProfileHandler::Instance runs. 
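+  // ANNOTATE_UNPROTECTED_READ below only tells race detectors that this
+  // lock-free read of instance_ is intentional; the write in Init() is
+  // ordered before the handler can ever fire, so no stronger synchronization
+  // is needed here.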
+ ProfileHandler* instance = ANNOTATE_UNPROTECTED_READ(instance_); + RAW_CHECK(instance != NULL, "ProfileHandler is not initialized"); + { + SpinLockHolder sl(&instance->signal_lock_); + ++instance->interrupts_; + for (CallbackIterator it = instance->callbacks_.begin(); + it != instance->callbacks_.end(); + ++it) { + (*it)->callback(sig, sinfo, ucontext, (*it)->callback_arg); + } + } + errno = saved_errno; +} + +// This module initializer registers the main thread, so it must be +// executed in the context of the main thread. +REGISTER_MODULE_INITIALIZER(profile_main, ProfileHandlerRegisterThread()); + +void ProfileHandlerRegisterThread() { + ProfileHandler::Instance()->RegisterThread(); +} + +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return ProfileHandler::Instance()->RegisterCallback(callback, callback_arg); +} + +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { + ProfileHandler::Instance()->UnregisterCallback(token); +} + +void ProfileHandlerReset() { + return ProfileHandler::Instance()->Reset(); +} + +void ProfileHandlerGetState(ProfileHandlerState* state) { + ProfileHandler::Instance()->GetState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +void ProfileHandlerRegisterThread() { +} + +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return NULL; +} + +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { +} + +void ProfileHandlerReset() { +} + +void ProfileHandlerGetState(ProfileHandlerState* state) { +} + +#endif // OS_CYGWIN diff --git a/src/third_party/gperftools-2.5/src/profile-handler.h b/src/third_party/gperftools-2.5/src/profile-handler.h new file mode 100644 index 00000000000..3eae169d55a --- /dev/null +++ b/src/third_party/gperftools-2.5/src/profile-handler.h @@ -0,0 +1,142 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Nabeel Mian
+ *
+ * This module manages the cpu profile timers and the associated interrupt
+ * handler. When enabled, all threads in the program are profiled.
+ *
+ * Any component interested in receiving a profile timer interrupt can do so by
+ * registering a callback. All registered callbacks must be async-signal-safe.
+ *
+ * Note: This module requires the sole ownership of the configured timer and
+ * signal. The timer defaults to ITIMER_PROF, can be changed to ITIMER_REAL by
+ * the environment variable CPUPROFILE_REALTIME, or is changed to a POSIX timer
+ * with CPUPROFILE_PER_THREAD_TIMERS. The signal defaults to SIGPROF/SIGALRM to
+ * match the choice of timer and can be set to an arbitrary value using
+ * CPUPROFILE_TIMER_SIGNAL with CPUPROFILE_PER_THREAD_TIMERS.
+ */
+
+#ifndef BASE_PROFILE_HANDLER_H_
+#define BASE_PROFILE_HANDLER_H_
+
+#include "config.h"
+#include <signal.h>
+#ifdef COMPILER_MSVC
+#include "conflict-signal.h"
+#endif
+#include "base/basictypes.h"
+
+/* Forward declaration. */
+struct ProfileHandlerToken;
+
+/*
+ * Callback function to be used with ProfileHandlerRegisterCallback. This
+ * function will be called in the context of the SIGPROF signal handler and
+ * must be async-signal-safe. The first three arguments are the values
+ * provided by the SIGPROF signal handler. We use void* to avoid using
+ * ucontext_t on non-POSIX systems.
+ *
+ * Requirements:
+ * - Callback must be async-signal-safe.
+ * - None of the functions in ProfileHandler are async-signal-safe. Therefore,
+ *   the callback function *must* not call any of the ProfileHandler functions.
+ * - Callback is not required to be re-entrant. At most one instance of
+ *   callback can run at a time.
+ *
+ * Notes:
+ * - The SIGPROF signal handler saves and restores errno, so the callback
+ *   doesn't need to.
+ * - Callback code *must* not acquire lock(s) to serialize access to data shared
+ *   with the code outside the signal handler (callback must be
+ *   async-signal-safe). If such a serialization is needed, follow the model
+ *   used by profiler.cc:
+ *
+ *   When code other than the signal handler modifies the shared data it must:
+ *   - Acquire lock.
+ *   - Unregister the callback with the ProfileHandler.
+ *   - Modify shared data.
+ *   - Re-register the callback.
+ *   - Release lock.
+ *   and the callback code gets lockless, read-write access to the data.
+ */
+typedef void (*ProfileHandlerCallback)(int sig, siginfo_t* sig_info,
+                                       void* ucontext, void* callback_arg);
+
+/*
+ * Registers a new thread with the profile handler and should be called only
+ * once per thread. The main thread is registered at program startup. This
+ * routine is called by the Thread module in google3/thread whenever a new
+ * thread is created. This function is not async-signal-safe.
+ */
+void ProfileHandlerRegisterThread();
+
+/*
+ * Registers a callback routine. This callback function will be called in the
+ * context of the SIGPROF handler, so it must be async-signal-safe.
The returned token + * is to be used when unregistering this callback via + * ProfileHandlerUnregisterCallback. Registering the first callback enables + * the SIGPROF signal handler. Caller must not free the returned token. This + * function is not async-signal-safe. + */ +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg); + +/* + * Unregisters a previously registered callback. Expects the token returned + * by the corresponding ProfileHandlerRegisterCallback and asserts that the + * passed token is valid. Unregistering the last callback disables the SIGPROF + * signal handler. It waits for the currently running callback to + * complete before returning. This function is not async-signal-safe. + */ +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token); + +/* + * FOR TESTING ONLY + * Unregisters all the callbacks, stops the timers (if shared) and disables the + * SIGPROF handler. All the threads, including the main thread, need to be + * re-registered after this call. This function is not async-signal-safe. + */ +void ProfileHandlerReset(); + +/* + * Stores profile handler's current state. This function is not + * async-signal-safe. + */ +struct ProfileHandlerState { + int32 frequency; /* Profiling frequency */ + int32 callback_count; /* Number of callbacks registered */ + int64 interrupts; /* Number of interrupts received */ + bool allowed; /* Profiling is allowed */ +}; +void ProfileHandlerGetState(struct ProfileHandlerState* state); + +#endif /* BASE_PROFILE_HANDLER_H_ */ diff --git a/src/third_party/gperftools-2.5/src/profiledata.cc b/src/third_party/gperftools-2.5/src/profiledata.cc new file mode 100644 index 00000000000..8b05d3aa45c --- /dev/null +++ b/src/third_party/gperftools-2.5/src/profiledata.cc @@ -0,0 +1,332 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// --- +// Author: Sanjay Ghemawat +// Chris Demetriou (refactoring) +// +// Collect profiling data. + +#include <config.h> +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <sys/time.h> +#include <string.h> +#include <fcntl.h> + +#include "profiledata.h" + +#include "base/logging.h" +#include "base/sysinfo.h" + +// All of these are initialized in profiledata.h. +const int ProfileData::kMaxStackDepth; +const int ProfileData::kAssociativity; +const int ProfileData::kBuckets; +const int ProfileData::kBufferLength; + +ProfileData::Options::Options() + : frequency_(1) { +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::Evict(const Entry& entry) { + const int d = entry.depth; + const int nslots = d + 2; // Number of slots needed in eviction buffer + if (num_evicted_ + nslots > kBufferLength) { + FlushEvicted(); + assert(num_evicted_ == 0); + assert(nslots <= kBufferLength); + } + evict_[num_evicted_++] = entry.count; + evict_[num_evicted_++] = d; + memcpy(&evict_[num_evicted_], entry.stack, d * sizeof(Slot)); + num_evicted_ += d; +} + +ProfileData::ProfileData() + : hash_(0), + evict_(0), + num_evicted_(0), + out_(-1), + count_(0), + evictions_(0), + total_bytes_(0), + fname_(0), + start_time_(0) { +} + +bool ProfileData::Start(const char* fname, + const ProfileData::Options& options) { + if (enabled()) { + return false; + } + + // Open output file and initialize various data structures + int fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0666); + if (fd < 0) { + // Can't open outfile for write + return false; + } + + start_time_ = time(NULL); + fname_ = strdup(fname); + + // Reset counters + num_evicted_ = 0; + count_ = 0; + evictions_ = 0; + total_bytes_ = 0; + + hash_ = new Bucket[kBuckets]; + evict_ = new Slot[kBufferLength]; + memset(hash_, 0, sizeof(hash_[0]) * kBuckets); + + // Record special entries + evict_[num_evicted_++] = 0; // count for header + evict_[num_evicted_++] = 3; // depth for header + evict_[num_evicted_++] = 0; // Version number + CHECK_NE(0, options.frequency()); + int period = 1000000 / options.frequency(); + evict_[num_evicted_++] = period; // Period (microseconds) + evict_[num_evicted_++] = 0; // Padding + + out_ = fd; + + return true; +} + +ProfileData::~ProfileData() { + Stop(); +} + +// Dump /proc/maps data to fd. Copied from heap-profile-table.cc. 
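+// Appending the mappings is what lets pprof translate sampled PCs back into
+// symbols: each emitted line follows the usual /proc/self/maps layout of
+// "start-end perms offset dev inode path" for one loaded object.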
+#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +static void FDWrite(int fd, const char* buf, size_t len) { + while (len > 0) { + ssize_t r; + NO_INTR(r = write(fd, buf, len)); + RAW_CHECK(r >= 0, "write failed"); + buf += r; + len -= r; + } +} + +static void DumpProcSelfMaps(int fd) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + ProcMapsIterator::Buffer linebuf; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_), + start, end, flags, offset, inode, filename, + 0); + FDWrite(fd, linebuf.buf_, written); + } +} + +void ProfileData::Stop() { + if (!enabled()) { + return; + } + + // Move data from hash table to eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + } + } + } + + if (num_evicted_ + 3 > kBufferLength) { + // Ensure there is enough room for end of data marker + FlushEvicted(); + } + + // Write end of data marker + evict_[num_evicted_++] = 0; // count + evict_[num_evicted_++] = 1; // depth + evict_[num_evicted_++] = 0; // end of data marker + FlushEvicted(); + + // Dump "/proc/self/maps" so we get list of mapped shared libraries + DumpProcSelfMaps(out_); + + Reset(); + fprintf(stderr, "PROFILE: interrupts/evictions/bytes = %d/%d/%" PRIuS "\n", + count_, evictions_, total_bytes_); +} + +void ProfileData::Reset() { + if (!enabled()) { + return; + } + + // Don't reset count_, evictions_, or total_bytes_ here. They're used + // by Stop to print information about the profile after reset, and are + // cleared by Start when starting a new profile. + close(out_); + delete[] hash_; + hash_ = 0; + delete[] evict_; + evict_ = 0; + num_evicted_ = 0; + free(fname_); + fname_ = 0; + start_time_ = 0; + + out_ = -1; +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::GetCurrentState(State* state) const { + if (enabled()) { + state->enabled = true; + state->start_time = start_time_; + state->samples_gathered = count_; + int buf_size = sizeof(state->profile_name); + strncpy(state->profile_name, fname_, buf_size); + state->profile_name[buf_size-1] = '\0'; + } else { + state->enabled = false; + state->start_time = 0; + state->samples_gathered = 0; + state->profile_name[0] = '\0'; + } +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. 
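+// Records drained through Evict() are flat Slot sequences of the form
+// [count, depth, pc_0 .. pc_{depth-1}]; FlushTable() below pushes every live
+// bucket entry through that path and then writes the buffer out.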
+void ProfileData::FlushTable() { + if (!enabled()) { + return; + } + + // Move data from hash table to eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + bucket->entry[a].depth = 0; + bucket->entry[a].count = 0; + } + } + } + + // Write out all pending data + FlushEvicted(); +} + +void ProfileData::Add(int depth, const void* const* stack) { + if (!enabled()) { + return; + } + + if (depth > kMaxStackDepth) depth = kMaxStackDepth; + RAW_CHECK(depth > 0, "ProfileData::Add depth <= 0"); + + // Make hash-value + Slot h = 0; + for (int i = 0; i < depth; i++) { + Slot slot = reinterpret_cast<Slot>(stack[i]); + h = (h << 8) | (h >> (8*(sizeof(h)-1))); + h += (slot * 31) + (slot * 7) + (slot * 3); + } + + count_++; + + // See if table already has an entry for this trace + bool done = false; + Bucket* bucket = &hash_[h % kBuckets]; + for (int a = 0; a < kAssociativity; a++) { + Entry* e = &bucket->entry[a]; + if (e->depth == depth) { + bool match = true; + for (int i = 0; i < depth; i++) { + if (e->stack[i] != reinterpret_cast<Slot>(stack[i])) { + match = false; + break; + } + } + if (match) { + e->count++; + done = true; + break; + } + } + } + + if (!done) { + // Evict entry with smallest count + Entry* e = &bucket->entry[0]; + for (int a = 1; a < kAssociativity; a++) { + if (bucket->entry[a].count < e->count) { + e = &bucket->entry[a]; + } + } + if (e->count > 0) { + evictions_++; + Evict(*e); + } + + // Use the newly evicted entry + e->depth = depth; + e->count = 1; + for (int i = 0; i < depth; i++) { + e->stack[i] = reinterpret_cast<Slot>(stack[i]); + } + } +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::FlushEvicted() { + if (num_evicted_ > 0) { + const char* buf = reinterpret_cast<char*>(evict_); + size_t bytes = sizeof(evict_[0]) * num_evicted_; + total_bytes_ += bytes; + FDWrite(out_, buf, bytes); + } + num_evicted_ = 0; +} diff --git a/src/third_party/gperftools-2.5/src/profiledata.h b/src/third_party/gperftools-2.5/src/profiledata.h new file mode 100644 index 00000000000..44033f02d76 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/profiledata.h @@ -0,0 +1,184 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+// Author: Sanjay Ghemawat
+//         Chris Demetriou (refactoring)
+//
+// Collect profiling data.
+//
+// The profile data file format is documented in
+// doc/cpuprofile-fileformat.html
+
+
+#ifndef BASE_PROFILEDATA_H_
+#define BASE_PROFILEDATA_H_
+
+#include <config.h>
+#include <time.h>   // for time_t
+#include <stdint.h>
+#include "base/basictypes.h"
+
+// A class that accumulates profile samples and writes them to a file.
+//
+// Each sample contains a stack trace and a count. Memory usage is
+// reduced by combining profile samples that have the same stack trace
+// by adding up the associated counts.
+//
+// Profile data is accumulated in a bounded amount of memory, and will
+// be flushed to a file as necessary to stay within the memory limit.
+//
+// Use of this class assumes external synchronization. The exact
+// requirements of that synchronization are that:
+//
+// - 'Add' may be called from asynchronous signals, but is not
+//   re-entrant.
+//
+// - None of 'Start', 'Stop', 'Reset', 'Flush', and 'Add' may be
+//   called at the same time.
+//
+// - 'Start', 'Stop', or 'Reset' should not be called while 'Enabled'
+//   or 'GetCurrent' are running, and vice versa.
+//
+// A profiler which uses asynchronous signals to add samples will
+// typically use two locks to protect this data structure:
+//
+// - A SpinLock which is held over all calls except for the 'Add'
+//   call made from the signal handler.
+//
+// - A SpinLock which is held over calls to 'Start', 'Stop', 'Reset',
+//   'Flush', and 'Add'. (This SpinLock should be acquired after
+//   the first SpinLock in all cases where both are needed.)
+class ProfileData {
+ public:
+  struct State {
+    bool     enabled;             // Is profiling currently enabled?
+    time_t   start_time;          // If enabled, when was profiling started?
+    char     profile_name[1024];  // Name of file being written, or '\0'
+    int      samples_gathered;    // Number of samples gathered so far (or 0)
+  };
+
+  class Options {
+   public:
+    Options();
+
+    // Get and set the sample frequency.
+    int frequency() const {
+      return frequency_;
+    }
+    void set_frequency(int frequency) {
+      frequency_ = frequency;
+    }
+
+   private:
+    int      frequency_;          // Sample frequency.
+  };
+
+  static const int kMaxStackDepth = 64;  // Max stack depth stored in profile
+
+  ProfileData();
+  ~ProfileData();
+
+  // If data collection is not already enabled, start to collect data
+  // into fname. Parameters related to this profiling run are specified
+  // by 'options'.
+  //
+  // Returns true if data collection could be started, otherwise (if an
+  // error occurred or if data collection was already enabled) returns
+  // false.
+  bool Start(const char *fname, const Options& options);
+
+  // If data collection is enabled, stop data collection and write the
+  // data to disk.
+  void Stop();
+
+  // Stop data collection without writing anything else to disk, and
+  // discard any collected data.
+  void Reset();
+
+  // If data collection is enabled, record a sample with 'depth'
+  // entries from 'stack'.
(depth must be > 0.) At most + // kMaxStackDepth stack entries will be recorded, starting with + // stack[0]. + // + // This function is safe to call from asynchronous signals (but is + // not re-entrant). + void Add(int depth, const void* const* stack); + + // If data collection is enabled, write the data to disk (and leave + // the collector enabled). + void FlushTable(); + + // Is data collection currently enabled? + bool enabled() const { return out_ >= 0; } + + // Get the current state of the data collector. + void GetCurrentState(State* state) const; + + private: + static const int kAssociativity = 4; // For hashtable + static const int kBuckets = 1 << 10; // For hashtable + static const int kBufferLength = 1 << 18; // For eviction buffer + + // Type of slots: each slot can be either a count, or a PC value + typedef uintptr_t Slot; + + // Hash-table/eviction-buffer entry (a.k.a. a sample) + struct Entry { + Slot count; // Number of hits + Slot depth; // Stack depth + Slot stack[kMaxStackDepth]; // Stack contents + }; + + // Hash table bucket + struct Bucket { + Entry entry[kAssociativity]; + }; + + Bucket* hash_; // hash table + Slot* evict_; // evicted entries + int num_evicted_; // how many evicted entries? + int out_; // fd for output file. + int count_; // How many samples recorded + int evictions_; // How many evictions + size_t total_bytes_; // How much output + char* fname_; // Profile file name + time_t start_time_; // Start time, or 0 + + // Move 'entry' to the eviction buffer. + void Evict(const Entry& entry); + + // Write contents of eviction buffer to disk. + void FlushEvicted(); + + DISALLOW_COPY_AND_ASSIGN(ProfileData); +}; + +#endif // BASE_PROFILEDATA_H_ diff --git a/src/third_party/gperftools-2.5/src/profiler.cc b/src/third_party/gperftools-2.5/src/profiler.cc new file mode 100644 index 00000000000..f4f59900088 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/profiler.cc @@ -0,0 +1,431 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//         Chris Demetriou (refactoring)
+//
+// Profile the current program by sampling the stack-trace every so often
+
+#include "config.h"
+#include "getpc.h"      // should be first to get the _GNU_SOURCE dfn
+#include <signal.h>
+#include <assert.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>     // for getpid()
+#endif
+#if defined(HAVE_SYS_UCONTEXT_H)
+#include <sys/ucontext.h>
+#elif defined(HAVE_UCONTEXT_H)
+#include <ucontext.h>
+#elif defined(HAVE_CYGWIN_SIGNAL_H)
+#include <cygwin/signal.h>
+typedef ucontext ucontext_t;
+#else
+typedef int ucontext_t;   // just to quiet the compiler, mostly
+#endif
+#include <sys/time.h>
+#include <string>
+#include <gperftools/profiler.h>
+#include <gperftools/stacktrace.h>
+#include "base/commandlineflags.h"
+#include "base/logging.h"
+#include "base/googleinit.h"
+#include "base/spinlock.h"
+#include "base/sysinfo.h"             /* for GetUniquePathFromEnv, etc */
+#include "profiledata.h"
+#include "profile-handler.h"
+#ifdef HAVE_CONFLICT_SIGNAL_H
+#include "conflict-signal.h"          /* used on msvc machines */
+#endif
+
+using std::string;
+
+DEFINE_bool(cpu_profiler_unittest,
+            EnvToBool("PERFTOOLS_UNITTEST", true),
+            "Determines whether or not we are running under the \
+             control of a unit test. This allows us to include or \
+             exclude certain behaviours.");
+
+// Collects up all profile data. This is a singleton, which is
+// initialized by a constructor at startup. If no cpu profiler
+// signal is specified then the profiler lifecycle is either
+// manually controlled via the API or attached to the scope of
+// the singleton (program scope). Otherwise the cpu toggle is
+// used to allow for user-selectable control via signal generation.
+// This is very useful for profiling a daemon process without
+// having to start and stop the daemon or having to modify the
+// source code to use the cpu profiler API.
+class CpuProfiler {
+ public:
+  CpuProfiler();
+  ~CpuProfiler();
+
+  // Start profiler to write profile info into fname
+  bool Start(const char* fname, const ProfilerOptions* options);
+
+  // Stop profiling and write the data to disk.
+  void Stop();
+
+  // Write the data to disk (and continue profiling).
+  void FlushTable();
+
+  bool Enabled();
+
+  void GetCurrentState(ProfilerState* state);
+
+  static CpuProfiler instance_;
+
+ private:
+  // This lock implements the locking requirements described in the ProfileData
+  // documentation, specifically:
+  //
+  // lock_ is held over all collector_ method calls except for the 'Add'
+  // call made from the signal handler, to protect against concurrent use of
+  // collector_'s control routines. Code other than the signal handler must
+  // unregister the signal handler before calling any collector_ method.
+  // The 'Add' method in the collector is protected by a guarantee from
+  // ProfileHandler that only one instance of prof_handler can run at a time.
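+  // Concretely (a sketch of the sequence in Stop() below): take lock_, call
+  // DisableHandler() -- which waits out any in-flight prof_handler
+  // invocation -- and only then touch collector_, so Add() never races with
+  // Stop() or Flush().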
+ SpinLock lock_; + ProfileData collector_; + + // Filter function and its argument, if any. (NULL means include all + // samples). Set at start, read-only while running. Written while holding + // lock_, read and executed in the context of SIGPROF interrupt. + int (*filter_)(void*); + void* filter_arg_; + + // Opaque token returned by the profile handler. To be used when calling + // ProfileHandlerUnregisterCallback. + ProfileHandlerToken* prof_handler_token_; + + // Sets up a callback to receive SIGPROF interrupt. + void EnableHandler(); + + // Disables receiving SIGPROF interrupt. + void DisableHandler(); + + // Signal handler that records the interrupted pc in the profile data. + static void prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler); +}; + +// Signal handler that is registered when a user selectable signal +// number is defined in the environment variable CPUPROFILESIGNAL. +static void CpuProfilerSwitch(int signal_number) +{ + bool static started = false; + static unsigned profile_count = 0; + static char base_profile_name[1024] = "\0"; + + if (base_profile_name[0] == '\0') { + if (!GetUniquePathFromEnv("CPUPROFILE", base_profile_name)) { + RAW_LOG(FATAL,"Cpu profiler switch is registered but no CPUPROFILE is defined"); + return; + } + } + if (!started) + { + char full_profile_name[1024]; + + snprintf(full_profile_name, sizeof(full_profile_name), "%s.%u", + base_profile_name, profile_count++); + + if(!ProfilerStart(full_profile_name)) + { + RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n", + full_profile_name, strerror(errno)); + } + } + else + { + ProfilerStop(); + } + started = !started; +} + +// Profile data structure singleton: Constructor will check to see if +// profiling should be enabled. Destructor will write profile data +// out to disk. +CpuProfiler CpuProfiler::instance_; + +// Initialize profiling: activated if getenv("CPUPROFILE") exists. +CpuProfiler::CpuProfiler() + : prof_handler_token_(NULL) { + // TODO(cgd) Move this code *out* of the CpuProfile constructor into a + // separate object responsible for initialization. With ProfileHandler there + // is no need to limit the number of profilers. 
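+  // Environment-driven usage sketch (file name and signal number are
+  // illustrative):
+  //   $ CPUPROFILE=/tmp/myprog.prof ./myprog              # profile whole run
+  //   $ CPUPROFILE=/tmp/myprog.prof CPUPROFILESIGNAL=12 ./myprog
+  //     # then kill -12 <pid> toggles profiling on/off via CpuProfilerSwitch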
+ if (getenv("CPUPROFILE") == NULL) { + if (!FLAGS_cpu_profiler_unittest) { + RAW_LOG(WARNING, "CPU profiler linked but no valid CPUPROFILE environment variable found\n"); + } + return; + } + + // We don't enable profiling if setuid -- it's a security risk +#ifdef HAVE_GETEUID + if (getuid() != geteuid()) { + if (!FLAGS_cpu_profiler_unittest) { + RAW_LOG(WARNING, "Cannot perform CPU profiling when running with setuid\n"); + } + return; + } +#endif + + char *signal_number_str = getenv("CPUPROFILESIGNAL"); + if (signal_number_str != NULL) { + long int signal_number = strtol(signal_number_str, NULL, 10); + if (signal_number >= 1 && signal_number <= 64) { + intptr_t old_signal_handler = reinterpret_cast<intptr_t>(signal(signal_number, CpuProfilerSwitch)); + if (old_signal_handler == 0) { + RAW_LOG(INFO,"Using signal %d as cpu profiling switch", signal_number); + } else { + RAW_LOG(FATAL, "Signal %d already in use\n", signal_number); + } + } else { + RAW_LOG(FATAL, "Signal number %s is invalid\n", signal_number_str); + } + } else { + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("CPUPROFILE", fname)) { + if (!FLAGS_cpu_profiler_unittest) { + RAW_LOG(WARNING, "CPU profiler linked but no valid CPUPROFILE environment variable found\n"); + } + return; + } + + if (!Start(fname, NULL)) { + RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n", + fname, strerror(errno)); + } + } +} + +bool CpuProfiler::Start(const char* fname, const ProfilerOptions* options) { + SpinLockHolder cl(&lock_); + + if (collector_.enabled()) { + return false; + } + + ProfileHandlerState prof_handler_state; + ProfileHandlerGetState(&prof_handler_state); + + ProfileData::Options collector_options; + collector_options.set_frequency(prof_handler_state.frequency); + if (!collector_.Start(fname, collector_options)) { + return false; + } + + filter_ = NULL; + if (options != NULL && options->filter_in_thread != NULL) { + filter_ = options->filter_in_thread; + filter_arg_ = options->filter_in_thread_arg; + } + + // Setup handler for SIGPROF interrupts + EnableHandler(); + + return true; +} + +CpuProfiler::~CpuProfiler() { + Stop(); +} + +// Stop profiling and write out any collected profile data +void CpuProfiler::Stop() { + SpinLockHolder cl(&lock_); + + if (!collector_.enabled()) { + return; + } + + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // stopping the collector. + DisableHandler(); + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to stop the collector. + collector_.Stop(); +} + +void CpuProfiler::FlushTable() { + SpinLockHolder cl(&lock_); + + if (!collector_.enabled()) { + return; + } + + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // flushing the profile data. + DisableHandler(); + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to flush the profile data. 
+ collector_.FlushTable(); + + EnableHandler(); +} + +bool CpuProfiler::Enabled() { + SpinLockHolder cl(&lock_); + return collector_.enabled(); +} + +void CpuProfiler::GetCurrentState(ProfilerState* state) { + ProfileData::State collector_state; + { + SpinLockHolder cl(&lock_); + collector_.GetCurrentState(&collector_state); + } + + state->enabled = collector_state.enabled; + state->start_time = static_cast<time_t>(collector_state.start_time); + state->samples_gathered = collector_state.samples_gathered; + int buf_size = sizeof(state->profile_name); + strncpy(state->profile_name, collector_state.profile_name, buf_size); + state->profile_name[buf_size-1] = '\0'; +} + +void CpuProfiler::EnableHandler() { + RAW_CHECK(prof_handler_token_ == NULL, "SIGPROF handler already registered"); + prof_handler_token_ = ProfileHandlerRegisterCallback(prof_handler, this); + RAW_CHECK(prof_handler_token_ != NULL, "Failed to set up SIGPROF handler"); +} + +void CpuProfiler::DisableHandler() { + RAW_CHECK(prof_handler_token_ != NULL, "SIGPROF handler is not registered"); + ProfileHandlerUnregisterCallback(prof_handler_token_); + prof_handler_token_ = NULL; +} + +// Signal handler that records the pc in the profile-data structure. We do no +// synchronization here. profile-handler.cc guarantees that at most one +// instance of prof_handler() will run at a time. All other routines that +// access the data touched by prof_handler() disable this signal handler before +// accessing the data and therefore cannot execute concurrently with +// prof_handler(). +void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler) { + CpuProfiler* instance = static_cast<CpuProfiler*>(cpu_profiler); + + if (instance->filter_ == NULL || + (*instance->filter_)(instance->filter_arg_)) { + void* stack[ProfileData::kMaxStackDepth]; + + // Under frame-pointer-based unwinding at least on x86, the + // top-most active routine doesn't show up as a normal frame, but + // as the "pc" value in the signal handler context. + stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); + + // We skip the top three stack trace entries (this function, + // SignalHandler::SignalHandler and one signal handler frame) + // since they are artifacts of profiling and should not be + // measured. Other profiling related frames may be removed by + // "pprof" at analysis time. Instead of skipping the top frames, + // we could skip nothing, but that would increase the profile size + // unnecessarily. 
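+    // In other words, the skip count of 3 drops prof_handler itself plus the
+    // signal trampoline frames, while the interrupted PC is supplied
+    // separately from the ucontext into stack[0] above.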
+ int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1, + 3, signal_ucontext); + + void **used_stack; + if (depth > 0 && stack[1] == stack[0]) { + // in case of non-frame-pointer-based unwinding we will get + // duplicate of PC in stack[1], which we don't want + used_stack = stack + 1; + } else { + used_stack = stack; + depth++; // To account for pc value in stack[0]; + } + + instance->collector_.Add(depth, used_stack); + } +} + +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + +extern "C" PERFTOOLS_DLL_DECL void ProfilerRegisterThread() { + ProfileHandlerRegisterThread(); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerFlush() { + CpuProfiler::instance_.FlushTable(); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads() { + return CpuProfiler::instance_.Enabled(); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname) { + return CpuProfiler::instance_.Start(fname, NULL); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilerStartWithOptions( + const char *fname, const ProfilerOptions *options) { + return CpuProfiler::instance_.Start(fname, options); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerStop() { + CpuProfiler::instance_.Stop(); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerGetCurrentState( + ProfilerState* state) { + CpuProfiler::instance_.GetCurrentState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfilerRegisterThread() { } +extern "C" void ProfilerFlush() { } +extern "C" int ProfilingIsEnabledForAllThreads() { return 0; } +extern "C" int ProfilerStart(const char* fname) { return 0; } +extern "C" int ProfilerStartWithOptions(const char *fname, + const ProfilerOptions *options) { + return 0; +} +extern "C" void ProfilerStop() { } +extern "C" void ProfilerGetCurrentState(ProfilerState* state) { + memset(state, 0, sizeof(*state)); +} + +#endif // OS_CYGWIN + +// DEPRECATED routines +extern "C" PERFTOOLS_DLL_DECL void ProfilerEnable() { } +extern "C" PERFTOOLS_DLL_DECL void ProfilerDisable() { } diff --git a/src/third_party/gperftools-2.5/src/raw_printer.cc b/src/third_party/gperftools-2.5/src/raw_printer.cc new file mode 100644 index 00000000000..3cf028eeae0 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/raw_printer.cc @@ -0,0 +1,72 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: sanjay@google.com (Sanjay Ghemawat) + +#include <config.h> +#include <stdarg.h> +#include <stdio.h> +#include "raw_printer.h" +#include "base/logging.h" + +namespace base { + +RawPrinter::RawPrinter(char* buf, int length) + : base_(buf), + ptr_(buf), + limit_(buf + length - 1) { + RAW_DCHECK(length > 0, ""); + *ptr_ = '\0'; + *limit_ = '\0'; +} + +void RawPrinter::Printf(const char* format, ...) { + if (limit_ > ptr_) { + va_list ap; + va_start(ap, format); + int avail = limit_ - ptr_; + // We pass avail+1 to vsnprintf() since that routine needs room + // to store the trailing \0. + const int r = perftools_vsnprintf(ptr_, avail+1, format, ap); + va_end(ap); + if (r < 0) { + // Perhaps an old glibc that returns -1 on truncation? + ptr_ = limit_; + } else if (r > avail) { + // Truncation + ptr_ = limit_; + } else { + ptr_ += r; + } + } +} + +} diff --git a/src/third_party/gperftools-2.5/src/raw_printer.h b/src/third_party/gperftools-2.5/src/raw_printer.h new file mode 100644 index 00000000000..9288bb5eeaa --- /dev/null +++ b/src/third_party/gperftools-2.5/src/raw_printer.h @@ -0,0 +1,90 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// A printf() wrapper that writes into a fixed length buffer. +// Useful in low-level code that does not want to use allocating +// routines like StringPrintf(). +// +// The implementation currently uses vsnprintf(). This seems to +// be fine for use in many low-level contexts, but we may need to +// rethink this decision if we hit a problem with it calling +// down into malloc() etc. + +#ifndef BASE_RAW_PRINTER_H_ +#define BASE_RAW_PRINTER_H_ + +#include <config.h> +#include "base/basictypes.h" + +namespace base { + +class RawPrinter { + public: + // REQUIRES: "length > 0" + // Will printf any data added to this into "buf[0,length-1]" and + // will arrange to always keep buf[] null-terminated. + RawPrinter(char* buf, int length); + + // Return the number of bytes that have been appended to the string + // so far. Does not count any bytes that were dropped due to overflow. + int length() const { return (ptr_ - base_); } + + // Return the number of bytes that can be added to this. + int space_left() const { return (limit_ - ptr_); } + + // Format the supplied arguments according to the "format" string + // and append to this. Will silently truncate the output if it does + // not fit. + void Printf(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; + + private: + // We can write into [ptr_ .. limit_-1]. + // *limit_ is also writable, but reserved for a terminating \0 + // in case we overflow. + // + // Invariants: *ptr_ == \0 + // Invariants: *limit_ == \0 + char* base_; // Initial pointer + char* ptr_; // Where should we write next + char* limit_; // One past last non-\0 char we can write + + DISALLOW_COPY_AND_ASSIGN(RawPrinter); +}; + +} + +#endif // BASE_RAW_PRINTER_H_ diff --git a/src/third_party/gperftools-2.5/src/sampler.cc b/src/third_party/gperftools-2.5/src/sampler.cc new file mode 100755 index 00000000000..cc711123340 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/sampler.cc @@ -0,0 +1,131 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Daniel Ford + +#include "sampler.h" + +#include <algorithm> // For min() +#include <math.h> +#include "base/commandlineflags.h" + +using std::min; + +// The approximate gap in bytes between sampling actions. +// I.e., we take one sample approximately once every +// tcmalloc_sample_parameter bytes of allocation +// i.e. about once every 512KB if value is 1<<19. +#ifdef NO_TCMALLOC_SAMPLES +DEFINE_int64(tcmalloc_sample_parameter, 0, + "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); +#else +DEFINE_int64(tcmalloc_sample_parameter, + EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0), + "The approximate gap in bytes between sampling actions. " + "This must be between 1 and 2^58."); +#endif + +namespace tcmalloc { + +// Statics for Sampler +double Sampler::log_table_[1<<kFastlogNumBits]; + +// Populate the lookup table for FastLog2. +// This approximates the log2 curve with a step function. +// Steps have height equal to log2 of the mid-point of the step. +void Sampler::PopulateFastLog2Table() { + for (int i = 0; i < (1<<kFastlogNumBits); i++) { + log_table_[i] = (log(1.0 + static_cast<double>(i+0.5)/(1<<kFastlogNumBits)) + / log(2.0)); + } +} + +int Sampler::GetSamplePeriod() { + return FLAGS_tcmalloc_sample_parameter; +} + +// Run this before using your sampler +void Sampler::Init(uint32_t seed) { + // Initialize PRNG + if (seed != 0) { + rnd_ = seed; + } else { + rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); + if (rnd_ == 0) { + rnd_ = 1; + } + } + // Step it forward 20 times for good measure + for (int i = 0; i < 20; i++) { + rnd_ = NextRandom(rnd_); + } + // Initialize counter + bytes_until_sample_ = PickNextSamplingPoint(); +} + +// Initialize the Statics for the Sampler class +void Sampler::InitStatics() { + PopulateFastLog2Table(); +} + +// Generates a geometric variable with the specified mean (512K by default). +// This is done by generating a random number between 0 and 1 and applying +// the inverse cumulative distribution function for an exponential. +// Specifically: Let m be the inverse of the sample period, then +// the probability distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below +size_t Sampler::PickNextSamplingPoint() { + rnd_ = NextRandom(rnd_); + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) 
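+  // Worked example of the math above (matching the table in sampler.h): with
+  // the default 512K mean, a 4K allocation is sampled with probability
+  //   1 - e^(-4096/524288) ~= 0.0078
+  // and a 1MB allocation with probability 1 - e^(-1048576/524288) ~= 0.865.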
+  const uint64_t prng_mod_power = 48;  // Number of bits in prng
+  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
+  // under piii debug for some binaries.
+  double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0;
+  // Put the computed p-value through the CDF of a geometric.
+  // For faster performance (save ~1/20th exec time), replace
+  // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705)
+  // The value 26.000705 is used rather than 26 to compensate
+  // for inaccuracies in FastLog2 which otherwise result in a
+  // negative answer.
+  return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * (-log(2.0)
+                             * FLAGS_tcmalloc_sample_parameter) + 1);
+}
+
+}  // namespace tcmalloc
diff --git a/src/third_party/gperftools-2.5/src/sampler.h b/src/third_party/gperftools-2.5/src/sampler.h
new file mode 100755
index 00000000000..eb316d7493d
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/sampler.h
@@ -0,0 +1,180 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// All Rights Reserved.
+//
+// Author: Daniel Ford
+
+#ifndef TCMALLOC_SAMPLER_H_
+#define TCMALLOC_SAMPLER_H_
+
+#include "config.h"
+#include <stddef.h>                     // for size_t
+#ifdef HAVE_STDINT_H
+#include <stdint.h>                     // for uint64_t, uint32_t, int32_t
+#endif
+#include <string.h>                     // for memcpy
+#include "base/basictypes.h"            // for ASSERT
+#include "internal_logging.h"           // for ASSERT
+
+namespace tcmalloc {
+
+//-------------------------------------------------------------------
+// Sampler to decide when to create a sample trace for an allocation
+// Not thread safe: Each thread should have its own sampler object.
+//                  Caller must use external synchronization if used
+//                  from multiple threads.
+//
+// With 512K average sample step (the default):
+//  the probability of sampling a 4K allocation is about 0.00778
+//  the probability of sampling a 1MB allocation is about 0.865
+//  the probability of sampling a 1GB allocation is about 1.00000
+// In general, the probability of sampling an allocation of size X
+// given a flag value of Y (default 512K) is:
+//  1 - e^(-X/Y)
+//
+// With 128K average sample step:
+//  the probability of sampling a 1MB allocation is about 0.99966
+//  the probability of sampling a 1GB allocation is about 1.0
+//  (about 1 - 2**(-26))
+// With 1M average sample step:
+//  the probability of sampling a 4K allocation is about 0.00390
+//  the probability of sampling a 1MB allocation is about 0.632
+//  the probability of sampling a 1GB allocation is about 1.0
+//
+// The sampler works by representing memory as a long stream from
+// which allocations are taken. Some of the bytes in this stream are
+// marked and if an allocation includes a marked byte then it is
+// sampled. Bytes are marked according to a Poisson point process
+// with each byte being marked independently with probability
+// p = 1/tcmalloc_sample_parameter. This makes the probability
+// of sampling an allocation of X bytes equal to the CDF of
+// a geometric with mean tcmalloc_sample_parameter (i.e. the
+// probability that at least one byte in the range is marked). This
+// is accurately given by the CDF of the corresponding exponential
+// distribution: 1 - e^(-X/tcmalloc_sample_parameter_)
+// Independence of the byte marking ensures independence of
+// the sampling of each allocation.
+//
+// This scheme is implemented by noting that, starting from any
+// fixed place, the number of bytes until the next marked byte
+// is geometrically distributed. This number is recorded as
+// bytes_until_sample_. Every allocation subtracts from this
+// number until it is less than 0. When this happens the current
+// allocation is sampled.
+//
+// When a sample is taken, bytes_until_sample_ is reset to
+// a new independently sampled geometric number of bytes. The
+// memoryless property of the point process means that this may
+// be taken as the number of bytes after the end of the current
+// allocation until the next marked byte. This ensures that
+// very large allocations which would intersect many marked bytes
+// only result in a single call to PickNextSamplingPoint.
+//-------------------------------------------------------------------
+
+class PERFTOOLS_DLL_DECL Sampler {
+ public:
+  // Initialize this sampler.
+  // Passing a seed of 0 gives a non-deterministic
+  // seed value given by casting the object ("this")
+  void Init(uint32_t seed);
+  void Cleanup();
+
+  // Record allocation of "k" bytes.
Return true iff allocation + // should be sampled + bool SampleAllocation(size_t k); + + // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter) + size_t PickNextSamplingPoint(); + + // Initialize the statics for the Sampler class + static void InitStatics(); + + // Returns the current sample period + int GetSamplePeriod(); + + // The following are public for the purposes of testing + static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value + static double FastLog2(const double & d); // Computes Log2(x) quickly + static void PopulateFastLog2Table(); // Populate the lookup table + + private: + size_t bytes_until_sample_; // Bytes until we sample next + uint64_t rnd_; // Cheap random number generator + + // Statics for the fast log + // Note that this code may not depend on anything in //util + // hence the duplication of functionality here + static const int kFastlogNumBits = 10; + static const int kFastlogMask = (1 << kFastlogNumBits) - 1; + static double log_table_[1<<kFastlogNumBits]; // Constant +}; + +inline bool Sampler::SampleAllocation(size_t k) { + if (bytes_until_sample_ < k) { + bytes_until_sample_ = PickNextSamplingPoint(); + return true; + } else { + bytes_until_sample_ -= k; + return false; + } +} + +// Inline functions which are public for testing purposes + +// Returns the next prng value. +// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 +// This is the lrand64 generator. +inline uint64_t Sampler::NextRandom(uint64_t rnd) { + const uint64_t prng_mult = 0x5DEECE66DLL; + const uint64_t prng_add = 0xB; + const uint64_t prng_mod_power = 48; + const uint64_t prng_mod_mask = + ~((~static_cast<uint64_t>(0)) << prng_mod_power); + return (prng_mult * rnd + prng_add) & prng_mod_mask; +} + +// Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer +// This mimics the VeryFastLog2 code in those files +inline double Sampler::FastLog2(const double & d) { + ASSERT(d>0); + COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits); + uint64_t x; + memcpy(&x, &d, sizeof(x)); // we depend on the compiler inlining this + const uint32_t x_high = x >> 32; + const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask; + const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023; + return exponent + log_table_[y]; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_SAMPLER_H_ diff --git a/src/third_party/gperftools-2.5/src/solaris/libstdc++.la b/src/third_party/gperftools-2.5/src/solaris/libstdc++.la new file mode 100644 index 00000000000..3edf4254192 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/solaris/libstdc++.la @@ -0,0 +1,51 @@ +# libstdc++.la - a libtool library file +# Generated by ltmain.sh - GNU libtool 1.4a-GCC3.0 (1.641.2.256 2001/05/28 20:09:07 with GCC-local changes) +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# --- +# NOTE: This file lives in /usr/sfw/lib on Solaris 10. Unfortunately, +# due to an apparent bug in the Solaris 10 6/06 release, +# /usr/sfw/lib/libstdc++.la is empty. Below is the correct content, +# according to +# http://forum.java.sun.com/thread.jspa?threadID=5073150 +# By passing LDFLAGS='-Lsrc/solaris' to configure, make will pick up +# this copy of the file rather than the empty copy in /usr/sfw/lib. +# +# Also see +# http://www.technicalarticles.org/index.php/Compiling_MySQL_5.0_on_Solaris_10 +# +# Note: this is for 32-bit systems. If you have a 64-bit system, +# uncomment the appropriate dependency_libs line below. 
+# ---- + +# The name that we can dlopen(3). +dlname='libstdc++.so.6' + +# Names of this library. +library_names='libstdc++.so.6.0.3 libstdc++.so.6 libstdc++.so' + +# The name of the static archive. +old_library='libstdc++.a' + +# Libraries that this one depends upon. +# 32-bit version: +dependency_libs='-lc -lm -L/usr/sfw/lib -lgcc_s' +# 64-bit version: +#dependency_libs='-L/lib/64 -lc -lm -L/usr/sfw/lib/64 -lgcc_s' + +# Version information for libstdc++. +current=6 +age=0 +revision=3 + +# Is this an already installed library? +installed=yes + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/usr/sfw/lib' diff --git a/src/third_party/gperftools-2.5/src/span.cc b/src/third_party/gperftools-2.5/src/span.cc new file mode 100644 index 00000000000..4d089640d18 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/span.cc @@ -0,0 +1,102 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include "span.h" + +#include <string.h> // for NULL, memset + +#include "internal_logging.h" // for ASSERT +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static + +namespace tcmalloc { + +#ifdef SPAN_HISTORY +void Event(Span* span, char op, int v = 0) { + span->history[span->nexthistory] = op; + span->value[span->nexthistory] = v; + span->nexthistory++; + if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; +} +#endif + +Span* NewSpan(PageID p, Length len) { + Span* result = Static::span_allocator()->New(); + memset(result, 0, sizeof(*result)); + result->start = p; + result->length = len; +#ifdef SPAN_HISTORY + result->nexthistory = 0; +#endif + return result; +} + +void DeleteSpan(Span* span) { +#ifndef NDEBUG + // In debug mode, trash the contents of deleted Spans + memset(span, 0x3f, sizeof(*span)); +#endif + Static::span_allocator()->Delete(span); +} + +void DLL_Init(Span* list) { + list->next = list; + list->prev = list; +} + +void DLL_Remove(Span* span) { + span->prev->next = span->next; + span->next->prev = span->prev; + span->prev = NULL; + span->next = NULL; +} + +int DLL_Length(const Span* list) { + int result = 0; + for (Span* s = list->next; s != list; s = s->next) { + result++; + } + return result; +} + +void DLL_Prepend(Span* list, Span* span) { + ASSERT(span->next == NULL); + ASSERT(span->prev == NULL); + span->next = list->next; + span->prev = list; + list->next->prev = span; + list->next = span; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/span.h b/src/third_party/gperftools-2.5/src/span.h new file mode 100644 index 00000000000..83feda19c5e --- /dev/null +++ b/src/third_party/gperftools-2.5/src/span.h @@ -0,0 +1,102 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
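With span.cc complete, a short illustrative sequence of the list helpers it defines: spans sit on circular, sentinel-headed doubly linked lists, so emptiness is just next == self. A hedged sketch, assuming the tcmalloc statics (Static::span_allocator()) are initialized as they are inside the allocator:

// Sketch only: the sentinel-headed list discipline from span.cc.
using tcmalloc::Span;

void SpanListDemo() {
  Span list;                          // sentinel header, never a real span
  tcmalloc::DLL_Init(&list);          // empty: next == prev == &list
  ASSERT(tcmalloc::DLL_IsEmpty(&list));

  Span* s = tcmalloc::NewSpan(/*start page=*/123, /*pages=*/4);
  tcmalloc::DLL_Prepend(&list, s);    // requires s->next == s->prev == NULL
  ASSERT(tcmalloc::DLL_Length(&list) == 1);  // O(n) walk back to sentinel

  tcmalloc::DLL_Remove(s);            // relinks neighbors, NULLs s's links
  tcmalloc::DeleteSpan(s);            // returns s to the span allocator
}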
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A Span is a contiguous run of pages. + +#ifndef TCMALLOC_SPAN_H_ +#define TCMALLOC_SPAN_H_ + +#include <config.h> +#include "common.h" + +namespace tcmalloc { + +// Information kept for a span (a contiguous run of pages). +struct Span { + PageID start; // Starting page number + Length length; // Number of pages in span + Span* next; // Used when in link list + Span* prev; // Used when in link list + void* objects; // Linked list of free objects + unsigned int refcount : 16; // Number of non-free objects + unsigned int sizeclass : 8; // Size-class for small objects (or 0) + unsigned int location : 2; // Is the span on a freelist, and if so, which? + unsigned int sample : 1; // Sampled object? + +#undef SPAN_HISTORY +#ifdef SPAN_HISTORY + // For debugging, we can keep a log events per span + int nexthistory; + char history[64]; + int value[64]; +#endif + + // What freelist the span is on: IN_USE if on none, or normal or returned + enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST }; +}; + +#ifdef SPAN_HISTORY +void Event(Span* span, char op, int v = 0); +#else +#define Event(s,o,v) ((void) 0) +#endif + +// Allocator/deallocator for spans +Span* NewSpan(PageID p, Length len); +void DeleteSpan(Span* span); + +// ------------------------------------------------------------------------- +// Doubly linked list of spans. +// ------------------------------------------------------------------------- + +// Initialize *list to an empty list. +void DLL_Init(Span* list); + +// Remove 'span' from the linked list in which it resides, updating the +// pointers of adjacent Spans and setting span's next and prev to NULL. +void DLL_Remove(Span* span); + +// Return true iff "list" is empty. +inline bool DLL_IsEmpty(const Span* list) { + return list->next == list; +} + +// Add span to the front of list. +void DLL_Prepend(Span* list, Span* span); + +// Return the length of the linked list. O(n) +int DLL_Length(const Span* list); + +} // namespace tcmalloc + +#endif // TCMALLOC_SPAN_H_ diff --git a/src/third_party/gperftools-2.5/src/stack_trace_table.cc b/src/third_party/gperftools-2.5/src/stack_trace_table.cc new file mode 100644 index 00000000000..1862124af3c --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stack_trace_table.cc @@ -0,0 +1,160 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Andrew Fikes + +#include <config.h> +#include "stack_trace_table.h" +#include <string.h> // for NULL, memset +#include "base/spinlock.h" // for SpinLockHolder +#include "common.h" // for StackTrace +#include "internal_logging.h" // for ASSERT, Log +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static + +namespace tcmalloc { + +bool StackTraceTable::Bucket::KeyEqual(uintptr_t h, + const StackTrace& t) const { + const bool eq = (this->hash == h && this->trace.depth == t.depth); + for (int i = 0; eq && i < t.depth; ++i) { + if (this->trace.stack[i] != t.stack[i]) { + return false; + } + } + return eq; +} + +StackTraceTable::StackTraceTable() + : error_(false), + depth_total_(0), + bucket_total_(0), + table_(new Bucket*[kHashTableSize]()) { + memset(table_, 0, kHashTableSize * sizeof(Bucket*)); +} + +StackTraceTable::~StackTraceTable() { + delete[] table_; +} + +void StackTraceTable::AddTrace(const StackTrace& t) { + if (error_) { + return; + } + + // Hash function borrowed from base/heap-profile-table.cc + uintptr_t h = 0; + for (int i = 0; i < t.depth; ++i) { + h += reinterpret_cast<uintptr_t>(t.stack[i]); + h += h << 10; + h ^= h >> 6; + } + h += h << 3; + h ^= h >> 11; + + const int idx = h % kHashTableSize; + + Bucket* b = table_[idx]; + while (b != NULL && !b->KeyEqual(h, t)) { + b = b->next; + } + if (b != NULL) { + b->count++; + b->trace.size += t.size; // keep cumulative size + } else { + depth_total_ += t.depth; + bucket_total_++; + b = Static::bucket_allocator()->New(); + if (b == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: could not allocate bucket", sizeof(*b)); + error_ = true; + } else { + b->hash = h; + b->trace = t; + b->count = 1; + b->next = table_[idx]; + table_[idx] = b; + } + } +} + +void** StackTraceTable::ReadStackTracesAndClear() { + if (error_) { + return NULL; + } + + // Allocate output array + const int out_len = bucket_total_ * 3 + depth_total_ + 1; + void** out = new void*[out_len]; + if (out == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: allocation failed for stack traces", + out_len * sizeof(*out)); + return NULL; + } + + // Fill output array + int idx = 0; + for (int i = 0; i < kHashTableSize; ++i) { + Bucket* b = table_[i]; + while (b != NULL) { + out[idx++] = reinterpret_cast<void*>(static_cast<uintptr_t>(b->count)); + out[idx++] = reinterpret_cast<void*>(b->trace.size); // cumulative size + out[idx++] = reinterpret_cast<void*>(b->trace.depth); + for (int d = 0; d < b->trace.depth; ++d) { + out[idx++] = b->trace.stack[d]; + } + b = b->next; + } + } + out[idx++] = NULL; + ASSERT(idx == out_len); + + // Clear state + error_ = false; + depth_total_ = 0; + bucket_total_ = 0; + SpinLockHolder h(Static::pageheap_lock()); + for (int i = 0; i < kHashTableSize; ++i) { + Bucket* b = table_[i]; + while (b != NULL) { + Bucket* next = b->next; + Static::bucket_allocator()->Delete(b); + b = next; + } + table_[i] = NULL; + } + + return out; +} + +} // 
namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/stack_trace_table.h b/src/third_party/gperftools-2.5/src/stack_trace_table.h new file mode 100644 index 00000000000..e2897715354 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stack_trace_table.h @@ -0,0 +1,92 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Andrew Fikes +// +// Utility class for coalescing sampled stack traces. Not thread-safe. + +#ifndef TCMALLOC_STACK_TRACE_TABLE_H_ +#define TCMALLOC_STACK_TRACE_TABLE_H_ + +#include <config.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#include "common.h" + +namespace tcmalloc { + +class PERFTOOLS_DLL_DECL StackTraceTable { + public: + // REQUIRES: L < pageheap_lock + StackTraceTable(); + ~StackTraceTable(); + + // Adds stack trace "t" to table. + // + // REQUIRES: L >= pageheap_lock + void AddTrace(const StackTrace& t); + + // Returns stack traces formatted per MallocExtension guidelines. + // May return NULL on error. Clears state before returning. 
+  //
+  // REQUIRES: L < pageheap_lock
+  void** ReadStackTracesAndClear();
+
+  // Exposed for PageHeapAllocator
+  struct Bucket {
+    // Key
+    uintptr_t hash;
+    StackTrace trace;
+
+    // Payload
+    int count;
+    Bucket* next;
+
+    bool KeyEqual(uintptr_t h, const StackTrace& t) const;
+  };
+
+  // For testing
+  int depth_total() const { return depth_total_; }
+  int bucket_total() const { return bucket_total_; }
+
+ private:
+  static const int kHashTableSize = 1 << 14;  // => table_ is 128k
+
+  bool error_;
+  int depth_total_;
+  int bucket_total_;
+  Bucket** table_;
+};
+
+}  // namespace tcmalloc
+
+#endif  // TCMALLOC_STACK_TRACE_TABLE_H_
diff --git a/src/third_party/gperftools-2.5/src/stacktrace.cc b/src/third_party/gperftools-2.5/src/stacktrace.cc
new file mode 100644
index 00000000000..395d569c8ca
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/stacktrace.cc
@@ -0,0 +1,339 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//
+// Produce stack trace.
+//
+// There are three different ways we can try to get the stack trace:
+//
+// 1) Our hand-coded stack-unwinder. This depends on a certain stack
+//    layout, which is used by gcc (and those systems using a
+//    gcc-compatible ABI) on x86 systems, at least since gcc 2.95.
+//    It uses the frame pointer to do its work.
+//
+// 2) The libunwind library. This is still in development, and as a
+//    separate library adds a new dependency, but doesn't need a frame
+//    pointer. It also doesn't call malloc.
+//
+// 3) The gdb unwinder -- also the one used by the c++ exception code.
+//    It's obviously well-tested, but has a fatal flaw: it can call
+//    malloc() from the unwinder. This is a problem because we're
+//    trying to use the unwinder to instrument malloc().
+//
+// Note: if you add a new implementation here, make sure it works
+// correctly when GetStackTrace() is called with max_depth == 0.
+// Some code may do that.
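Whichever unwinder wins the selection logic that follows, callers see the uniform entry points declared in gperftools/stacktrace.h. A hedged usage sketch (the raw addresses would normally be symbolized offline, e.g. with pprof):

// Sketch only: capture and dump the raw program counters.
#include <stdio.h>
#include <gperftools/stacktrace.h>

void DumpCurrentTrace() {
  void* pcs[32];
  // skip_count = 0 keeps our own caller near pcs[0]; each backend
  // compensates for its own internal bookkeeping frames.
  int depth = GetStackTrace(pcs, 32, 0);
  for (int i = 0; i < depth; i++) {
    printf("#%d %p\n", i, pcs[i]);
  }
}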
+ +#include <config.h> +#include <stdlib.h> // for getenv +#include <string.h> // for strcmp +#include <stdio.h> // for fprintf +#include "gperftools/stacktrace.h" +#include "base/commandlineflags.h" +#include "base/googleinit.h" + + +// we're using plain struct and not class to avoid any possible issues +// during initialization. Struct of pointers is easy to init at +// link-time. +struct GetStackImplementation { + int (*GetStackFramesPtr)(void** result, int* sizes, int max_depth, + int skip_count); + + int (*GetStackFramesWithContextPtr)(void** result, int* sizes, int max_depth, + int skip_count, const void *uc); + + int (*GetStackTracePtr)(void** result, int max_depth, + int skip_count); + + int (*GetStackTraceWithContextPtr)(void** result, int max_depth, + int skip_count, const void *uc); + + const char *name; +}; + +#if HAVE_DECL_BACKTRACE +#define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h" +#define GST_SUFFIX generic +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_generic +#endif + +#ifdef HAVE_UNWIND_BACKTRACE +#define STACKTRACE_INL_HEADER "stacktrace_libgcc-inl.h" +#define GST_SUFFIX libgcc +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_libgcc +#endif + +// libunwind uses __thread so we check for both libunwind.h and +// __thread support +#if defined(HAVE_LIBUNWIND_H) && defined(HAVE_TLS) +#define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h" +#define GST_SUFFIX libunwind +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_libunwind +#endif // HAVE_LIBUNWIND_H + +#if defined(__i386__) || defined(__x86_64__) +#define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h" +#define GST_SUFFIX x86 +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_x86 +#endif // i386 || x86_64 + +#if defined(__ppc__) || defined(__PPC__) +#if defined(__linux__) +#define STACKTRACE_INL_HEADER "stacktrace_powerpc-linux-inl.h" +#else +#define STACKTRACE_INL_HEADER "stacktrace_powerpc-darwin-inl.h" +#endif +#define GST_SUFFIX ppc +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_ppc +#endif + +#if defined(__arm__) +#define STACKTRACE_INL_HEADER "stacktrace_arm-inl.h" +#define GST_SUFFIX arm +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_arm +#endif + +#ifdef TCMALLOC_ENABLE_INSTRUMENT_STACKTRACE +#define STACKTRACE_INL_HEADER "stacktrace_instrument-inl.h" +#define GST_SUFFIX instrument +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_instrument +#endif + +// The Windows case -- probably cygwin and mingw will use one of the +// x86-includes above, but if not, we can fall back to windows intrinsics. 
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +#define STACKTRACE_INL_HEADER "stacktrace_win32-inl.h" +#define GST_SUFFIX win32 +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_win32 +#endif + +static GetStackImplementation *all_impls[] = { +#ifdef HAVE_GST_libgcc + &impl__libgcc, +#endif +#ifdef HAVE_GST_generic + &impl__generic, +#endif +#ifdef HAVE_GST_libunwind + &impl__libunwind, +#endif +#ifdef HAVE_GST_x86 + &impl__x86, +#endif +#ifdef HAVE_GST_arm + &impl__arm, +#endif +#ifdef HAVE_GST_ppc + &impl__ppc, +#endif +#ifdef HAVE_GST_instrument + &impl__instrument, +#endif +#ifdef HAVE_GST_win32 + &impl__win32, +#endif + NULL +}; + +// ppc and i386 implementations prefer arch-specific asm implementations. +// arm's asm implementation is broken +#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__PPC__) +#if !defined(NO_FRAME_POINTER) +#define TCMALLOC_DONT_PREFER_LIBUNWIND +#endif +#endif + +static bool get_stack_impl_inited; + +#if defined(HAVE_GST_instrument) +static GetStackImplementation *get_stack_impl = &impl__instrument; +#elif defined(HAVE_GST_win32) +static GetStackImplementation *get_stack_impl = &impl__win32; +#elif defined(HAVE_GST_x86) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND) +static GetStackImplementation *get_stack_impl = &impl__x86; +#elif defined(HAVE_GST_ppc) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND) +static GetStackImplementation *get_stack_impl = &impl__ppc; +#elif defined(HAVE_GST_libunwind) +static GetStackImplementation *get_stack_impl = &impl__libunwind; +#elif defined(HAVE_GST_libgcc) +static GetStackImplementation *get_stack_impl = &impl__libgcc; +#elif defined(HAVE_GST_generic) +static GetStackImplementation *get_stack_impl = &impl__generic; +#elif defined(HAVE_GST_arm) +static GetStackImplementation *get_stack_impl = &impl__arm; +#elif 0 +// This is for the benefit of code analysis tools that may have +// trouble with the computed #include above. 
+# include "stacktrace_x86-inl.h" +# include "stacktrace_libunwind-inl.h" +# include "stacktrace_generic-inl.h" +# include "stacktrace_powerpc-inl.h" +# include "stacktrace_win32-inl.h" +# include "stacktrace_arm-inl.h" +# include "stacktrace_instrument-inl.h" +#else +#error Cannot calculate stack trace: will need to write for your environment +#endif + +static int ATTRIBUTE_NOINLINE frame_forcer(int rv) { + return rv; +} + +static void init_default_stack_impl_inner(void); + +namespace tcmalloc { + bool EnterStacktraceScope(void); + void LeaveStacktraceScope(void); +} + +namespace { + using tcmalloc::EnterStacktraceScope; + using tcmalloc::LeaveStacktraceScope; + + class StacktraceScope { + bool stacktrace_allowed; + public: + StacktraceScope() { + stacktrace_allowed = true; + stacktrace_allowed = EnterStacktraceScope(); + } + bool IsStacktraceAllowed() { + return stacktrace_allowed; + } + ~StacktraceScope() { + if (stacktrace_allowed) { + LeaveStacktraceScope(); + } + } + }; +} + +PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, + int skip_count) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackFramesPtr(result, sizes, max_depth, skip_count)); +} + +PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, + int skip_count, const void *uc) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackFramesWithContextPtr( + result, sizes, max_depth, + skip_count, uc)); +} + +PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth, + int skip_count) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackTracePtr(result, max_depth, skip_count)); +} + +PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth, + int skip_count, const void *uc) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackTraceWithContextPtr( + result, max_depth, skip_count, uc)); +} + +static void init_default_stack_impl_inner(void) { + if (get_stack_impl_inited) { + return; + } + get_stack_impl_inited = true; + char *val = getenv("TCMALLOC_STACKTRACE_METHOD"); + if (!val || !*val) { + return; + } + for (GetStackImplementation **p = all_impls; *p; p++) { + GetStackImplementation *c = *p; + if (strcmp(c->name, val) == 0) { + get_stack_impl = c; + return; + } + } + fprintf(stderr, "Unknown or unsupported stacktrace method requested: %s. 
Ignoring it\n", val); +} + +static void init_default_stack_impl(void) { + init_default_stack_impl_inner(); + if (EnvToBool("TCMALLOC_STACKTRACE_METHOD_VERBOSE", false)) { + fprintf(stderr, "Chosen stacktrace method is %s\nSupported methods:\n", get_stack_impl->name); + for (GetStackImplementation **p = all_impls; *p; p++) { + GetStackImplementation *c = *p; + fprintf(stderr, "* %s\n", c->name); + } + fputs("\n", stderr); + } +} + +REGISTER_MODULE_INITIALIZER(stacktrace_init_default_stack_impl, init_default_stack_impl()); diff --git a/src/third_party/gperftools-2.5/src/stacktrace_arm-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_arm-inl.h new file mode 100644 index 00000000000..1586b8fec62 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_arm-inl.h @@ -0,0 +1,148 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Doug Kwan +// This is inspired by Craig Silverstein's PowerPC stacktrace code. +// + +#ifndef BASE_STACKTRACE_ARM_INL_H_ +#define BASE_STACKTRACE_ARM_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include <stdint.h> // for uintptr_t +#include "base/basictypes.h" // for NULL +#include <gperftools/stacktrace.h> + +// WARNING: +// This only works if all your code is in either ARM or THUMB mode. With +// interworking, the frame pointer of the caller can either be in r11 (ARM +// mode) or r7 (THUMB mode). A callee only saves the frame pointer of its +// mode in a fixed location on its stack frame. If the caller is a different +// mode, there is no easy way to find the frame pointer. It can either be +// still in the designated register or saved on stack along with other callee +// saved registers. + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return NULL if no stackframe can be found. 
Perform sanity
+// checks (the strictness of which is controlled by the boolean parameter
+// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
+template<bool STRICT_UNWINDING>
+static void **NextStackFrame(void **old_sp) {
+  void **new_sp = (void**) old_sp[-1];
+
+  // Check that the transition from frame pointer old_sp to frame
+  // pointer new_sp isn't clearly bogus
+  if (STRICT_UNWINDING) {
+    // With the stack growing downwards, older stack frame must be
+    // at a greater address than the current one.
+    if (new_sp <= old_sp) return NULL;
+    // Assume stack frames larger than 100,000 bytes are bogus.
+    if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
+  } else {
+    // In the non-strict mode, allow discontiguous stack frames.
+    // (alternate-signal-stacks for example).
+    if (new_sp == old_sp) return NULL;
+    // And allow frames up to about 1MB.
+    if ((new_sp > old_sp)
+        && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL;
+  }
+  if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
+  return new_sp;
+}
+
+// This ensures that GetStackTrace sets up the Link Register properly.
+#ifdef __GNUC__
+void StacktraceArmDummyFunction() __attribute__((noinline));
+void StacktraceArmDummyFunction() { __asm__ volatile(""); }
+#else
+# error StacktraceArmDummyFunction() needs to be ported to this platform.
+#endif
+#endif  // BASE_STACKTRACE_ARM_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
+
+// The following 4 functions are generated from the code below:
+//   GetStack{Trace,Frames}()
+//   GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+//   void** result: the stack-trace, as an array
+//   int* sizes: the size of each stack frame, as an array
+//               (GetStackFrames* only)
+//   int max_depth: the size of the result (and sizes) array(s)
+//   int skip_count: how many stack pointers to skip before storing in result
+//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+static int GET_STACK_TRACE_OR_FRAMES {
+#ifdef __GNUC__
+  void **sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#else
+# error reading the stack pointer is not yet supported on this platform.
+#endif
+
+  // On ARM, the return address is stored in the link register (r14).
+  // This is not saved on the stack frame of a leaf function. To
+  // simplify code that reads return addresses, we call a dummy
+  // function so that the return address of this function is also
+  // stored in the stack frame. This works at least for gcc.
+  StacktraceArmDummyFunction();
+
+  skip_count++;  // skip parent frame due to indirection in stacktrace.cc
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<IS_STACK_FRAMES == 0>(sp);
+
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      result[n] = *sp;
+
+#if IS_STACK_FRAMES
+      if (next_sp > sp) {
+        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
+      } else {
+        // A frame-size of 0 is used to indicate unknown frame size.
+ sizes[n] = 0; + } +#endif + n++; + } + sp = next_sp; + } + return n; +} diff --git a/src/third_party/gperftools-2.5/src/stacktrace_generic-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_generic-inl.h new file mode 100644 index 00000000000..7d7c22d9e45 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_generic-inl.h @@ -0,0 +1,84 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Portable implementation - just use glibc +// +// Note: The glibc implementation may cause a call to malloc. +// This can cause a deadlock in HeapProfiler. + +#ifndef BASE_STACKTRACE_GENERIC_INL_H_ +#define BASE_STACKTRACE_GENERIC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include <execinfo.h> +#include <string.h> +#include "gperftools/stacktrace.h" +#endif // BASE_STACKTRACE_GENERIC_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. 
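The generic implementation that follows simply wraps glibc's backtrace(3). For orientation, a standalone sketch of that underlying call; note that backtrace_symbols() mallocs, which is exactly the hazard the surrounding comments warn about:

// Sketch only: plain glibc backtrace, outside of any malloc hook.
#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>

void PrintBacktrace() {
  void* stack[64];
  int size = backtrace(stack, 64);                  // innermost frame first
  char** symbols = backtrace_symbols(stack, size);  // calls malloc!
  for (int i = 0; i < size; i++) {
    printf("%s\n", symbols != NULL ? symbols[i] : "?");
  }
  free(symbols);
}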
+
+// The following 4 functions are generated from the code below:
+//   GetStack{Trace,Frames}()
+//   GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+//   void** result: the stack-trace, as an array
+//   int* sizes: the size of each stack frame, as an array
+//               (GetStackFrames* only)
+//   int max_depth: the size of the result (and sizes) array(s)
+//   int skip_count: how many stack pointers to skip before storing in result
+//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+static int GET_STACK_TRACE_OR_FRAMES {
+  static const int kStackLength = 64;
+  void * stack[kStackLength];
+  int size;
+
+  size = backtrace(stack, kStackLength);
+  skip_count += 2;  // we want to skip the current and its parent frame as well
+  int result_count = size - skip_count;
+  if (result_count < 0)
+    result_count = 0;
+  if (result_count > max_depth)
+    result_count = max_depth;
+  for (int i = 0; i < result_count; i++)
+    result[i] = stack[i + skip_count];
+
+#if IS_STACK_FRAMES
+  // No implementation for finding out the stack frame sizes yet.
+  memset(sizes, 0, sizeof(*sizes) * result_count);
+#endif
+
+  return result_count;
+}
diff --git a/src/third_party/gperftools-2.5/src/stacktrace_impl_setup-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_impl_setup-inl.h
new file mode 100644
index 00000000000..698c5b38196
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/stacktrace_impl_setup-inl.h
@@ -0,0 +1,94 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// NOTE: this is NOT to be #include-d normally. It's an internal
+// implementation detail of stacktrace.cc
+//
+
+// Copyright (c) 2014, gperftools Contributors.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
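The setup header that follows is included once per backend with GST_SUFFIX defined, stamping out the four entry points plus a descriptor. As a hand-expanded illustration (not literal preprocessor output), GST_SUFFIX == generic yields roughly:

// Conceptual expansion for GST_SUFFIX == generic (illustration only):
int GetStackTrace_generic(void** result, int max_depth, int skip_count);
int GetStackFrames_generic(void** result, int* sizes, int max_depth,
                           int skip_count);
int GetStackTraceWithContext_generic(void** result, int max_depth,
                                     int skip_count, const void* ucp);
int GetStackFramesWithContext_generic(void** result, int* sizes, int max_depth,
                                      int skip_count, const void* ucp);

static GetStackImplementation impl__generic = {
  GetStackFrames_generic,            // field order matches the struct
  GetStackFramesWithContext_generic, // declared in stacktrace.cc
  GetStackTrace_generic,
  GetStackTraceWithContext_generic,
  "generic"  // matched against TCMALLOC_STACKTRACE_METHOD at runtime
};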
+ +// --- +// Author: Aliaksey Kandratsenka <alk@tut.by> +// +// based on stacktrace.cc and stacktrace_config.h by Sanjay Ghemawat +// and Paul Pluzhnikov from Google Inc + +#define SIS_CONCAT2(a, b) a##b +#define SIS_CONCAT(a, b) SIS_CONCAT2(a,b) + +#define SIS_STRINGIFY(a) SIS_STRINGIFY2(a) +#define SIS_STRINGIFY2(a) #a + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackTrace_, GST_SUFFIX)(void **result, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackFrames_, GST_SUFFIX)(void **result, int *sizes, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX)(void **result, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX)(void **result, int *sizes, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +static GetStackImplementation SIS_CONCAT(impl__,GST_SUFFIX) = { + SIS_CONCAT(GetStackFrames_, GST_SUFFIX), + SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX), + SIS_CONCAT(GetStackTrace_, GST_SUFFIX), + SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX), + SIS_STRINGIFY(GST_SUFFIX) +}; + +#undef SIS_CONCAT2 +#undef SIS_CONCAT diff --git a/src/third_party/gperftools-2.5/src/stacktrace_instrument-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_instrument-inl.h new file mode 100755 index 00000000000..c631765c8a2 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_instrument-inl.h @@ -0,0 +1,155 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2013, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Jean Lee <xiaoyur347@gmail.com>
+// based on gcc Code-Gen-Options "-finstrument-functions" listed in
+// http://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html .
+// Should run configure with CXXFLAGS="-finstrument-functions".
+
+// This file is a backtrace implementation for systems where:
+// * the glibc implementation of backtrace() may cause a call to malloc,
+//   and cause a deadlock in HeapProfiler;
+// * the libunwind implementation prints no backtrace.
+
+// The backtrace arrays are stored in the "thread_back_trace" variable.
+// Using thread-local storage instead might be better and would save memory.
+
+#ifndef BASE_STACKTRACE_INSTRUMENT_INL_H_
+#define BASE_STACKTRACE_INSTRUMENT_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
+#include <execinfo.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "gperftools/stacktrace.h"
+
+#define gettid() syscall(__NR_gettid)
+#ifndef __x86_64__
+#define MAX_THREAD (32768)
+#else
+#define MAX_THREAD (65536)
+#endif
+#define MAX_DEPTH (30)
+#define ATTRIBUTE_NOINSTRUMENT __attribute__ ((no_instrument_function))
+
+typedef struct {
+  int stack_depth;
+  void* frame[MAX_DEPTH];
+} BACK_TRACE;
+
+static BACK_TRACE thread_back_trace[MAX_THREAD];
+extern "C" {
+void __cyg_profile_func_enter(void *func_address,
+                              void *call_site) ATTRIBUTE_NOINSTRUMENT;
+void __cyg_profile_func_enter(void *func_address, void *call_site) {
+  (void)func_address;
+
+  BACK_TRACE* backtrace = thread_back_trace + gettid();
+  int stack_depth = backtrace->stack_depth;
+  backtrace->stack_depth = stack_depth + 1;
+  if ( stack_depth >= MAX_DEPTH ) {
+    return;
+  }
+  backtrace->frame[stack_depth] = call_site;
+}
+
+void __cyg_profile_func_exit(void *func_address,
+                             void *call_site) ATTRIBUTE_NOINSTRUMENT;
+void __cyg_profile_func_exit(void *func_address, void *call_site) {
+  (void)func_address;
+  (void)call_site;
+
+  BACK_TRACE* backtrace = thread_back_trace + gettid();
+  int stack_depth = backtrace->stack_depth;
+  backtrace->stack_depth = stack_depth - 1;
+  if ( stack_depth >= MAX_DEPTH ) {
+    return;
+  }
+  backtrace->frame[stack_depth] = 0;
+}
+}  // extern "C"
+
+static int cyg_backtrace(void **buffer, int size) {
+  BACK_TRACE* backtrace = thread_back_trace + gettid();
+  int stack_depth = backtrace->stack_depth;
+  if ( stack_depth >= MAX_DEPTH ) {
+    stack_depth = MAX_DEPTH;
+  }
+  int nSize = (size > stack_depth) ? stack_depth : size;
+  for (int i = 0; i < nSize; i++) {
+    buffer[i] = backtrace->frame[nSize - i - 1];
+  }
+
+  return nSize;
+}
+
+#endif  // BASE_STACKTRACE_INSTRUMENT_INL_H_
+
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
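The __cyg_profile hooks above are invoked by the prologue/epilogue calls gcc injects when client code is built with -finstrument-functions. A minimal, self-contained illustration of that mechanism (a hypothetical demo program, unrelated to the per-thread tables above):

// Demo only: compile with  g++ -finstrument-functions demo.cc
#include <cstdio>

extern "C" {
// The hooks themselves must not be instrumented, or they would recurse.
void __cyg_profile_func_enter(void* fn, void* call_site)
    __attribute__((no_instrument_function));
void __cyg_profile_func_exit(void* fn, void* call_site)
    __attribute__((no_instrument_function));

void __cyg_profile_func_enter(void* fn, void* call_site) {
  std::fprintf(stderr, "enter %p called from %p\n", fn, call_site);
}
void __cyg_profile_func_exit(void* fn, void* call_site) {
  std::fprintf(stderr, "exit  %p called from %p\n", fn, call_site);
}
}  // extern "C"

static int Traced() { return 42; }  // gets enter/exit calls injected

int main() { return Traced() == 42 ? 0 : 1; }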
+ +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + memset(stack, 0, sizeof(stack)); + + size = cyg_backtrace(stack, kStackLength); + skip_count += 2; // we want to skip the current and parent frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + +#if IS_STACK_FRAMES + // No implementation for finding out the stack frame sizes yet. + memset(sizes, 0, sizeof(*sizes) * result_count); +#endif + + return result_count; +} diff --git a/src/third_party/gperftools-2.5/src/stacktrace_libgcc-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_libgcc-inl.h new file mode 100644 index 00000000000..ce9cf5196ad --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_libgcc-inl.h @@ -0,0 +1,111 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2016, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file implements backtrace capturing via libgcc's +// _Unwind_Backtrace. This generally works almost always. It will fail +// sometimes when we're trying to capture backtrace from signal +// handler (i.e. in cpu profiler) while some C++ code is throwing +// exception. 
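A standalone reduction of the _Unwind_Backtrace callback pattern this backend uses below (sketch only: the real code also handles skip counts, a result limit, and the trailing NULL frame):

// Sketch only: walk the stack via libgcc and print each instruction pointer.
#include <unwind.h>
#include <cstdio>

static _Unwind_Reason_Code Collect(struct _Unwind_Context* ctx, void* arg) {
  int* count = static_cast<int*>(arg);
  std::printf("#%d %p\n", (*count)++,
              reinterpret_cast<void*>(_Unwind_GetIP(ctx)));
  return _URC_NO_REASON;  // keep unwinding to the outermost frame
}

void PrintUnwindTrace() {
  int count = 0;
  _Unwind_Backtrace(Collect, &count);
}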
+ +#ifndef BASE_STACKTRACE_LIBGCC_INL_H_ +#define BASE_STACKTRACE_LIBGCC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +extern "C" { +#include <assert.h> +#include <string.h> // for memset() +} + +#include <unwind.h> + +#include "gperftools/stacktrace.h" + +struct libgcc_backtrace_data { + void **array; + int skip; + int pos; + int limit; +}; + +static _Unwind_Reason_Code libgcc_backtrace_helper(struct _Unwind_Context *ctx, + void *_data) { + libgcc_backtrace_data *data = + reinterpret_cast<libgcc_backtrace_data *>(_data); + + if (data->skip > 0) { + data->skip--; + return _URC_NO_REASON; + } + + if (data->pos < data->limit) { + void *ip = reinterpret_cast<void *>(_Unwind_GetIP(ctx));; + data->array[data->pos++] = ip; + } + + return _URC_NO_REASON; +} + +#endif // BASE_STACKTRACE_LIBGCC_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. + +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + libgcc_backtrace_data data; + data.array = result; + // we're also skipping current and parent's frame + data.skip = skip_count + 2; + data.pos = 0; + data.limit = max_depth; + + _Unwind_Backtrace(libgcc_backtrace_helper, &data); + + if (data.pos > 1 && data.array[data.pos - 1] == NULL) + --data.pos; + +#if IS_STACK_FRAMES + // No implementation for finding out the stack frame sizes. + memset(sizes, 0, sizeof(*sizes) * data.pos); +#endif + + return data.pos; +} diff --git a/src/third_party/gperftools-2.5/src/stacktrace_libunwind-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_libunwind-inl.h new file mode 100644 index 00000000000..8a4a731b143 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_libunwind-inl.h @@ -0,0 +1,150 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Arun Sharma
+//
+// Produce stack trace using libunwind
+
+#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_
+#define BASE_STACKTRACE_LIBINWIND_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
+// We only need local unwinder.
+#define UNW_LOCAL_ONLY
+
+extern "C" {
+#include <assert.h>
+#include <string.h> // for memset()
+#include <libunwind.h>
+}
+#include "gperftools/stacktrace.h"
+#include "base/logging.h"
+
+// Sometimes, we can try to get a stack trace from within a stack
+// trace, because libunwind can call mmap (maybe indirectly via an
+// internal mmap based memory allocator), and that mmap gets trapped
+// and causes a stack-trace request. If we were to try to honor that
+// recursive request, we'd end up with infinite recursion or deadlock.
+// Luckily, it's safe to ignore those subsequent traces. In such
+// cases, we return 0 to indicate the situation.
+static __thread int recursive;
+
+#if defined(TCMALLOC_ENABLE_UNWIND_FROM_UCONTEXT) && (defined(__i386__) || defined(__x86_64__)) && defined(__GNU_LIBRARY__)
+#define BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT 1
+#endif
+
+#endif // BASE_STACKTRACE_LIBINWIND_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
+
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+// (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+static int GET_STACK_TRACE_OR_FRAMES {
+ void *ip;
+ int n = 0;
+ unw_cursor_t cursor;
+ unw_context_t uc;
+#if IS_STACK_FRAMES
+ unw_word_t sp = 0, next_sp = 0;
+#endif
+
+ if (recursive) {
+ return 0;
+ }
+ ++recursive;
+
+#if (IS_WITH_CONTEXT && defined(BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT))
+ if (ucp) {
+ uc = *(static_cast<unw_context_t *>(const_cast<void *>(ucp)));
+ /* this is a bit weird. profiler.cc calls us with signal's ucontext
+ * yet passing us 2 as skip_count and essentially assuming we won't
+ * use ucontext.
*/
+ /* In order to fix that I'm going to assume that if ucp is
+ * non-null we're asked to ignore skip_count in case we're
+ * able to use ucp */
+ skip_count = 0;
+ } else {
+ unw_getcontext(&uc);
+ skip_count += 2; // Do not include current and parent frame
+ }
+#else
+ unw_getcontext(&uc);
+ skip_count += 2; // Do not include current and parent frame
+#endif
+
+ int ret = unw_init_local(&cursor, &uc);
+ assert(ret >= 0);
+
+ while (skip_count--) {
+ if (unw_step(&cursor) <= 0) {
+ goto out;
+ }
+#if IS_STACK_FRAMES
+ if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) {
+ goto out;
+ }
+#endif
+ }
+
+ while (n < max_depth) {
+ if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
+ break;
+ }
+#if IS_STACK_FRAMES
+ sizes[n] = 0;
+#endif
+ result[n++] = ip;
+ if (unw_step(&cursor) <= 0) {
+ break;
+ }
+#if IS_STACK_FRAMES
+ sp = next_sp;
+ if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) {
+ break;
+ }
+ sizes[n - 1] = next_sp - sp;
+#endif
+ }
+out:
+ --recursive;
+ return n;
+}
diff --git a/src/third_party/gperftools-2.5/src/stacktrace_powerpc-darwin-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_powerpc-darwin-inl.h
new file mode 100644
index 00000000000..c4c2edbc535
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/stacktrace_powerpc-darwin-inl.h
@@ -0,0 +1,158 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Produce stack trace. ABI documentation reference can be found at:
+// * PowerPC32 ABI: https://www.power.org/documentation/
+// power-architecture-32-bit-abi-supplement-1-0-embeddedlinuxunified/
+// * PowerPC64 ABI:
+// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK
+
+#ifndef BASE_STACKTRACE_POWERPC_INL_H_
+#define BASE_STACKTRACE_POWERPC_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
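Stepping back to the libunwind implementation that just closed: its loop is libunwind's standard cursor idiom. A minimal self-contained equivalent, sketched here without the recursion guard and signal-context handling of the real code (the function name is hypothetical):

    #define UNW_LOCAL_ONLY   // local (same-process) unwinding is enough
    #include <libunwind.h>

    // Fills pcs[0..max_depth) with the caller's return addresses and
    // returns how many were captured.
    int CaptureWithLibunwind(void** pcs, int max_depth) {
      unw_context_t uc;
      unw_cursor_t cursor;
      unw_getcontext(&uc);
      if (unw_init_local(&cursor, &uc) < 0)
        return 0;
      int n = 0;
      // Step once before reading so the capture starts at our caller.
      while (n < max_depth && unw_step(&cursor) > 0) {
        unw_word_t ip;
        if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0)
          break;
        pcs[n++] = reinterpret_cast<void*>(ip);
      }
      return n;
    }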
+// Anything that should only be defined once should be here:
+
+#include <stdint.h> // for uintptr_t
+#include <stdlib.h> // for NULL
+#include <gperftools/stacktrace.h>
+
+// Given a pointer to a stack frame, locate and return the calling
+// stackframe, or return NULL if no stackframe can be found. Perform sanity
+// checks (the strictness of which is controlled by the boolean parameter
+// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
+template<bool STRICT_UNWINDING>
+static void **NextStackFrame(void **old_sp) {
+ void **new_sp = (void **) *old_sp;
+
+ // Check that the transition from frame pointer old_sp to frame
+ // pointer new_sp isn't clearly bogus
+ if (STRICT_UNWINDING) {
+ // With the stack growing downwards, older stack frame must be
+ // at a greater address than the current one.
+ if (new_sp <= old_sp) return NULL;
+ // Assume stack frames larger than 100,000 bytes are bogus.
+ if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
+ } else {
+ // In the non-strict mode, allow discontiguous stack frames.
+ // (alternate-signal-stacks for example).
+ if (new_sp == old_sp) return NULL;
+ // And allow frames up to about 1MB.
+ if ((new_sp > old_sp)
+ && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL;
+ }
+ if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
+ return new_sp;
+}
+
+// This ensures that GetStackTrace sets up the Link Register properly.
+void StacktracePowerPCDummyFunction() __attribute__((noinline));
+void StacktracePowerPCDummyFunction() { __asm__ volatile(""); }
+#endif // BASE_STACKTRACE_POWERPC_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
+
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+// (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+int GET_STACK_TRACE_OR_FRAMES {
+ void **sp;
+ // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther)
+ // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a
+ // different asm syntax. I don't know quite the best way to discriminate
+ // systems using the old as from the new one; I've gone with __APPLE__.
+ // TODO(csilvers): use autoconf instead, to look for 'as --version' == 1 or 2
+ __asm__ volatile ("mr %0,r1" : "=r" (sp));
+
+ // On PowerPC, the "Link Register" or "Link Record" (LR), is a stack
+ // entry that holds the return address of the subroutine call (what
+ // instruction we run after our function finishes). This is the
+ // same as the stack-pointer of our parent routine, which is what we
+ // want here. While the compiler will always(?) set up LR for
+ // subroutine calls, it may not for leaf functions (such as this one).
+ // This routine forces the compiler (at least gcc) to push it anyway.
+ StacktracePowerPCDummyFunction();
+
+#if IS_STACK_FRAMES
+ // Note we do *not* increment skip_count here for the SYSV ABI. If
+ // we did, the list of stack frames wouldn't properly match up with
+ // the list of return addresses. Note this means the top pc entry
+ // is probably bogus for linux/ppc (and other SYSV-ABI systems).
+#else + // The LR save area is used by the callee, so the top entry is bogus. + skip_count++; +#endif + + int n = 0; + while (sp && n < max_depth) { + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few + // bogus entries in some rare cases). + void **next_sp = NextStackFrame<!IS_STACK_FRAMES>(sp); + + if (skip_count > 0) { + skip_count--; + } else { + // PowerPC has 3 main ABIs, which say where in the stack the + // Link Register is. For DARWIN and AIX (used by apple and + // linux ppc64), it's in sp[2]. For SYSV (used by linux ppc), + // it's in sp[1]. +#if defined(__PPC64__) + // This check is in case the compiler doesn't define _CALL_AIX/etc. + result[n] = *(sp+2); +#elif defined(__linux) + // This check is in case the compiler doesn't define _CALL_SYSV. + result[n] = *(sp+1); +#endif + +#if IS_STACK_FRAMES + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } +#endif + n++; + } + sp = next_sp; + } + return n; +} diff --git a/src/third_party/gperftools-2.5/src/stacktrace_powerpc-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_powerpc-inl.h new file mode 100644 index 00000000000..811d6cc97ee --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_powerpc-inl.h @@ -0,0 +1,176 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// Produce stack trace. I'm guessing (hoping!) the code is much like +// for x86. 
For apple machines, at least, it seems to be; see
+// http://developer.apple.com/documentation/mac/runtimehtml/RTArch-59.html
+// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK
+// Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882
+
+#ifndef BASE_STACKTRACE_POWERPC_INL_H_
+#define BASE_STACKTRACE_POWERPC_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
+#include <stdint.h> // for uintptr_t
+#include <stdlib.h> // for NULL
+#include <gperftools/stacktrace.h>
+
+struct layout_ppc {
+ struct layout_ppc *next;
+#if defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
+ long condition_register;
+#endif
+ void *return_addr;
+};
+
+// Given a pointer to a stack frame, locate and return the calling
+// stackframe, or return NULL if no stackframe can be found. Perform sanity
+// checks (the strictness of which is controlled by the boolean parameter
+// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
+template<bool STRICT_UNWINDING>
+static layout_ppc *NextStackFrame(layout_ppc *current) {
+ uintptr_t old_sp = (uintptr_t)(current);
+ uintptr_t new_sp = (uintptr_t)(current->next);
+
+ // Check that the transition from frame pointer old_sp to frame
+ // pointer new_sp isn't clearly bogus
+ if (STRICT_UNWINDING) {
+ // With the stack growing downwards, older stack frame must be
+ // at a greater address than the current one.
+ if (new_sp <= old_sp)
+ return NULL;
+ // Assume stack frames larger than 100,000 bytes are bogus.
+ if (new_sp - old_sp > 100000)
+ return NULL;
+ } else {
+ // In the non-strict mode, allow discontiguous stack frames.
+ // (alternate-signal-stacks for example).
+ if (new_sp == old_sp)
+ return NULL;
+ // And allow frames up to about 1MB.
+ if ((new_sp > old_sp) && (new_sp - old_sp > 1000000))
+ return NULL;
+ }
+ if (new_sp & (sizeof(void *) - 1))
+ return NULL;
+ return current->next;
+}
+
+// This ensures that GetStackTrace sets up the Link Register properly.
+void StacktracePowerPCDummyFunction() __attribute__((noinline));
+void StacktracePowerPCDummyFunction() { __asm__ volatile(""); }
+#endif // BASE_STACKTRACE_POWERPC_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
+
+// Load instruction used on top-of-stack get.
+#if defined(__PPC64__) || defined(__LP64__)
+# define LOAD "ld"
+#else
+# define LOAD "lwz"
+#endif
+
+#if defined(__linux__) && defined(__PPC__)
+# define TOP_STACK "%0,0(1)"
+#elif defined(__MACH__) && defined(__APPLE__)
+// Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther)
+// and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a
+// different asm syntax. I don't know quite the best way to discriminate
+// systems using the old as from the new one; I've gone with __APPLE__.
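All three PowerPC variants in this commit walk the same structure: the first word of each frame points at the caller's frame (the backchain), so unwinding is pointer chasing plus the sanity checks shown above. Distilled into a hedged sketch (hypothetical simplified Frame layout; the real code reads the return address from an ABI-dependent slot):

    #include <cstdint>

    struct Frame { Frame* next; void* return_addr; };  // simplified backchain frame

    int WalkBackchain(Frame* fp, void** out, int max_depth) {
      int n = 0;
      while (fp != nullptr && n < max_depth) {
        Frame* next = fp->next;
        // Strict-mode checks, as above: the stack grows down, so the
        // caller's frame must be higher, pointer-aligned, and within ~100KB.
        if (next <= fp ||
            (uintptr_t)next - (uintptr_t)fp > 100000 ||
            ((uintptr_t)next & (sizeof(void*) - 1)) != 0)
          break;
        out[n++] = fp->return_addr;
        fp = next;
      }
      return n;
    }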
+// TODO(csilvers): use autoconf instead, to look for 'as --version' == 1 or 2 +# define TOP_STACK "%0,0(r1)" +#endif + + + +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + layout_ppc *current; + int n; + + // Force GCC to spill LR. + asm volatile ("" : "=l"(current)); + + // Get the address on top-of-stack + asm volatile (LOAD " " TOP_STACK : "=r"(current)); + + StacktracePowerPCDummyFunction(); + + n = 0; + skip_count++; // skip parent's frame due to indirection in + // stacktrace.cc + while (current && n < max_depth) { + + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few + // bogus entries in some rare cases). + layout_ppc *next = NextStackFrame<!IS_STACK_FRAMES>(current); + if (skip_count > 0) { + skip_count--; + } else { + result[n] = current->return_addr; +#if IS_STACK_FRAMES + if (next > current) { + sizes[n] = (uintptr_t)next - (uintptr_t)current; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } +#endif + n++; + } + current = next; + } + + // It's possible the second-last stack frame can't return + // (that is, it's __libc_start_main), in which case + // the CRT startup code will have set its LR to 'NULL'. + if (n > 0 && result[n-1] == NULL) + n--; + + return n; +} diff --git a/src/third_party/gperftools-2.5/src/stacktrace_powerpc-linux-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_powerpc-linux-inl.h new file mode 100644 index 00000000000..5d16fa1283b --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_powerpc-linux-inl.h @@ -0,0 +1,231 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Craig Silverstein
+//
+// Produce stack trace. ABI documentation reference can be found at:
+// * PowerPC32 ABI: https://www.power.org/documentation/
+// power-architecture-32-bit-abi-supplement-1-0-embeddedlinuxunified/
+// * PowerPC64 ABI:
+// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK
+
+#ifndef BASE_STACKTRACE_POWERPC_INL_H_
+#define BASE_STACKTRACE_POWERPC_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
+#include <stdint.h> // for uintptr_t
+#include <stdlib.h> // for NULL
+#include <gperftools/stacktrace.h>
+#include <base/vdso_support.h>
+
+#if defined(HAVE_SYS_UCONTEXT_H)
+#include <sys/ucontext.h>
+#elif defined(HAVE_UCONTEXT_H)
+#include <ucontext.h> // for ucontext_t
+#endif
+typedef ucontext ucontext_t;
+
+// PowerPC64 Little Endian follows BE wrt. backchain, condition register,
+// and LR save area, so no need to adjust the reading struct.
+struct layout_ppc {
+ struct layout_ppc *next;
+#ifdef __PPC64__
+ long condition_register;
+#endif
+ void *return_addr;
+};
+
+// Signal callbacks are handled by the vDSO symbol:
+//
+// * PowerPC64 Linux (arch/powerpc/kernel/vdso64/sigtramp.S):
+// __kernel_sigtramp_rt64
+// * PowerPC32 Linux (arch/powerpc/kernel/vdso32/sigtramp.S):
+// __kernel_sigtramp32
+// __kernel_sigtramp_rt32
+//
+// So a backtrace may need special handling if the symbol read is
+// the signal trampoline.
+
+// Given a pointer to a stack frame, locate and return the calling
+// stackframe, or return NULL if no stackframe can be found. Perform sanity
+// checks (the strictness of which is controlled by the boolean parameter
+// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
+template<bool STRICT_UNWINDING>
+static layout_ppc *NextStackFrame(layout_ppc *current) {
+ uintptr_t old_sp = (uintptr_t)(current);
+ uintptr_t new_sp = (uintptr_t)(current->next);
+
+ // Check that the transition from frame pointer old_sp to frame
+ // pointer new_sp isn't clearly bogus
+ if (STRICT_UNWINDING) {
+ // With the stack growing downwards, older stack frame must be
+ // at a greater address than the current one.
+ if (new_sp <= old_sp)
+ return NULL;
+ // Assume stack frames larger than 100,000 bytes are bogus.
+ if (new_sp - old_sp > 100000)
+ return NULL;
+ } else {
+ // In the non-strict mode, allow discontiguous stack frames.
+ // (alternate-signal-stacks for example).
+ if (new_sp == old_sp)
+ return NULL;
+ // And allow frames up to about 1MB.
+ if ((new_sp > old_sp) && (new_sp - old_sp > 1000000))
+ return NULL;
+ }
+ if (new_sp & (sizeof(void *) - 1))
+ return NULL;
+ return current->next;
+}
+
+// This ensures that GetStackTrace sets up the Link Register properly.
+void StacktracePowerPCDummyFunction() __attribute__((noinline)); +void StacktracePowerPCDummyFunction() { __asm__ volatile(""); } +#endif // BASE_STACKTRACE_POWERPC_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. + +// Load instruction used on top-of-stack get. +#if defined(__PPC64__) || defined(__LP64__) +# define LOAD "ld" +#else +# define LOAD "lwz" +#endif + +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + layout_ppc *current; + int n; + + // Get the address on top-of-stack + current = reinterpret_cast<layout_ppc*> (__builtin_frame_address (0)); + // And ignore the current symbol + current = current->next; + + StacktracePowerPCDummyFunction(); + + n = 0; + skip_count++; // skip parent's frame due to indirection in + // stacktrace.cc + + base::VDSOSupport vdso; + base::ElfMemImage::SymbolInfo rt_sigreturn_symbol_info; +#ifdef __PPC64__ + const void *sigtramp64_vdso = 0; + if (vdso.LookupSymbol("__kernel_sigtramp_rt64", "LINUX_2.6.15", STT_NOTYPE, + &rt_sigreturn_symbol_info)) + sigtramp64_vdso = rt_sigreturn_symbol_info.address; +#else + const void *sigtramp32_vdso = 0; + if (vdso.LookupSymbol("__kernel_sigtramp32", "LINUX_2.6.15", STT_NOTYPE, + &rt_sigreturn_symbol_info)) + sigtramp32_vdso = rt_sigreturn_symbol_info.address; + const void *sigtramp32_rt_vdso = 0; + if (vdso.LookupSymbol("__kernel_sigtramp_rt32", "LINUX_2.6.15", STT_NOTYPE, + &rt_sigreturn_symbol_info)) + sigtramp32_rt_vdso = rt_sigreturn_symbol_info.address; +#endif + + while (current && n < max_depth) { + + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few + // bogus entries in some rare cases). + layout_ppc *next = NextStackFrame<!IS_STACK_FRAMES>(current); + if (skip_count > 0) { + skip_count--; + } else { + result[n] = current->return_addr; +#ifdef __PPC64__ + if (sigtramp64_vdso && (sigtramp64_vdso == current->return_addr)) { + struct signal_frame_64 { + char dummy[128]; + ucontext_t uc; + // We don't care about the rest, since the IP value is at 'uc' field. + } *sigframe = reinterpret_cast<signal_frame_64*>(current); + result[n] = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_NIP]; + } +#else + if (sigtramp32_vdso && (sigtramp32_vdso == current->return_addr)) { + struct signal_frame_32 { + char dummy[64]; + struct sigcontext sctx; + mcontext_t mctx; + // We don't care about the rest, since IP value is at 'mctx' field. 
+ } *sigframe = reinterpret_cast<signal_frame_32*>(current);
+ result[n] = (void*) sigframe->mctx.gregs[PT_NIP];
+ } else if (sigtramp32_rt_vdso && (sigtramp32_rt_vdso == current->return_addr)) {
+ struct rt_signal_frame_32 {
+ char dummy[64 + 16];
+ siginfo_t info;
+ struct ucontext uc;
+ // We don't care about the rest, since IP value is at 'uc' field.
+ } *sigframe = reinterpret_cast<rt_signal_frame_32*>(current);
+ result[n] = (void*) sigframe->uc.uc_mcontext.uc_regs->gregs[PT_NIP];
+ }
+#endif
+
+#if IS_STACK_FRAMES
+ if (next > current) {
+ sizes[n] = (uintptr_t)next - (uintptr_t)current;
+ } else {
+ // A frame-size of 0 is used to indicate unknown frame size.
+ sizes[n] = 0;
+ }
+#endif
+ n++;
+ }
+ current = next;
+ }
+
+ // It's possible the second-last stack frame can't return
+ // (that is, it's __libc_start_main), in which case
+ // the CRT startup code will have set its LR to 'NULL'.
+ if (n > 0 && result[n-1] == NULL)
+ n--;
+
+ return n;
+}
diff --git a/src/third_party/gperftools-2.5/src/stacktrace_win32-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_win32-inl.h
new file mode 100644
index 00000000000..663e9a5bfb6
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/stacktrace_win32-inl.h
@@ -0,0 +1,107 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+// Produces a stack trace for Windows. Normally, one could use
+// stacktrace_x86-inl.h or stacktrace_x86_64-inl.h -- and indeed, that
+// should work for binaries compiled using MSVC in "debug" mode.
+// However, in "release" mode, Windows uses frame-pointer
+// optimization, which makes getting a stack trace very difficult.
+//
+// There are several approaches one can take. One is to use Windows
+// intrinsics like StackWalk64. These can work, but have restrictions
+// on how successful they can be.
Another attempt is to write a +// version of stacktrace_x86-inl.h that has heuristic support for +// dealing with FPO, similar to what WinDbg does (see +// http://www.nynaeve.net/?p=97). +// +// The solution we've ended up doing is to call the undocumented +// windows function RtlCaptureStackBackTrace, which probably doesn't +// work with FPO but at least is fast, and doesn't require a symbol +// server. +// +// This code is inspired by a patch from David Vitek: +// http://code.google.com/p/gperftools/issues/detail?id=83 + +#ifndef BASE_STACKTRACE_WIN32_INL_H_ +#define BASE_STACKTRACE_WIN32_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include "config.h" +#include <windows.h> // for GetProcAddress and GetModuleHandle +#include <assert.h> + +typedef USHORT NTAPI RtlCaptureStackBackTrace_Function( + IN ULONG frames_to_skip, + IN ULONG frames_to_capture, + OUT PVOID *backtrace, + OUT PULONG backtrace_hash); + +// Load the function we need at static init time, where we don't have +// to worry about someone else holding the loader's lock. +static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn = + (RtlCaptureStackBackTrace_Function*) + GetProcAddress(GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace"); + +static int GetStackTrace_win32(void** result, int max_depth, + int skip_count) { + if (!RtlCaptureStackBackTrace_fn) { + // TODO(csilvers): should we log an error here? + return 0; // can't find a stacktrace with no function to call + } + return (int)RtlCaptureStackBackTrace_fn(skip_count + 3, max_depth, + result, 0); +} + +static int not_implemented(void) { + assert(0 == "Not yet implemented"); + return 0; +} + +static int GetStackFrames_win32(void** /* pcs */, + int* /* sizes */, + int /* max_depth */, + int /* skip_count */) { + return not_implemented(); +} + +static int GetStackFramesWithContext_win32(void** result, int* sizes, int max_depth, + int skip_count, const void *uc) { + return not_implemented(); +} + +static int GetStackTraceWithContext_win32(void** result, int max_depth, + int skip_count, const void *uc) { + return not_implemented(); +} + + +#endif // BASE_STACKTRACE_WIN32_INL_H_ diff --git a/src/third_party/gperftools-2.5/src/stacktrace_x86-inl.h b/src/third_party/gperftools-2.5/src/stacktrace_x86-inl.h new file mode 100644 index 00000000000..46eb5d82d71 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/stacktrace_x86-inl.h @@ -0,0 +1,354 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
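One note on the Windows file above: RtlCaptureStackBackTrace has since been documented, and current SDKs expose it directly as CaptureStackBackTrace, so the GetProcAddress lookup is a compatibility measure for old toolchains rather than a necessity. Assuming a reasonably modern SDK, a direct call is simply:

    #include <windows.h>
    #include <cstdio>

    void DumpStackWin32() {
      void* frames[62];  // older kernels cap skip + capture counts at 63
      USHORT n = CaptureStackBackTrace(0, 62, frames, NULL);
      for (USHORT i = 0; i < n; ++i)
        std::printf("frame %d: %p\n", (int)i, frames[i]);
    }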
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Produce stack trace + +#ifndef BASE_STACKTRACE_X86_INL_H_ +#define BASE_STACKTRACE_X86_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include "config.h" +#include <stdlib.h> // for NULL +#include <assert.h> +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> // for ucontext_t +#elif defined(HAVE_CYGWIN_SIGNAL_H) +// cygwin/signal.h has a buglet where it uses pthread_attr_t without +// #including <pthread.h> itself. So we have to do it. +# ifdef HAVE_PTHREAD +# include <pthread.h> +# endif +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#endif +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> // for msync +#include "base/vdso_support.h" +#endif + +#include "gperftools/stacktrace.h" + +#if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP) +// Count "push %reg" instructions in VDSO __kernel_vsyscall(), +// preceding "syscall" or "sysenter". +// If __kernel_vsyscall uses frame pointer, answer 0. +// +// kMaxBytes tells how many instruction bytes of __kernel_vsyscall +// to analyze before giving up. Up to kMaxBytes+1 bytes of +// instructions could be accessed. +// +// Here are known __kernel_vsyscall instruction sequences: +// +// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S). +// Used on Intel. +// 0xffffe400 <__kernel_vsyscall+0>: push %ecx +// 0xffffe401 <__kernel_vsyscall+1>: push %edx +// 0xffffe402 <__kernel_vsyscall+2>: push %ebp +// 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp +// 0xffffe405 <__kernel_vsyscall+5>: sysenter +// +// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S). +// Used on AMD. +// 0xffffe400 <__kernel_vsyscall+0>: push %ebp +// 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp +// 0xffffe403 <__kernel_vsyscall+3>: syscall +// +// i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S) +// 0xffffe400 <__kernel_vsyscall+0>: int $0x80 +// 0xffffe401 <__kernel_vsyscall+1>: ret +// +static const int kMaxBytes = 10; + +// We use assert()s instead of DCHECK()s -- this is too low level +// for DCHECK(). + +static int CountPushInstructions(const unsigned char *const addr) { + int result = 0; + for (int i = 0; i < kMaxBytes; ++i) { + if (addr[i] == 0x89) { + // "mov reg,reg" + if (addr[i + 1] == 0xE5) { + // Found "mov %esp,%ebp". + return 0; + } + ++i; // Skip register encoding byte. + } else if (addr[i] == 0x0F && + (addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) { + // Found "sysenter" or "syscall". 
+ return result; + } else if ((addr[i] & 0xF0) == 0x50) { + // Found "push %reg". + ++result; + } else if (addr[i] == 0xCD && addr[i + 1] == 0x80) { + // Found "int $0x80" + assert(result == 0); + return 0; + } else { + // Unexpected instruction. + assert(0 == "unexpected instruction in __kernel_vsyscall"); + return 0; + } + } + // Unexpected: didn't find SYSENTER or SYSCALL in + // [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval. + assert(0 == "did not find SYSENTER or SYSCALL in __kernel_vsyscall"); + return 0; +} +#endif + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return NULL if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING, bool WITH_CONTEXT> +static void **NextStackFrame(void **old_sp, const void *uc) { + void **new_sp = (void **) *old_sp; + +#if defined(__linux__) && defined(__i386__) && defined(HAVE_VDSO_SUPPORT) + if (WITH_CONTEXT && uc != NULL) { + // How many "push %reg" instructions are there at __kernel_vsyscall? + // This is constant for a given kernel and processor, so compute + // it only once. + static int num_push_instructions = -1; // Sentinel: not computed yet. + // Initialize with sentinel value: __kernel_rt_sigreturn can not possibly + // be there. + static const unsigned char *kernel_rt_sigreturn_address = NULL; + static const unsigned char *kernel_vsyscall_address = NULL; + if (num_push_instructions == -1) { + base::VDSOSupport vdso; + if (vdso.IsPresent()) { + base::VDSOSupport::SymbolInfo rt_sigreturn_symbol_info; + base::VDSOSupport::SymbolInfo vsyscall_symbol_info; + if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5", + STT_FUNC, &rt_sigreturn_symbol_info) || + !vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5", + STT_FUNC, &vsyscall_symbol_info) || + rt_sigreturn_symbol_info.address == NULL || + vsyscall_symbol_info.address == NULL) { + // Unexpected: 32-bit VDSO is present, yet one of the expected + // symbols is missing or NULL. + assert(0 == "VDSO is present, but doesn't have expected symbols"); + num_push_instructions = 0; + } else { + kernel_rt_sigreturn_address = + reinterpret_cast<const unsigned char *>( + rt_sigreturn_symbol_info.address); + kernel_vsyscall_address = + reinterpret_cast<const unsigned char *>( + vsyscall_symbol_info.address); + num_push_instructions = + CountPushInstructions(kernel_vsyscall_address); + } + } else { + num_push_instructions = 0; + } + } + if (num_push_instructions != 0 && kernel_rt_sigreturn_address != NULL && + old_sp[1] == kernel_rt_sigreturn_address) { + const ucontext_t *ucv = static_cast<const ucontext_t *>(uc); + // This kernel does not use frame pointer in its VDSO code, + // and so %ebp is not suitable for unwinding. + void **const reg_ebp = + reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]); + const unsigned char *const reg_eip = + reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]); + if (new_sp == reg_ebp && + kernel_vsyscall_address <= reg_eip && + reg_eip - kernel_vsyscall_address < kMaxBytes) { + // We "stepped up" to __kernel_vsyscall, but %ebp is not usable. + // Restore from 'ucv' instead. + void **const reg_esp = + reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]); + // Check that alleged %esp is not NULL and is reasonably aligned. 
+ if (reg_esp &&
+ ((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) {
+ // Check that alleged %esp is actually readable. This is to prevent
+ // "double fault" in case we hit the first fault due to e.g. stack
+ // corruption.
+ //
+ // page_size is linker-initialized to avoid async-unsafe locking
+ // that GCC would otherwise insert (__cxa_guard_acquire etc).
+ static int page_size;
+ if (page_size == 0) {
+ // First time through.
+ page_size = getpagesize();
+ }
+ void *const reg_esp_aligned =
+ reinterpret_cast<void *>(
+ (uintptr_t)(reg_esp + num_push_instructions - 1) &
+ ~(page_size - 1));
+ if (msync(reg_esp_aligned, page_size, MS_ASYNC) == 0) {
+ // Alleged %esp is readable, use it for further unwinding.
+ new_sp = reinterpret_cast<void **>(
+ reg_esp[num_push_instructions - 1]);
+ }
+ }
+ }
+ }
+ }
+#endif
+
+ // Check that the transition from frame pointer old_sp to frame
+ // pointer new_sp isn't clearly bogus
+ if (STRICT_UNWINDING) {
+ // With the stack growing downwards, older stack frame must be
+ // at a greater address than the current one.
+ if (new_sp <= old_sp) return NULL;
+ // Assume stack frames larger than 100,000 bytes are bogus.
+ if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
+ } else {
+ // In the non-strict mode, allow discontiguous stack frames.
+ // (alternate-signal-stacks for example).
+ if (new_sp == old_sp) return NULL;
+ if (new_sp > old_sp) {
+ // And allow frames up to about 1MB.
+ const uintptr_t delta = (uintptr_t)new_sp - (uintptr_t)old_sp;
+ const uintptr_t acceptable_delta = 1000000;
+ if (delta > acceptable_delta) {
+ return NULL;
+ }
+ }
+ }
+ if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
+#ifdef __i386__
+ // On 64-bit machines, the stack pointer can be very close to
+ // 0xffffffff, so we explicitly check for a pointer into the
+ // last two pages in the address space
+ if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
+#endif
+#ifdef HAVE_MMAP
+ if (!STRICT_UNWINDING) {
+ // Lax sanity checks cause a crash on AMD-based machines with
+ // VDSO-enabled kernels.
+ // Make an extra sanity check to ensure new_sp is readable.
+ // Note: NextStackFrame<false>() is only called while the program
+ // is already on its last leg, so it's ok to be slow here.
+ static int page_size = getpagesize();
+ void *new_sp_aligned = (void *)((uintptr_t)new_sp & ~(page_size - 1));
+ if (msync(new_sp_aligned, page_size, MS_ASYNC) == -1)
+ return NULL;
+ }
+#endif
+ return new_sp;
+}
+
+#endif // BASE_STACKTRACE_X86_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
+
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+// (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+
+static int GET_STACK_TRACE_OR_FRAMES {
+ void **sp;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
+ // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
+ // It's always correct on llvm, and the techniques below aren't (in + // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), + // so we also prefer __builtin_frame_address when running under llvm. + sp = reinterpret_cast<void**>(__builtin_frame_address(0)); +#elif defined(__i386__) + // Stack frame format: + // sp[0] pointer to previous frame + // sp[1] caller address + // sp[2] first argument + // ... + // NOTE: This will break under llvm, since result is a copy and not in sp[2] + sp = (void **)&result - 2; +#elif defined(__x86_64__) + unsigned long rbp; + // Move the value of the register %rbp into the local variable rbp. + // We need 'volatile' to prevent this instruction from getting moved + // around during optimization to before function prologue is done. + // An alternative way to achieve this + // would be (before this __asm__ instruction) to call Noop() defined as + // static void Noop() __attribute__ ((noinline)); // prevent inlining + // static void Noop() { asm(""); } // prevent optimizing-away + __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); + // Arguments are passed in registers on x86-64, so we can't just + // offset from &result + sp = (void **) rbp; +#else +# error Using stacktrace_x86-inl.h on a non x86 architecture! +#endif + + skip_count++; // skip parent's frame due to indirection in stacktrace.cc + + int n = 0; + while (sp && n < max_depth) { + if (*(sp+1) == reinterpret_cast<void *>(0)) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } +#if !IS_WITH_CONTEXT + const void *const ucp = NULL; +#endif + void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp); + if (skip_count > 0) { + skip_count--; + } else { + result[n] = *(sp+1); +#if IS_STACK_FRAMES + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } +#endif + n++; + } + sp = next_sp; + } + return n; +} diff --git a/src/third_party/gperftools-2.5/src/static_vars.cc b/src/third_party/gperftools-2.5/src/static_vars.cc new file mode 100644 index 00000000000..79de97e8ba7 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/static_vars.cc @@ -0,0 +1,125 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Ken Ashcraft <opensource@google.com>
+
+#include <config.h>
+#include "static_vars.h"
+#include <stddef.h> // for NULL
+#include <new> // for operator new
+#ifdef HAVE_PTHREAD
+#include <pthread.h> // for pthread_atfork
+#endif
+#include "internal_logging.h" // for CHECK_CONDITION
+#include "common.h"
+#include "sampler.h" // for Sampler
+#include "getenv_safe.h" // TCMallocGetenvSafe
+#include "base/googleinit.h"
+#include "maybe_threads.h"
+
+namespace tcmalloc {
+
+#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
+// The following two functions are registered via pthread_atfork to make
+// sure the central_cache locks remain in a consistent state in the forked
+// version of the thread.
+
+void CentralCacheLockAll()
+{
+ Static::pageheap_lock()->Lock();
+ for (int i = 0; i < kNumClasses; ++i)
+ Static::central_cache()[i].Lock();
+}
+
+void CentralCacheUnlockAll()
+{
+ for (int i = 0; i < kNumClasses; ++i)
+ Static::central_cache()[i].Unlock();
+ Static::pageheap_lock()->Unlock();
+}
+#endif
+
+SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
+SizeMap Static::sizemap_;
+CentralFreeListPadded Static::central_cache_[kNumClasses];
+PageHeapAllocator<Span> Static::span_allocator_;
+PageHeapAllocator<StackTrace> Static::stacktrace_allocator_;
+Span Static::sampled_objects_;
+PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_;
+StackTrace* Static::growth_stacks_ = NULL;
+PageHeap* Static::pageheap_ = NULL;
+
+
+void Static::InitStaticVars() {
+ sizemap_.Init();
+ span_allocator_.Init();
+ span_allocator_.New(); // Reduce cache conflicts
+ span_allocator_.New(); // Reduce cache conflicts
+ stacktrace_allocator_.Init();
+ bucket_allocator_.Init();
+ // Do a bit of sanitizing: make sure central_cache is aligned properly
+ CHECK_CONDITION((sizeof(central_cache_[0]) % 64) == 0);
+ for (int i = 0; i < kNumClasses; ++i) {
+ central_cache_[i].Init(i);
+ }
+
+ // It's important to have PageHeap allocated, not in static storage,
+ // so that HeapLeakChecker does not consider all the byte patterns stored
+ // in its caches as pointers that are sources of heap object liveness,
+ // which leads to it missing some memory leaks.
+ pageheap_ = new (MetaDataAlloc(sizeof(PageHeap))) PageHeap; + + bool aggressive_decommit = + tcmalloc::commandlineflags::StringToBool( + TCMallocGetenvSafe("TCMALLOC_AGGRESSIVE_DECOMMIT"), true); + + pageheap_->SetAggressiveDecommit(aggressive_decommit); + + DLL_Init(&sampled_objects_); + Sampler::InitStatics(); +} + + +#if defined(HAVE_FORK) && defined(HAVE_PTHREAD) && !defined(__APPLE__) + +static inline +void SetupAtForkLocksHandler() +{ + perftools_pthread_atfork( + CentralCacheLockAll, // parent calls before fork + CentralCacheUnlockAll, // parent calls after fork + CentralCacheUnlockAll); // child calls after fork +} +REGISTER_MODULE_INITIALIZER(tcmalloc_fork_handler, SetupAtForkLocksHandler()); + +#endif + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/static_vars.h b/src/third_party/gperftools-2.5/src/static_vars.h new file mode 100644 index 00000000000..c662e40cd73 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/static_vars.h @@ -0,0 +1,115 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ken Ashcraft <opensource@google.com> +// +// Static variables shared by multiple classes. + +#ifndef TCMALLOC_STATIC_VARS_H_ +#define TCMALLOC_STATIC_VARS_H_ + +#include <config.h> +#include "base/spinlock.h" +#include "central_freelist.h" +#include "common.h" +#include "page_heap.h" +#include "page_heap_allocator.h" +#include "span.h" +#include "stack_trace_table.h" + +namespace tcmalloc { + +class Static { + public: + // Linker initialized, so this lock can be accessed at any time. + static SpinLock* pageheap_lock() { return &pageheap_lock_; } + + // Must be called before calling any of the accessors below. + static void InitStaticVars(); + + // Central cache -- an array of free-lists, one per size-class. + // We have a separate lock per free-list to reduce contention. 
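The fork handlers registered above exist because a child produced by fork() inherits the parent's lock words exactly as they were, possibly mid-update by some other thread. The general pattern, reduced to a single lock (standard POSIX pthread_atfork; illustrative sketch only):

    #include <pthread.h>

    static pthread_mutex_t g_cache_lock = PTHREAD_MUTEX_INITIALIZER;

    static void LockBeforeFork()  { pthread_mutex_lock(&g_cache_lock); }
    static void UnlockAfterFork() { pthread_mutex_unlock(&g_cache_lock); }

    // The parent takes every allocator lock right before fork(), so the
    // child never starts life with a lock frozen in a locked state.
    static void InstallForkHandlers() {
      pthread_atfork(LockBeforeFork,    // prepare: runs in parent, pre-fork
                     UnlockAfterFork,   // parent:  runs in parent, post-fork
                     UnlockAfterFork);  // child:   runs in child, post-fork
    }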
+ static CentralFreeListPadded* central_cache() { return central_cache_; } + + static SizeMap* sizemap() { return &sizemap_; } + + ////////////////////////////////////////////////////////////////////// + // In addition to the explicit initialization comment, the variables below + // must be protected by pageheap_lock. + + // Page-level allocator. + static PageHeap* pageheap() { return pageheap_; } + + static PageHeapAllocator<Span>* span_allocator() { return &span_allocator_; } + + static PageHeapAllocator<StackTrace>* stacktrace_allocator() { + return &stacktrace_allocator_; + } + + static StackTrace* growth_stacks() { return growth_stacks_; } + static void set_growth_stacks(StackTrace* s) { growth_stacks_ = s; } + + // State kept for sampled allocations (/pprof/heap support) + static Span* sampled_objects() { return &sampled_objects_; } + static PageHeapAllocator<StackTraceTable::Bucket>* bucket_allocator() { + return &bucket_allocator_; + } + + // Check if InitStaticVars() has been run. + static bool IsInited() { return pageheap() != NULL; } + + private: + static SpinLock pageheap_lock_; + + // These static variables require explicit initialization. We cannot + // count on their constructors to do any initialization because other + // static variables may try to allocate memory before these variables + // can run their constructors. + + static SizeMap sizemap_; + static CentralFreeListPadded central_cache_[kNumClasses]; + static PageHeapAllocator<Span> span_allocator_; + static PageHeapAllocator<StackTrace> stacktrace_allocator_; + static Span sampled_objects_; + static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; + + // Linked list of stack traces recorded every time we allocated memory + // from the system. Useful for finding allocation sites that cause + // increase in the footprint of the system. The linked list pointer + // is stored in trace->stack[kMaxStackDepth-1]. + static StackTrace* growth_stacks_; + + static PageHeap* pageheap_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_STATIC_VARS_H_ diff --git a/src/third_party/gperftools-2.5/src/symbolize.cc b/src/third_party/gperftools-2.5/src/symbolize.cc new file mode 100755 index 00000000000..a27106e8bce --- /dev/null +++ b/src/third_party/gperftools-2.5/src/symbolize.cc @@ -0,0 +1,285 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This forks out to pprof to do the actual symbolizing. We might +// be better off writing our own in C++. + +#include "config.h" +#include "symbolize.h" +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> // for socketpair() -- needed by Symbolize +#endif +#ifdef HAVE_SYS_WAIT_H +#include <sys/wait.h> // for wait() -- needed by Symbolize +#endif +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#ifdef __MACH__ +#include <mach-o/dyld.h> // for GetProgramInvocationName() +#include <limits.h> // for PATH_MAX +#endif +#if defined(__CYGWIN__) || defined(__CYGWIN32__) +#include <io.h> // for get_osfhandle() +#endif +#include <string> +#include "base/commandlineflags.h" +#include "base/logging.h" +#include "base/sysinfo.h" + +using std::string; +using tcmalloc::DumpProcSelfMaps; // from sysinfo.h + + +DEFINE_string(symbolize_pprof, + EnvToString("PPROF_PATH", "pprof"), + "Path to pprof to call for reporting function names."); + +// heap_profile_table_pprof may be referenced after destructors are +// called (since that's when leak-checking is done), so we make +// a more-permanent copy that won't ever get destroyed. +static string* g_pprof_path = new string(FLAGS_symbolize_pprof); + +// Returns NULL if we're on an OS where we can't get the invocation name. +// Using a static var is ok because we're not called from a thread. +static const char* GetProgramInvocationName() { +#if defined(HAVE_PROGRAM_INVOCATION_NAME) +#ifdef __UCLIBC__ + extern const char* program_invocation_name; // uclibc provides this +#else + extern char* program_invocation_name; // gcc provides this +#endif + return program_invocation_name; +#elif defined(__MACH__) + // We don't want to allocate memory for this since we may be + // calculating it when memory is corrupted. + static char program_invocation_name[PATH_MAX]; + if (program_invocation_name[0] == '\0') { // first time calculating + uint32_t length = sizeof(program_invocation_name); + if (_NSGetExecutablePath(program_invocation_name, &length)) + return NULL; + } + return program_invocation_name; +#else + return NULL; // figure out a way to get argv[0] +#endif +} + +// Prints an error message when you can't run Symbolize(). +static void PrintError(const char* reason) { + RAW_LOG(ERROR, + "*** WARNING: Cannot convert addresses to symbols in output below.\n" + "*** Reason: %s\n" + "*** If you cannot fix this, try running pprof directly.\n", + reason); +} + +void SymbolTable::Add(const void* addr) { + symbolization_table_[addr] = ""; +} + +const char* SymbolTable::GetSymbol(const void* addr) { + return symbolization_table_[addr]; +} + +// Updates symbolization_table with the pointers to symbol names corresponding +// to its keys. The symbol names are stored in out, which is allocated and +// freed by the caller of this routine. +// Note that the forking/etc is not thread-safe or re-entrant. 
That's +// ok for the purpose we need -- reporting leaks detected by heap-checker +// -- but be careful if you decide to use this routine for other purposes. +// Returns the number of symbols read on success. If it can't symbolize, it +// returns 0 and emits an error message about why. +int SymbolTable::Symbolize() { +#if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) + PrintError("Perftools does not know how to call a sub-process on this O/S"); + return 0; +#else + const char* argv0 = GetProgramInvocationName(); + if (argv0 == NULL) { // can't call symbolize if we can't figure out our name + PrintError("Cannot figure out the name of this executable (argv0)"); + return 0; + } + if (access(g_pprof_path->c_str(), R_OK) != 0) { + PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)"); + return 0; + } + + // All this work is to do two-way communication. ugh. + int *child_in = NULL; // file descriptors + int *child_out = NULL; // for now, we don't worry about child_err + int child_fds[5][2]; // socketpair may be called up to five times below + + // The client program may close its stdin and/or stdout and/or stderr + // thus allowing socketpair to reuse file descriptors 0, 1 or 2. + // In this case the communication between the forked processes may be broken + // if either the parent or the child tries to close or duplicate these + // descriptors. The loop below produces two pairs of file descriptors, each + // greater than 2 (stderr). + for (int i = 0; i < 5; i++) { + if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) { + for (int j = 0; j < i; j++) { + close(child_fds[j][0]); + close(child_fds[j][1]); + } + // Report the failure once, after the cleanup loop (and even when the + // very first socketpair() call fails and the loop body never runs). + PrintError("Cannot create a socket pair"); + return 0; + } else { + if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) { + if (child_in == NULL) { + child_in = child_fds[i]; + } else { + child_out = child_fds[i]; + for (int j = 0; j < i; j++) { + if (child_fds[j] == child_in) continue; + close(child_fds[j][0]); + close(child_fds[j][1]); + } + break; + } + } + } + } + + switch (fork()) { + case -1: { // error + close(child_in[0]); + close(child_in[1]); + close(child_out[0]); + close(child_out[1]); + PrintError("Unknown error calling fork()"); + return 0; + } + case 0: { // child + close(child_in[1]); // child uses the 0's, parent uses the 1's + close(child_out[1]); // child uses the 0's, parent uses the 1's + close(0); + close(1); + if (dup2(child_in[0], 0) == -1) _exit(1); + if (dup2(child_out[0], 1) == -1) _exit(2); + // Unset vars that might cause trouble when we fork + unsetenv("CPUPROFILE"); + unsetenv("HEAPPROFILE"); + unsetenv("HEAPCHECK"); + unsetenv("PERFTOOLS_VERBOSE"); + execlp(g_pprof_path->c_str(), g_pprof_path->c_str(), + "--symbols", argv0, NULL); + _exit(3); // if execlp fails, it's bad news for us + } + default: { // parent + close(child_in[0]); // child uses the 0's, parent uses the 1's + close(child_out[0]); // child uses the 0's, parent uses the 1's +#ifdef HAVE_POLL_H + // Waiting for 1ms seems to give the OS time to notice any errors. + poll(0, 0, 1); + // For maximum safety, we check to make sure the execlp + // succeeded before trying to write. (Otherwise we'll get a + // SIGPIPE.) For systems without poll.h, we'll just skip this + // check, and trust that the user set PPROF_PATH correctly!
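 + // (A note on the mechanics, assuming POSIX semantics: if execlp() + // failed, the child has already exited and its end of the socketpair + // is closed, so the zero-timeout poll() below reports POLLHUP or + // POLLERR rather than POLLOUT; a successful exec leaves the socket + // writable and the probe passes.)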
+ struct pollfd pfd = { child_in[1], POLLOUT, 0 }; + if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || + (pfd.revents & (POLLHUP|POLLERR))) { + PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)"); + return 0; + } +#endif +#if defined(__CYGWIN__) || defined(__CYGWIN32__) + // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert. + const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]); + DumpProcSelfMaps(symbols_handle); +#else + DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin +#endif + + // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each + // address to feed to pprof. + const int kOutBufSize = 24 * symbolization_table_.size(); + char *pprof_buffer = new char[kOutBufSize]; + int written = 0; + for (SymbolMap::const_iterator iter = symbolization_table_.begin(); + iter != symbolization_table_.end(); ++iter) { + written += snprintf(pprof_buffer + written, kOutBufSize - written, + // pprof expects format to be 0xXXXXXX + "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first)); + } + write(child_in[1], pprof_buffer, strlen(pprof_buffer)); + close(child_in[1]); // that's all we need to write + + const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size(); + int total_bytes_read = 0; + delete[] symbol_buffer_; + symbol_buffer_ = new char[kSymbolBufferSize]; + memset(symbol_buffer_, '\0', kSymbolBufferSize); + while (1) { + int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read, + kSymbolBufferSize - total_bytes_read); + if (bytes_read < 0) { + close(child_out[1]); + PrintError("Cannot read data from pprof"); + return 0; + } else if (bytes_read == 0) { + close(child_out[1]); + wait(NULL); + break; + } else { + total_bytes_read += bytes_read; + } + } + // We have successfully read the output of pprof into symbol_buffer_. + // Make sure the last symbol is full (we can tell because it ends with a \n). + if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n') + return 0; + // make the symbolization_table_ values point to the output vector + SymbolMap::iterator fill = symbolization_table_.begin(); + int num_symbols = 0; + const char *current_name = symbol_buffer_; + for (int i = 0; i < total_bytes_read; i++) { + if (symbol_buffer_[i] == '\n') { + fill->second = current_name; + symbol_buffer_[i] = '\0'; + current_name = symbol_buffer_ + i + 1; + fill++; + num_symbols++; + } + } + return num_symbols; + } + } + PrintError("Unknown error (should never occur!)"); + return 0; // shouldn't be reachable +#endif +} diff --git a/src/third_party/gperftools-2.5/src/symbolize.h b/src/third_party/gperftools-2.5/src/symbolize.h new file mode 100644 index 00000000000..728d073308a --- /dev/null +++ b/src/third_party/gperftools-2.5/src/symbolize.h @@ -0,0 +1,84 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc.
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein + +#ifndef TCMALLOC_SYMBOLIZE_H_ +#define TCMALLOC_SYMBOLIZE_H_ + +#include "config.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#include <stddef.h> // for NULL +#include <map> + +using std::map; + +// SymbolTable encapsulates the address operations necessary for stack trace +// symbolization. A common use-case is to Add() the addresses from one or +// several stack traces to a table, call Symbolize() once and use GetSymbol() +// to get the symbol names for pretty-printing the stack traces. +class SymbolTable { + public: + SymbolTable() + : symbol_buffer_(NULL) {} + ~SymbolTable() { + delete[] symbol_buffer_; + } + + // Adds an address to the table. This may overwrite a currently known symbol + // name, so Add() should not generally be called after Symbolize(). + void Add(const void* addr); + + // Returns the symbol name for addr, if the given address was added before + // the last successful call to Symbolize(). Otherwise may return an empty + // c-string. + const char* GetSymbol(const void* addr); + + // Obtains the symbol names for the addresses stored in the table and returns + // the number of addresses actually symbolized. + int Symbolize(); + + private: + typedef map<const void*, const char*> SymbolMap; + + // An average size of memory allocated for a stack trace symbol. + static const int kSymbolSize = 1024; + + // Map from addresses to symbol names. + SymbolMap symbolization_table_; + + // Pointer to the buffer that stores the symbol names. + char *symbol_buffer_; +}; + +#endif // TCMALLOC_SYMBOLIZE_H_ diff --git a/src/third_party/gperftools-2.5/src/system-alloc.cc b/src/third_party/gperftools-2.5/src/system-alloc.cc new file mode 100755 index 00000000000..59cd03dcd8d --- /dev/null +++ b/src/third_party/gperftools-2.5/src/system-alloc.cc @@ -0,0 +1,554 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat + +#include <config.h> +#include <errno.h> // for EAGAIN, errno +#include <fcntl.h> // for open, O_RDWR +#include <stddef.h> // for size_t, NULL, ptrdiff_t +#if defined HAVE_STDINT_H +#include <stdint.h> // for uintptr_t, intptr_t +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> // for munmap, mmap, MADV_DONTNEED, etc +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for sbrk, getpagesize, off_t +#endif +#include <new> // for operator new +#include <gperftools/malloc_extension.h> +#include "base/basictypes.h" +#include "base/commandlineflags.h" +#include "base/spinlock.h" // for SpinLockHolder, SpinLock, etc +#include "common.h" +#include "internal_logging.h" + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// MADV_FREE is specifically designed for use by malloc(), but only +// FreeBSD supports it; in linux we fall back to the somewhat inferior +// MADV_DONTNEED. +#if !defined(MADV_FREE) && defined(MADV_DONTNEED) +# define MADV_FREE MADV_DONTNEED +#endif + +// Solaris has a bug where it doesn't declare madvise() for C++. +// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0 +#if defined(__sun) && defined(__SVR4) +# include <sys/types.h> // for caddr_t + extern "C" { extern int madvise(caddr_t, size_t, int); } +#endif + +// Set kDebugMode so that we can use C++ conditionals +// instead of preprocessor conditionals. +#ifdef NDEBUG +static const bool kDebugMode = false; +#else +static const bool kDebugMode = true; +#endif + +// TODO(sanjay): Move the code below into the tcmalloc namespace +using tcmalloc::kLog; +using tcmalloc::Log; + +// Anonymous namespace to avoid name conflicts on "CheckAddressBits". +namespace { + +// Check that no bit is set at position ADDRESS_BITS or higher. +template <int ADDRESS_BITS> bool CheckAddressBits(uintptr_t ptr) { + return (ptr >> ADDRESS_BITS) == 0; +} + +// Specialize for the bit width of a pointer to avoid undefined shift. +template <> bool CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) { + return true; +} + +} // Anonymous namespace to avoid name conflicts on "CheckAddressBits".
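 + +// (Why the specialization above matters, as a worked example: on a +// 64-bit target kAddressBits can equal 64 == 8 * sizeof(void*), and +// "ptr >> 64" is undefined behavior in C++ -- on x86-64, for instance, +// the hardware masks the shift count mod 64, so the expression would +// yield ptr rather than 0. The full-width specialization instead +// returns true directly, since every pointer trivially fits in +// 8 * sizeof(void*) bits.)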
+ +COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*), + address_bits_larger_than_pointer_size); + +static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); + +#if defined(HAVE_MMAP) || defined(MADV_FREE) +// Page size is initialized on demand (only needed for mmap-based allocators) +static size_t pagesize = 0; +#endif + +// The current system allocator +SysAllocator* sys_alloc = NULL; + +// Number of bytes taken from system. +size_t TCMalloc_SystemTaken = 0; + +// Configuration parameters. +DEFINE_int32(malloc_devmem_start, + EnvToInt("TCMALLOC_DEVMEM_START", 0), + "Physical memory starting location in MB for /dev/mem allocation." + " Setting this to 0 disables /dev/mem allocation"); +DEFINE_int32(malloc_devmem_limit, + EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0), + "Physical memory limit location in MB for /dev/mem allocation." + " Setting this to 0 means no limit."); +DEFINE_bool(malloc_skip_sbrk, + EnvToBool("TCMALLOC_SKIP_SBRK", false), + "Whether sbrk can be used to obtain memory."); +DEFINE_bool(malloc_skip_mmap, + EnvToBool("TCMALLOC_SKIP_MMAP", false), + "Whether mmap can be used to obtain memory."); +DEFINE_bool(malloc_disable_memory_release, + EnvToBool("TCMALLOC_DISABLE_MEMORY_RELEASE", false), + "Whether MADV_FREE/MADV_DONTNEED should be used" + " to return unused memory to the system."); + +// static allocators +class SbrkSysAllocator : public SysAllocator { +public: + SbrkSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; +static union { + char buf[sizeof(SbrkSysAllocator)]; + void *ptr; +} sbrk_space; + +class MmapSysAllocator : public SysAllocator { +public: + MmapSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; +static union { + char buf[sizeof(MmapSysAllocator)]; + void *ptr; +} mmap_space; + +class DevMemSysAllocator : public SysAllocator { +public: + DevMemSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; + +class DefaultSysAllocator : public SysAllocator { + public: + DefaultSysAllocator() : SysAllocator() { + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = true; + allocs_[i] = NULL; + names_[i] = NULL; + } + } + void SetChildAllocator(SysAllocator* alloc, unsigned int index, + const char* name) { + if (index < kMaxAllocators && alloc != NULL) { + allocs_[index] = alloc; + failed_[index] = false; + names_[index] = name; + } + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + + private: + static const int kMaxAllocators = 2; + bool failed_[kMaxAllocators]; + SysAllocator* allocs_[kMaxAllocators]; + const char* names_[kMaxAllocators]; +}; +static union { + char buf[sizeof(DefaultSysAllocator)]; + void *ptr; +} default_space; +static const char sbrk_name[] = "SbrkSysAllocator"; +static const char mmap_name[] = "MmapSysAllocator"; + + +void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#if !defined(HAVE_SBRK) || defined(__UCLIBC__) + return NULL; +#else + // Check if we should use sbrk allocation. + // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized + // state) and eventually gets initialized to the specified value. Note + // that this code runs for a while before the flags are initialized. + // That means that even if this flag is set to true, some (initial) + // memory will be allocated with sbrk before the flag takes effect. 
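 + // (Worked example of the alignment round-up performed below, with + // illustrative values: for size = 100 and alignment = 16, + // ((100 + 16 - 1) / 16) * 16 = (115 / 16) * 16 = 7 * 16 = 112, + // the smallest multiple of 16 that is >= 100.)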
+ if (FLAGS_malloc_skip_sbrk) { + return NULL; + } + + // sbrk will release memory if passed a negative number, so we do + // a strict check here + if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL; + + // This doesn't overflow because TCMalloc_SystemAlloc has already + // tested for overflow at the alignment boundary. + size = ((size + alignment - 1) / alignment) * alignment; + + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. + if (actual_size) { + *actual_size = size; + } + + // Check that we're not asking for so much memory that we'd + // wrap around the end of the virtual address space. (This seems + // like something sbrk() should check for us, and indeed opensolaris + // does, but glibc does not: + // http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true + // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc + // Without this check, sbrk may succeed when it ought to fail.) + if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) { + return NULL; + } + + void* result = sbrk(size); + if (result == reinterpret_cast<void*>(-1)) { + return NULL; + } + + // Is it aligned? + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) == 0) return result; + + // Try to get more memory for alignment + size_t extra = alignment - (ptr & (alignment-1)); + void* r2 = sbrk(extra); + if (reinterpret_cast<uintptr_t>(r2) == (ptr + size)) { + // Contiguous with previous result + return reinterpret_cast<void*>(ptr + extra); + } + + // Give up and ask for "size + alignment - 1" bytes so + // that we can find an aligned region within it. + result = sbrk(size + alignment - 1); + if (result == reinterpret_cast<void*>(-1)) { + return NULL; + } + ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) != 0) { + ptr += alignment - (ptr & (alignment-1)); + } + return reinterpret_cast<void*>(ptr); +#endif // HAVE_SBRK +} + +void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#ifndef HAVE_MMAP + return NULL; +#else + // Check if we should use mmap allocation. + // FLAGS_malloc_skip_mmap starts out as false (its uninitialized + // state) and eventually gets initialized to the specified value. Note + // that this code runs for a while before the flags are initialized. + // Chances are we never get here before the flags are initialized since + // sbrk is used until the heap is exhausted (before mmap is used). + if (FLAGS_malloc_skip_mmap) { + return NULL; + } + + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + if (aligned_size < size) { + return NULL; + } + size = aligned_size; + + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. + if (actual_size) { + *actual_size = size; + } + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS).
+ // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void* result = mmap(NULL, size + extra, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + return NULL; + } + + // Adjust the return memory so it is aligned + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + return reinterpret_cast<void*>(ptr); +#endif // HAVE_MMAP +} + +void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#ifndef HAVE_MMAP + return NULL; +#else + static bool initialized = false; + static off_t physmem_base; // next physical memory address to allocate + static off_t physmem_limit; // maximum physical address allowed + static int physmem_fd; // file descriptor for /dev/mem + + // Check if we should use /dev/mem allocation. Note that it may take + // a while to get this flag initialized, so meanwhile we fall back to + // the next allocator. (It looks like 7MB gets allocated before + // this flag gets initialized -khr.) + if (FLAGS_malloc_devmem_start == 0) { + // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to + // try us again next time. + return NULL; + } + + if (!initialized) { + physmem_fd = open("/dev/mem", O_RDWR); + if (physmem_fd < 0) { + return NULL; + } + physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL; + physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL; + initialized = true; + } + + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + if (aligned_size < size) { + return NULL; + } + size = aligned_size; + + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. + if (actual_size) { + *actual_size = size; + } + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + // check to see if we have any memory left + if (physmem_limit != 0 && + ((size + extra) > (physmem_limit - physmem_base))) { + return NULL; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). 
+ // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + MAP_SHARED, physmem_fd, physmem_base); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + return NULL; + } + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + // Adjust the return memory so it is aligned + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused virtual memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + physmem_base += adjust + size; + + return reinterpret_cast<void*>(ptr); +#endif // HAVE_MMAP +} + +void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + for (int i = 0; i < kMaxAllocators; i++) { + if (!failed_[i] && allocs_[i] != NULL) { + void* result = allocs_[i]->Alloc(size, actual_size, alignment); + if (result != NULL) { + return result; + } + failed_[i] = true; + } + } + // After both failed, reset "failed_" to false so that a single failed + // allocation won't make the allocator never work again. + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = false; + } + return NULL; +} + +ATTRIBUTE_WEAK ATTRIBUTE_NOINLINE +SysAllocator *tc_get_sysalloc_override(SysAllocator *def) +{ + return def; +} + +static bool system_alloc_inited = false; +void InitSystemAllocators(void) { + MmapSysAllocator *mmap = new (mmap_space.buf) MmapSysAllocator(); + SbrkSysAllocator *sbrk = new (sbrk_space.buf) SbrkSysAllocator(); + + // In 64-bit debug mode, place the mmap allocator first since it + // allocates pointers that do not fit in 32 bits and therefore gives + // us better testing of code's 64-bit correctness. It also leads to + // less false negatives in heap-checking code. (Numbers are less + // likely to look like pointers and therefore the conservative gc in + // the heap-checker is less likely to misinterpret a number as a + // pointer). + DefaultSysAllocator *sdef = new (default_space.buf) DefaultSysAllocator(); + if (kDebugMode && sizeof(void*) > 4) { + sdef->SetChildAllocator(mmap, 0, mmap_name); + sdef->SetChildAllocator(sbrk, 1, sbrk_name); + } else { + sdef->SetChildAllocator(sbrk, 0, sbrk_name); + sdef->SetChildAllocator(mmap, 1, mmap_name); + } + + sys_alloc = tc_get_sysalloc_override(sdef); +} + +void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, + size_t alignment) { + // Discard requests that overflow + if (size + alignment < size) return NULL; + + SpinLockHolder lock_holder(&spinlock); + + if (!system_alloc_inited) { + InitSystemAllocators(); + system_alloc_inited = true; + } + + // Enforce minimum alignment + if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); + + size_t actual_size_storage; + if (actual_size == NULL) { + actual_size = &actual_size_storage; + } + + void* result = sys_alloc->Alloc(size, actual_size, alignment); + if (result != NULL) { + CHECK_CONDITION( + CheckAddressBits<kAddressBits>( + reinterpret_cast<uintptr_t>(result) + *actual_size - 1)); + TCMalloc_SystemTaken += *actual_size; + } + return result; +} + +bool TCMalloc_SystemRelease(void* start, size_t length) { +#ifdef MADV_FREE + if (FLAGS_malloc_devmem_start) { + // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been + // mapping /dev/mem for heap memory. 
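 + // (That mapping is MAP_SHARED and backed by /dev/mem, so madvise() + // hints would apply to the underlying physical pages rather than to + // private anonymous memory that can simply be discarded.)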
+ return false; + } + if (FLAGS_malloc_disable_memory_release) return false; + if (pagesize == 0) pagesize = getpagesize(); + const size_t pagemask = pagesize - 1; + + size_t new_start = reinterpret_cast<size_t>(start); + size_t end = new_start + length; + size_t new_end = end; + + // Round up the starting address and round down the ending address + // to be page aligned: + new_start = (new_start + pagesize - 1) & ~pagemask; + new_end = new_end & ~pagemask; + + ASSERT((new_start & pagemask) == 0); + ASSERT((new_end & pagemask) == 0); + ASSERT(new_start >= reinterpret_cast<size_t>(start)); + ASSERT(new_end <= end); + + if (new_end > new_start) { + int result; + do { + result = madvise(reinterpret_cast<char*>(new_start), + new_end - new_start, MADV_FREE); + } while (result == -1 && errno == EAGAIN); + + return result != -1; + } +#endif + return false; +} + +void TCMalloc_SystemCommit(void* start, size_t length) { + // Nothing to do here. TCMalloc_SystemRelease does not alter pages + // such that they need to be re-committed before they can be used by the + // application. +} diff --git a/src/third_party/gperftools-2.5/src/system-alloc.h b/src/third_party/gperftools-2.5/src/system-alloc.h new file mode 100644 index 00000000000..8233f96e5ff --- /dev/null +++ b/src/third_party/gperftools-2.5/src/system-alloc.h @@ -0,0 +1,92 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routine that uses sbrk/mmap to allocate memory from the system. +// Useful for implementing malloc. + +#ifndef TCMALLOC_SYSTEM_ALLOC_H_ +#define TCMALLOC_SYSTEM_ALLOC_H_ + +#include <config.h> +#include <stddef.h> // for size_t + +class SysAllocator; + +// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment +// +// Allocate and return "N" bytes of zeroed memory. +// +// If actual_bytes is NULL then the returned memory is exactly the +// requested size. 
If actual_bytes is non-NULL then the allocator +// may optionally return more bytes than asked for (i.e. return an +// entire "huge" page if a huge page allocator is in use). +// +// The returned pointer is a multiple of "alignment" if non-zero. The +// returned pointer will always be aligned suitably for holding a +// void*, double, or size_t. In addition, if this platform defines +// CACHELINE_ALIGNED, the returned pointer will always be cacheline +// aligned. +// +// Returns NULL when out of memory. +extern PERFTOOLS_DLL_DECL +void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes, + size_t alignment = 0); + +// This call is a hint to the operating system that the pages +// contained in the specified range of memory will not be used for a +// while, and can be released for use by other processes or the OS. +// Pages which are released in this way may be destroyed (zeroed) by +// the OS. The benefit of this function is that it frees memory for +// use by the system; the cost is that the pages are faulted back into +// the address space next time they are touched, which can impact +// performance. (Only pages fully covered by the memory region will +// be released, partial pages will not.) +// +// Returns false if release failed or is not supported. +extern PERFTOOLS_DLL_DECL +bool TCMalloc_SystemRelease(void* start, size_t length); + +// Called to resurrect memory which has been previously released +// to the system via TCMalloc_SystemRelease. An attempt to +// commit a page that is already committed does not cause this +// function to fail. +extern PERFTOOLS_DLL_DECL +void TCMalloc_SystemCommit(void* start, size_t length); + +// The current system allocator. +extern PERFTOOLS_DLL_DECL SysAllocator* sys_alloc; + +// Number of bytes taken from the system. +extern PERFTOOLS_DLL_DECL size_t TCMalloc_SystemTaken; + +#endif /* TCMALLOC_SYSTEM_ALLOC_H_ */ diff --git a/src/third_party/gperftools-2.5/src/tcmalloc.cc b/src/third_party/gperftools-2.5/src/tcmalloc.cc new file mode 100644 index 00000000000..f5198f0241b --- /dev/null +++ b/src/third_party/gperftools-2.5/src/tcmalloc.cc @@ -0,0 +1,1837 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A malloc that uses a per-thread cache to satisfy small malloc requests. +// (The time for malloc/free of a small object drops from 300 ns to 50 ns.) +// +// See doc/tcmalloc.html for a high-level +// description of how this malloc works. +// +// SYNCHRONIZATION +// 1. The thread-specific lists are accessed without acquiring any locks. +// This is safe because each such list is only accessed by one thread. +// 2. We have a lock per central free-list, and hold it while manipulating +// the central free list for a particular size. +// 3. The central page allocator is protected by "pageheap_lock". +// 4. The pagemap (which maps from page-number to descriptor) +// can be read without holding any locks, and written while holding +// the "pageheap_lock". +// 5. To improve performance, a subset of the information one can get +// from the pagemap is cached in a data structure, pagemap_cache_, +// that atomically reads and writes its entries. This cache can be +// read and written without locking. +// +// This multi-threaded access to the pagemap is safe for fairly +// subtle reasons. We basically assume that when an object X is +// allocated by thread A and deallocated by thread B, there must +// have been appropriate synchronization in the handoff of object +// X from thread A to thread B. The same logic applies to pagemap_cache_. +// +// THE PAGEID-TO-SIZECLASS CACHE +// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache +// returns 0 for a particular PageID then that means "no information," not that +// the sizeclass is 0. The cache may have stale information for pages that do +// not hold the beginning of any free()'able object. Staleness is eliminated +// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and +// do_memalign() for all other relevant pages. +// +// PAGEMAP +// ------- +// Page map contains a mapping from page id to Span. +// +// If Span s occupies pages [p..q], +// pagemap[p] == s +// pagemap[q] == s +// pagemap[p+1..q-1] are undefined +// pagemap[p-1] and pagemap[q+1] are defined: +// NULL if the corresponding page is not yet in the address space. +// Otherwise it points to a Span. This span may be free +// or allocated. If free, it is in one of pageheap's freelists. +// +// TODO: Bias reclamation to larger addresses +// TODO: implement mallinfo/mallopt +// TODO: Better testing +// +// 9/28/2003 (new page-level allocator replaces ptmalloc2): +// * malloc/free of small objects goes from ~300 ns to ~50 ns. +// * allocation of a reasonably complicated struct +// goes from about 1100 ns to about 300 ns.
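 +// +// (Concrete reading of the PAGEMAP invariant above, using a +// hypothetical 5-page span s covering pages [p..p+4]: only pagemap[p] +// and pagemap[p+4] are guaranteed to point at s. When s is freed, the +// page heap can probe pagemap[p-1] and pagemap[p+5] in O(1) to find +// adjacent free spans to coalesce with, and the interior entries need +// never be consulted for a large span, because such an allocation's +// user pointer always lands on the span's first page.)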
+ +#include "config.h" +#include <gperftools/tcmalloc.h> + +#include <errno.h> // for ENOMEM, EINVAL, errno +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for getenv +#include <string.h> // for strcmp, memset, strlen, etc +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for getpagesize, write, etc +#endif +#include <algorithm> // for max, min +#include <limits> // for numeric_limits +#include <new> // for nothrow_t (ptr only), etc +#include <vector> // for vector + +#include <gperftools/malloc_extension.h> +#include <gperftools/malloc_hook.h> // for MallocHook +#include "base/basictypes.h" // for int64 +#include "base/commandlineflags.h" // for RegisterFlagValidator, etc +#include "base/dynamic_annotations.h" // for RunningOnValgrind +#include "base/spinlock.h" // for SpinLockHolder +#include "central_freelist.h" // for CentralFreeListPadded +#include "common.h" // for StackTrace, kPageShift, etc +#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc +#include "linked_list.h" // for SLL_SetNext +#include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc +#include "page_heap.h" // for PageHeap, PageHeap::Stats +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "span.h" // for Span, DLL_Prepend, etc +#include "stack_trace_table.h" // for StackTraceTable +#include "static_vars.h" // for Static +#include "system-alloc.h" // for DumpSystemAllocatorStats, etc +#include "tcmalloc_guard.h" // for TCMallocGuard +#include "thread_cache.h" // for ThreadCache + +#ifdef __clang__ +// clang's apparent focus on code size somehow causes it to ignore +// normal inline directives even for the few functions for which +// inlining is key to performance. To bring the performance of clang's +// generated code closer to normal, we force inlining via this +// attribute. +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#else +#define ALWAYS_INLINE inline +#endif + +#include "maybe_emergency_malloc.h" + +#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS) +# define WIN32_DO_PATCHING 1 +#endif + +// Some windows file somewhere (at least on cygwin) #define's small (!) +#undef small + +using STL_NAMESPACE::max; +using STL_NAMESPACE::numeric_limits; +using STL_NAMESPACE::vector; + +#include "libc_override.h" + +using tcmalloc::AlignmentForSize; +using tcmalloc::kLog; +using tcmalloc::kCrash; +using tcmalloc::kCrashWithStats; +using tcmalloc::Log; +using tcmalloc::PageHeap; +using tcmalloc::PageHeapAllocator; +using tcmalloc::SizeMap; +using tcmalloc::Span; +using tcmalloc::StackTrace; +using tcmalloc::Static; +using tcmalloc::ThreadCache; + +DECLARE_double(tcmalloc_release_rate); + +// For windows, the printf we use to report large allocs is +// potentially dangerous: it could cause a malloc that would cause an +// infinite loop. So by default we set the threshold to a huge number +// on windows, so this bad situation will never trigger. You can +// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you +// want this functionality.
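 +// (For scale: 1 << 62 bytes is 4 EiB, far larger than any real +// allocation, so the windows default below effectively never fires; +// the default everywhere else, 1 << 30 bytes, is 1 GiB.)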
+#ifdef _WIN32 +const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62; +#else +const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30; +#endif +DEFINE_int64(tcmalloc_large_alloc_report_threshold, + EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD", + kDefaultLargeAllocReportThreshold), + "Allocations larger than this value cause a stack " + "trace to be dumped to stderr. The threshold for " + "dumping stack traces is increased by a factor of 1.125 " + "every time we print a message so that the threshold " + "automatically goes up by a factor of ~1000 every 60 " + "messages. This bounds the amount of extra logging " + "generated by this flag. Default value of this flag " + "is very large and therefore you should see no extra " + "logging unless the flag is overridden. Set to 0 to " + "disable reporting entirely."); + + +// We already declared these functions in tcmalloc.h, but we have to +// declare them again to give them an ATTRIBUTE_SECTION: we want to +// put all callers of MallocHook::Invoke* in this module into +// ATTRIBUTE_SECTION(google_malloc) section, so that +// MallocHook::GetCallerStackTrace can function accurately. +#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother +extern "C" { + void* tc_malloc(size_t size) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void tc_free(void* ptr) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void tc_cfree(void* ptr) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + + void* tc_memalign(size_t __alignment, size_t __size) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + int tc_posix_memalign(void** ptr, size_t align, size_t size) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_valloc(size_t __size) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_pvalloc(size_t __size) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + + void tc_malloc_stats(void) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + int tc_mallopt(int cmd, int value) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); +#ifdef HAVE_STRUCT_MALLINFO + struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); +#endif + + void* tc_new(size_t size) + ATTRIBUTE_SECTION(google_malloc); + void tc_delete(void* p) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_newarray(size_t size) + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray(void* p) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + + // And the nothrow variants of these: + void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + // Surprisingly, standard C++ library implementations use a + // nothrow-delete internally. See, eg: + // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html + void tc_delete_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); + + // Some non-standard extensions that we support. 
+ + // This is equivalent to + // OS X: malloc_size() + // glibc: malloc_usable_size() + // Windows: _msize() + size_t tc_malloc_size(void* p) PERFTOOLS_THROW + ATTRIBUTE_SECTION(google_malloc); +} // extern "C" +#endif // #ifndef _WIN32 + +// ----------------------- IMPLEMENTATION ------------------------------- + +static int tc_new_mode = 0; // See tc_set_new_mode(). + +// Routines such as free() and realloc() catch some erroneous pointers +// passed to them, and invoke the below when they do. (An erroneous pointer +// won't be caught if it's within a valid span or a stale span for which +// the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing +// required) kind of exception handling for these routines. +namespace { +void InvalidFree(void* ptr) { + if (tcmalloc::IsEmergencyPtr(ptr)) { + tcmalloc::EmergencyFree(ptr); + return; + } + Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr); +} + +size_t InvalidGetSizeForRealloc(const void* old_ptr) { + Log(kCrash, __FILE__, __LINE__, + "Attempt to realloc invalid pointer", old_ptr); + return 0; +} + +size_t InvalidGetAllocatedSize(const void* ptr) { + Log(kCrash, __FILE__, __LINE__, + "Attempt to get the size of an invalid pointer", ptr); + return 0; +} +} // unnamed namespace + +// Extract interesting stats +struct TCMallocStats { + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + PageHeap::Stats pageheap; // Stats from page heap +}; + +// Get stats into "r". Also, if class_count != NULL, class_count[k] +// will be set to the total number of objects of size class k in the +// central cache, transfer cache, and per-thread caches. If small_spans +// is non-NULL, it is filled. Same for large_spans. +static void ExtractStats(TCMallocStats* r, uint64_t* class_count, + PageHeap::SmallSpanStats* small_spans, + PageHeap::LargeSpanStats* large_spans) { + r->central_bytes = 0; + r->transfer_bytes = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + const int length = Static::central_cache()[cl].length(); + const int tc_length = Static::central_cache()[cl].tc_length(); + const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes(); + const size_t size = static_cast<uint64_t>( + Static::sizemap()->ByteSizeForClass(cl)); + r->central_bytes += (size * length) + cache_overhead; + r->transfer_bytes += (size * tc_length); + if (class_count) { + // Sum the lengths of all per-class freelists, except the per-thread + // freelists, which get counted when we call GetThreadStats(), below. 
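 + // (Illustrative numbers, not from the source: for a 64-byte size + // class with length = 100 objects on the central freelist and + // tc_length = 32 objects in the transfer cache, central_bytes grows + // by 64*100 bytes plus overhead, transfer_bytes by 64*32 = 2048 + // bytes, and class_count[cl] records 132 objects.)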
+ class_count[cl] = length + tc_length; + } + + } + + // Add stats from per-thread heaps + r->thread_bytes = 0; + { // scope + SpinLockHolder h(Static::pageheap_lock()); + ThreadCache::GetThreadStats(&r->thread_bytes, class_count); + r->metadata_bytes = tcmalloc::metadata_system_bytes(); + r->pageheap = Static::pageheap()->stats(); + if (small_spans != NULL) { + Static::pageheap()->GetSmallSpanStats(small_spans); + } + if (large_spans != NULL) { + Static::pageheap()->GetLargeSpanStats(large_spans); + } + } +} + +static double PagesToMiB(uint64_t pages) { + return (pages << kPageShift) / 1048576.0; +} + +// WRITE stats to "out" +static void DumpStats(TCMalloc_Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + PageHeap::SmallSpanStats small; + PageHeap::LargeSpanStats large; + if (level >= 2) { + ExtractStats(&stats, class_count, &small, &large); + } else { + ExtractStats(&stats, NULL, NULL, NULL); + } + + static const double MiB = 1048576.0; + + const uint64_t virtual_memory_used = (stats.pageheap.system_bytes + + stats.metadata_bytes); + const uint64_t physical_memory_used = (virtual_memory_used + - stats.pageheap.unmapped_bytes); + const uint64_t bytes_in_use_by_app = (physical_memory_used + - stats.metadata_bytes + - stats.pageheap.free_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.thread_bytes); + +#ifdef TCMALLOC_SMALL_BUT_SLOW + out->printf( + "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); +#endif + out->printf( + "------------------------------------------------\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" + "MALLOC:\n" + "MALLOC: %12" PRIu64 " Spans in use\n" + "MALLOC: %12" PRIu64 " Thread heaps in use\n" + "MALLOC: %12" PRIu64 " Tcmalloc page size\n" + "------------------------------------------------\n" + "Call ReleaseFreeMemory() to release freelist memory to the OS" + " (via madvise()).\n" + "Bytes released to the OS take up virtual address space" + " but no physical memory.\n", + bytes_in_use_by_app, bytes_in_use_by_app / MiB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, + stats.central_bytes, stats.central_bytes / MiB, + stats.transfer_bytes, stats.transfer_bytes / MiB, + stats.thread_bytes, stats.thread_bytes / MiB, + stats.metadata_bytes, stats.metadata_bytes / MiB, + physical_memory_used, physical_memory_used / MiB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, + virtual_memory_used, virtual_memory_used / MiB, + uint64_t(Static::span_allocator()->inuse()), + uint64_t(ThreadCache::HeapsInUse()), + uint64_t(kPageSize)); + + if (level >= 2) { + out->printf("------------------------------------------------\n"); + out->printf("Total size of freelists for per-thread caches,\n"); + out->printf("transfer cache, and central cache, by size class\n"); + 
out->printf("------------------------------------------------\n"); + uint64_t cumulative = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + if (class_count[cl] > 0) { + uint64_t class_bytes = + class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); + cumulative += class_bytes; + out->printf("class %3d [ %8" PRIuS " bytes ] : " + "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n", + cl, Static::sizemap()->ByteSizeForClass(cl), + class_count[cl], + class_bytes / MiB, + cumulative / MiB); + } + } + + // append page heap info + int nonempty_sizes = 0; + for (int s = 0; s < kMaxPages; s++) { + if (small.normal_length[s] + small.returned_length[s] > 0) { + nonempty_sizes++; + } + } + out->printf("------------------------------------------------\n"); + out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n", + nonempty_sizes, stats.pageheap.free_bytes / MiB, + stats.pageheap.unmapped_bytes / MiB); + out->printf("------------------------------------------------\n"); + uint64_t total_normal = 0; + uint64_t total_returned = 0; + for (int s = 0; s < kMaxPages; s++) { + const int n_length = small.normal_length[s]; + const int r_length = small.returned_length[s]; + if (n_length + r_length > 0) { + uint64_t n_pages = s * n_length; + uint64_t r_pages = s * r_length; + total_normal += n_pages; + total_returned += r_pages; + out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + s, + (n_length + r_length), + PagesToMiB(n_pages + r_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(r_pages), + PagesToMiB(total_returned)); + } + } + + total_normal += large.normal_pages; + total_returned += large.returned_pages; + out->printf(">255 large * %6u spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + static_cast<unsigned int>(large.spans), + PagesToMiB(large.normal_pages + large.returned_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(large.returned_pages), + PagesToMiB(total_returned)); + } +} + +static void PrintStats(int level) { + const int kBufferSize = 16 << 10; + char* buffer = new char[kBufferSize]; + TCMalloc_Printer printer(buffer, kBufferSize); + DumpStats(&printer, level); + write(STDERR_FILENO, buffer, strlen(buffer)); + delete[] buffer; +} + +static void** DumpHeapGrowthStackTraces() { + // Count how much space we need + int needed_slots = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + for (StackTrace* t = Static::growth_stacks(); + t != NULL; + t = reinterpret_cast<StackTrace*>( + t->stack[tcmalloc::kMaxStackDepth-1])) { + needed_slots += 3 + t->depth; + } + needed_slots += 100; // Slop in case list grows + needed_slots += needed_slots/8; // An extra 12.5% slop + } + + void** result = new void*[needed_slots]; + if (result == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: allocation failed for stack trace slots", + needed_slots * sizeof(*result)); + return NULL; + } + + SpinLockHolder h(Static::pageheap_lock()); + int used_slots = 0; + for (StackTrace* t = Static::growth_stacks(); + t != NULL; + t = reinterpret_cast<StackTrace*>( + t->stack[tcmalloc::kMaxStackDepth-1])) { + ASSERT(used_slots < needed_slots); // Need to leave room for terminator + if (used_slots + 3 + t->depth >= needed_slots) { + // No more room + break; + } + + result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); + result[used_slots+1] = reinterpret_cast<void*>(t->size); + result[used_slots+2] = reinterpret_cast<void*>(t->depth); + for (int d = 0; d < t->depth; d++) { + 
result[used_slots+3+d] = t->stack[d]; + } + used_slots += 3 + t->depth; + } + result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); + return result; +} + +static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) { + PageID page = 1; // Some code may assume that page==0 is never used + bool done = false; + while (!done) { + // Accumulate a small number of ranges in a local buffer + static const int kNumRanges = 16; + static base::MallocRange ranges[kNumRanges]; + int n = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + while (n < kNumRanges) { + if (!Static::pageheap()->GetNextRange(page, &ranges[n])) { + done = true; + break; + } else { + uintptr_t limit = ranges[n].address + ranges[n].length; + page = (limit + kPageSize - 1) >> kPageShift; + n++; + } + } + } + + for (int i = 0; i < n; i++) { + (*func)(arg, &ranges[i]); + } + } +} + +// TCMalloc's support for extra malloc interfaces +class TCMallocImplementation : public MallocExtension { + private: + // ReleaseToSystem() might release more than the requested bytes because + // the page heap releases at the span granularity, and spans are of wildly + // different sizes. This member keeps track of the extra bytes + // released so that the app can periodically call ReleaseToSystem() to + // release memory at a constant rate. + // NOTE: Protected by Static::pageheap_lock(). + size_t extra_bytes_released_; + + public: + TCMallocImplementation() + : extra_bytes_released_(0) { + } + + virtual void GetStats(char* buffer, int buffer_length) { + ASSERT(buffer_length > 0); + TCMalloc_Printer printer(buffer, buffer_length); + + // Print level one stats unless lots of space is available + if (buffer_length < 10000) { + DumpStats(&printer, 1); + } else { + DumpStats(&printer, 2); + } + } + + // We may print an extra, tcmalloc-specific warning message here.
+ virtual void GetHeapSample(MallocExtensionWriter* writer) { + if (FLAGS_tcmalloc_sample_parameter == 0) { + const char* const kWarningMsg = + "%warn\n" + "%warn This heap profile does not have any data in it, because\n" + "%warn the application was run with heap sampling turned off.\n" + "%warn To get useful data from GetHeapSample(), you must\n" + "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n" + "%warn a positive sampling period, such as 524288.\n" + "%warn\n"; + writer->append(kWarningMsg, strlen(kWarningMsg)); + } + MallocExtension::GetHeapSample(writer); + } + + virtual void** ReadStackTraces(int* sample_period) { + tcmalloc::StackTraceTable table; + { + SpinLockHolder h(Static::pageheap_lock()); + Span* sampled = Static::sampled_objects(); + for (Span* s = sampled->next; s != sampled; s = s->next) { + table.AddTrace(*reinterpret_cast<StackTrace*>(s->objects)); + } + } + *sample_period = ThreadCache::GetCache()->GetSamplePeriod(); + return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock + } + + virtual void** ReadHeapGrowthStackTraces() { + return DumpHeapGrowthStackTraces(); + } + + virtual size_t GetThreadCacheSize() { + ThreadCache* tc = ThreadCache::GetCacheIfPresent(); + if (!tc) + return 0; + return tc->Size(); + } + + virtual void MarkThreadTemporarilyIdle() { + ThreadCache::BecomeTemporarilyIdle(); + } + + virtual void Ranges(void* arg, RangeFunction func) { + IterateOverRanges(arg, func); + } + + virtual bool GetNumericProperty(const char* name, size_t* value) { + ASSERT(name != NULL); + + if (strcmp(name, "generic.current_allocated_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.pageheap.system_bytes + - stats.thread_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes; + return true; + } + + if (strcmp(name, "generic.heap_size") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.pageheap.system_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.slack_bytes") == 0) { + // Kept for backwards compatibility. Now defined externally as: + // pageheap_free_bytes + pageheap_unmapped_bytes. 
+ SpinLockHolder l(Static::pageheap_lock()); + PageHeap::Stats stats = Static::pageheap()->stats(); + *value = stats.free_bytes + stats.unmapped_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.central_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.transfer_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.thread_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().free_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().unmapped_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = ThreadCache::overall_thread_cache_size(); + return true; + } + + if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.thread_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) { + *value = size_t(Static::pageheap()->GetAggressiveDecommit()); + return true; + } + + return false; + } + + virtual bool SetNumericProperty(const char* name, size_t value) { + ASSERT(name != NULL); + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + ThreadCache::set_overall_thread_cache_size(value); + return true; + } + + if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) { + Static::pageheap()->SetAggressiveDecommit(value != 0); + return true; + } + + return false; + } + + virtual void MarkThreadIdle() { + ThreadCache::BecomeIdle(); + } + + virtual void MarkThreadBusy(); // Implemented below + + virtual SysAllocator* GetSystemAllocator() { + SpinLockHolder h(Static::pageheap_lock()); + return sys_alloc; + } + + virtual void SetSystemAllocator(SysAllocator* alloc) { + SpinLockHolder h(Static::pageheap_lock()); + sys_alloc = alloc; + } + + virtual void ReleaseToSystem(size_t num_bytes) { + SpinLockHolder h(Static::pageheap_lock()); + if (num_bytes <= extra_bytes_released_) { + // We released too much on a prior call, so don't release any + // more this time. + extra_bytes_released_ = extra_bytes_released_ - num_bytes; + return; + } + num_bytes = num_bytes - extra_bytes_released_; + // num_bytes might be less than one page. If we pass zero to + // ReleaseAtLeastNPages, it won't do anything, so we release a whole + // page now and let extra_bytes_released_ smooth it out over time. + Length num_pages = max<Length>(num_bytes >> kPageShift, 1); + size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages( + num_pages) << kPageShift; + if (bytes_released > num_bytes) { + extra_bytes_released_ = bytes_released - num_bytes; + } else { + // The PageHeap wasn't able to release num_bytes. Don't try to + // compensate with a big release next time. Specifically, + // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX). 
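+ // An illustrative sketch of the caller pattern this smoothing enables + // (assumed, not part of this file): a background thread can return + // memory at a roughly constant rate and let extra_bytes_released_ + // absorb the span-granularity rounding, e.g. + // + // // once per second: + // MallocExtension::instance()->ReleaseToSystem(1 << 20); // ~1 MiB/s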
+ extra_bytes_released_ = 0; + } + } + + virtual void SetMemoryReleaseRate(double rate) { + FLAGS_tcmalloc_release_rate = rate; + } + + virtual double GetMemoryReleaseRate() { + return FLAGS_tcmalloc_release_rate; + } + virtual size_t GetEstimatedAllocatedSize(size_t size) { + if (size <= kMaxSize) { + const size_t cl = Static::sizemap()->SizeClass(size); + const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl); + return alloc_size; + } else { + return tcmalloc::pages(size) << kPageShift; + } + } + + // This just calls GetSizeWithCallback, but because that's in an + // unnamed namespace, we need to move the definition below it in the + // file. + virtual size_t GetAllocatedSize(const void* ptr); + + // This duplicates some of the logic in GetSizeWithCallback, but is + // faster. This is important on OS X, where this function is called + // on every allocation operation. + virtual Ownership GetOwnership(const void* ptr) { + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + // The rest of tcmalloc assumes that all allocated pointers use at + // most kAddressBits bits. If ptr doesn't, then it definitely + // wasn't allocated by tcmalloc. + if ((p >> (kAddressBits - kPageShift)) > 0) { + return kNotOwned; + } + size_t cl = Static::pageheap()->GetSizeClassIfCached(p); + if (cl != 0) { + return kOwned; + } + const Span *span = Static::pageheap()->GetDescriptor(p); + return span ? kOwned : kNotOwned; + } + + virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) { + static const char* kCentralCacheType = "tcmalloc.central"; + static const char* kTransferCacheType = "tcmalloc.transfer"; + static const char* kThreadCacheType = "tcmalloc.thread"; + static const char* kPageHeapType = "tcmalloc.page"; + static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped"; + static const char* kLargeSpanType = "tcmalloc.large"; + static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped"; + + v->clear(); + + // central class information + int64 prev_class_size = 0; + for (int cl = 1; cl < kNumClasses; ++cl) { + size_t class_size = Static::sizemap()->ByteSizeForClass(cl); + MallocExtension::FreeListInfo i; + i.min_object_size = prev_class_size + 1; + i.max_object_size = class_size; + i.total_bytes_free = + Static::central_cache()[cl].length() * class_size; + i.type = kCentralCacheType; + v->push_back(i); + + // transfer cache + i.total_bytes_free = + Static::central_cache()[cl].tc_length() * class_size; + i.type = kTransferCacheType; + v->push_back(i); + + prev_class_size = Static::sizemap()->ByteSizeForClass(cl); + } + + // Add stats from per-thread heaps + uint64_t class_count[kNumClasses]; + memset(class_count, 0, sizeof(class_count)); + { + SpinLockHolder h(Static::pageheap_lock()); + uint64_t thread_bytes = 0; + ThreadCache::GetThreadStats(&thread_bytes, class_count); + } + + prev_class_size = 0; + for (int cl = 1; cl < kNumClasses; ++cl) { + MallocExtension::FreeListInfo i; + i.min_object_size = prev_class_size + 1; + i.max_object_size = Static::sizemap()->ByteSizeForClass(cl); + i.total_bytes_free = + class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); + i.type = kThreadCacheType; + v->push_back(i); + } + + // append page heap info + PageHeap::SmallSpanStats small; + PageHeap::LargeSpanStats large; + { + SpinLockHolder h(Static::pageheap_lock()); + Static::pageheap()->GetSmallSpanStats(&small); + Static::pageheap()->GetLargeSpanStats(&large); + } + + // large spans: mapped + MallocExtension::FreeListInfo span_info; + span_info.type =
kLargeSpanType; + span_info.max_object_size = (numeric_limits<size_t>::max)(); + span_info.min_object_size = kMaxPages << kPageShift; + span_info.total_bytes_free = large.normal_pages << kPageShift; + v->push_back(span_info); + + // large spans: unmapped + span_info.type = kLargeUnmappedSpanType; + span_info.total_bytes_free = large.returned_pages << kPageShift; + v->push_back(span_info); + + // small spans + for (int s = 1; s < kMaxPages; s++) { + MallocExtension::FreeListInfo i; + i.max_object_size = (s << kPageShift); + i.min_object_size = ((s - 1) << kPageShift); + + i.type = kPageHeapType; + i.total_bytes_free = (s << kPageShift) * small.normal_length[s]; + v->push_back(i); + + i.type = kPageHeapUnmappedType; + i.total_bytes_free = (s << kPageShift) * small.returned_length[s]; + v->push_back(i); + } + } +}; + +// The constructor allocates an object to ensure that initialization +// runs before main(), and therefore we do not have a chance to become +// multi-threaded before initialization. We also create the TSD key +// here. Presumably by the time this constructor runs, glibc is in +// good enough shape to handle pthread_key_create(). +// +// The constructor also takes the opportunity to tell STL to use +// tcmalloc. We want to do this early, before construct time, so +// all user STL allocations go through tcmalloc (which works really +// well for STL). +// +// The destructor prints stats when the program exits. +static int tcmallocguard_refcount = 0; // no lock needed: runs before main() +TCMallocGuard::TCMallocGuard() { + if (tcmallocguard_refcount++ == 0) { + ReplaceSystemAlloc(); // defined in libc_override_*.h + tc_free(tc_malloc(1)); + ThreadCache::InitTSD(); + tc_free(tc_malloc(1)); + // Either we, or debugallocation.cc, or valgrind will control memory + // management. We register our extension if we're the winner. +#ifdef TCMALLOC_USING_DEBUGALLOCATION + // Let debugallocation register its extension. +#else + if (RunningOnValgrind()) { + // Let Valgrind use its own malloc (so don't register our extension).
+ } else { + MallocExtension::Register(new TCMallocImplementation); + } +#endif + } +} + +TCMallocGuard::~TCMallocGuard() { + if (--tcmallocguard_refcount == 0) { + const char* env = NULL; + if (!RunningOnValgrind()) { + // Valgrind uses its own malloc so we cannot do MALLOCSTATS + env = getenv("MALLOCSTATS"); + } + if (env != NULL) { + int level = atoi(env); + if (level < 1) level = 1; + PrintStats(level); + } + } +} +#ifndef WIN32_OVERRIDE_ALLOCATORS +static TCMallocGuard module_enter_exit_hook; +#endif + +//------------------------------------------------------------------- +// Helpers for the exported routines below +//------------------------------------------------------------------- + +static inline bool CheckCachedSizeClass(void *ptr) { + PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p); + return cached_value == 0 || + cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass; +} + +static inline void* CheckedMallocResult(void *result) { + ASSERT(result == NULL || CheckCachedSizeClass(result)); + return result; +} + +static inline void* SpanToMallocResult(Span *span) { + Static::pageheap()->CacheSizeClass(span->start, 0); + return + CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); +} + +static void* DoSampledAllocation(size_t size) { +#ifndef NO_TCMALLOC_SAMPLES + // Grab the stack trace outside the heap lock + StackTrace tmp; + tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1); + tmp.size = size; + + SpinLockHolder h(Static::pageheap_lock()); + // Allocate span + Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size)); + if (UNLIKELY(span == NULL)) { + return NULL; + } + + // Allocate stack trace + StackTrace *stack = Static::stacktrace_allocator()->New(); + if (UNLIKELY(stack == NULL)) { + // Sampling failed because of lack of memory + return span; + } + *stack = tmp; + span->sample = 1; + span->objects = stack; + tcmalloc::DLL_Prepend(Static::sampled_objects(), span); + + return SpanToMallocResult(span); +#else + abort(); +#endif +} + +namespace { + +typedef void* (*malloc_fn)(void *arg); + +SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED); + +void* handle_oom(malloc_fn retry_fn, + void* retry_arg, + bool from_operator, + bool nothrow) { + if (!from_operator && !tc_new_mode) { + // We're out of memory in a C library function (malloc etc.) and no + // "new mode" was forced on us. Just return NULL. + return NULL; + } + // We're OOM in operator new, or "new mode" is set. We might have to + // call the new_handler and maybe retry the allocation. + + for (;;) { + // Get the current new handler. NB: this function is not + // thread-safe. We make a feeble stab at making it so here, but + // this lock only protects against tcmalloc interfering with + // itself, not with other libraries calling set_new_handler. + std::new_handler nh; + { + SpinLockHolder h(&set_new_handler_lock); + nh = std::set_new_handler(0); + (void) std::set_new_handler(nh); + } +#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + if (!nh) { + return NULL; + } + // Since exceptions are disabled, we don't really know if new_handler + // failed. Assume it will abort if it fails. + (*nh)(); +#else + // If no new_handler is established, the allocation failed. + if (!nh) { + if (nothrow) { + return NULL; + } + throw std::bad_alloc(); + } + // Otherwise, try the new_handler. If it returns, retry the + // allocation.
If it throws std::bad_alloc, fail the allocation. + // If it throws something else, don't interfere. + try { + (*nh)(); + } catch (const std::bad_alloc&) { + if (!nothrow) throw; + return NULL; + } +#endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + + // We get here if the new_handler returned successfully, so we retry + // the allocation. + void* rv = retry_fn(retry_arg); + if (rv != NULL) { + return rv; + } + + // If allocation fails again, we go to the next loop iteration. + } +} + +// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with +// automatic increases factored in. +static int64_t large_alloc_threshold = + (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold + ? kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold); + +static void ReportLargeAlloc(Length num_pages, void* result) { + StackTrace stack; + stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1); + + static const int N = 1000; + char buffer[N]; + TCMalloc_Printer printer(buffer, N); + printer.printf("tcmalloc: large alloc %" PRIu64 " bytes == %p @ ", + static_cast<uint64>(num_pages) << kPageShift, + result); + for (int i = 0; i < stack.depth; i++) { + printer.printf(" %p", stack.stack[i]); + } + printer.printf("\n"); + write(STDERR_FILENO, buffer, strlen(buffer)); +} + +void* do_memalign(size_t align, size_t size); + +struct retry_memalign_data { + size_t align; + size_t size; +}; + +static void *retry_do_memalign(void *arg) { + retry_memalign_data *data = static_cast<retry_memalign_data *>(arg); + return do_memalign(data->align, data->size); +} + +static void *maybe_do_cpp_memalign_slow(size_t align, size_t size) { + retry_memalign_data data; + data.align = align; + data.size = size; + return handle_oom(retry_do_memalign, &data, + false, true); +} + +inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) { + void *rv = do_memalign(align, size); + if (LIKELY(rv != NULL)) { + return rv; + } + return maybe_do_cpp_memalign_slow(align, size); +} + +// Must be called with the page lock held. +inline bool should_report_large(Length num_pages) { + const int64 threshold = large_alloc_threshold; + if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { + // Increase the threshold by 1/8 every time we generate a report. + // We cap the threshold at 8GiB to avoid overflow problems. + large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 + ? threshold + threshold/8 : 8ll<<30); + return true; + } + return false; +} + +// Helper for do_malloc(). +inline void* do_malloc_pages(ThreadCache* heap, size_t size) { + void* result; + bool report_large; + + Length num_pages = tcmalloc::pages(size); + + // NOTE: we're passing the original size here as opposed to rounded-up + // size as we do in do_malloc_small. The difference is small here + // (at most 4k out of at least 256k). And not rounding up saves us + // from the possibility of overflow, which rounding up could produce. + // + // See https://github.com/gperftools/gperftools/issues/723 + if (heap->SampleAllocation(size)) { + result = DoSampledAllocation(size); + + SpinLockHolder h(Static::pageheap_lock()); + report_large = should_report_large(num_pages); + } else { + SpinLockHolder h(Static::pageheap_lock()); + Span* span = Static::pageheap()->New(num_pages); + result = (UNLIKELY(span == NULL) ?
NULL : SpanToMallocResult(span)); + report_large = should_report_large(num_pages); + } + + if (report_large) { + ReportLargeAlloc(num_pages, result); + } + return result; +} + +ALWAYS_INLINE void* do_malloc_small(ThreadCache* heap, size_t size) { + ASSERT(Static::IsInited()); + ASSERT(heap != NULL); + size_t cl = Static::sizemap()->SizeClass(size); + size = Static::sizemap()->class_to_size(cl); + + if (UNLIKELY(heap->SampleAllocation(size))) { + return DoSampledAllocation(size); + } else { + // The common case, and also the simplest. This just pops the + // size-appropriate freelist, after replenishing it if it's empty. + return CheckedMallocResult(heap->Allocate(size, cl)); + } +} + +ALWAYS_INLINE void* do_malloc(size_t size) { + if (ThreadCache::have_tls) { + if (LIKELY(size < ThreadCache::MinSizeForSlowPath())) { + return do_malloc_small(ThreadCache::GetCacheWhichMustBePresent(), size); + } + if (UNLIKELY(ThreadCache::IsUseEmergencyMalloc())) { + return tcmalloc::EmergencyMalloc(size); + } + } + + if (size <= kMaxSize) { + return do_malloc_small(ThreadCache::GetCache(), size); + } else { + return do_malloc_pages(ThreadCache::GetCache(), size); + } +} + +static void *retry_malloc(void* size) { + return do_malloc(reinterpret_cast<size_t>(size)); +} + +ALWAYS_INLINE void* do_malloc_or_cpp_alloc(size_t size) { + void *rv = do_malloc(size); + if (LIKELY(rv != NULL)) { + return rv; + } + return handle_oom(retry_malloc, reinterpret_cast<void *>(size), + false, true); +} + +ALWAYS_INLINE void* do_calloc(size_t n, size_t elem_size) { + // Overflow check + const size_t size = n * elem_size; + if (elem_size != 0 && size / elem_size != n) return NULL; + + void* result = do_malloc_or_cpp_alloc(size); + if (result != NULL) { + memset(result, 0, size); + } + return result; +} + +// If ptr is NULL, do nothing. Otherwise invoke the given function. +inline void free_null_or_invalid(void* ptr, void (*invalid_free_fn)(void*)) { + if (ptr != NULL) { + (*invalid_free_fn)(ptr); + } +} + +// Helper for do_free_with_callback(), below. Inputs: +// ptr is object to be freed +// invalid_free_fn is a function that gets invoked on certain "bad frees" +// heap is the ThreadCache for this thread, or NULL if it isn't known +// heap_must_be_valid is whether heap is known to be non-NULL +// +// This function may only be used after Static::IsInited() is true. +// +// We can usually detect the case where ptr is not pointing to a page that +// tcmalloc is using, and in those cases we invoke invalid_free_fn. +// +// To maximize speed in the common case, we usually get here with +// heap_must_be_valid being a manifest constant equal to true. +ALWAYS_INLINE void do_free_helper(void* ptr, + void (*invalid_free_fn)(void*), + ThreadCache* heap, + bool heap_must_be_valid, + bool use_hint, + size_t size_hint) { + ASSERT((Static::IsInited() && heap != NULL) || !heap_must_be_valid); + if (!heap_must_be_valid && !Static::IsInited()) { + // We called free() before malloc(). This can occur if the + // (system) malloc() is called before tcmalloc is loaded, and then + // free() is called after tcmalloc is loaded (and tc_free has + // replaced free), but before the global constructor has run that + // sets up the tcmalloc data structures. 
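+ // (A concrete instance of the ordering described above: a static + // initializer in another translation unit calls the system malloc() + // before TCMallocGuard has run, and the matching free() arrives after + // tc_free has been interposed; such a pointer is handed to + // free_null_or_invalid below.)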
+ free_null_or_invalid(ptr, invalid_free_fn); + return; + } + Span* span = NULL; + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + size_t cl; + if (use_hint && Static::sizemap()->MaybeSizeClass(size_hint, &cl)) { + goto non_zero; + } + + cl = Static::pageheap()->GetSizeClassIfCached(p); + if (UNLIKELY(cl == 0)) { + span = Static::pageheap()->GetDescriptor(p); + if (UNLIKELY(!span)) { + // span can be NULL because the pointer passed in is NULL or invalid + // (not something returned by malloc or friends), or because the + // pointer was allocated with some other allocator besides + // tcmalloc. The latter can happen if tcmalloc is linked in via + // a dynamic library, but is not listed last on the link line. + // In that case, libraries after it on the link line will + // allocate with libc malloc, but free with tcmalloc's free. + free_null_or_invalid(ptr, invalid_free_fn); + return; + } + cl = span->sizeclass; + Static::pageheap()->CacheSizeClass(p, cl); + } + + ASSERT(ptr != NULL); + if (LIKELY(cl != 0)) { + non_zero: + ASSERT(!Static::pageheap()->GetDescriptor(p)->sample); + if (heap_must_be_valid || heap != NULL) { + heap->Deallocate(ptr, cl); + } else { + // Delete directly into central cache + tcmalloc::SLL_SetNext(ptr, NULL); + Static::central_cache()[cl].InsertRange(ptr, ptr, 1); + } + } else { + SpinLockHolder h(Static::pageheap_lock()); + ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); + ASSERT(span != NULL && span->start == p); + if (span->sample) { + StackTrace* st = reinterpret_cast<StackTrace*>(span->objects); + tcmalloc::DLL_Remove(span); + Static::stacktrace_allocator()->Delete(st); + span->objects = NULL; + } + Static::pageheap()->Delete(span); + } +} + +// Helper for the object deletion (free, delete, etc.). Inputs: +// ptr is object to be freed +// invalid_free_fn is a function that gets invoked on certain "bad frees" +// +// We can usually detect the case where ptr is not pointing to a page that +// tcmalloc is using, and in those cases we invoke invalid_free_fn. +ALWAYS_INLINE void do_free_with_callback(void* ptr, + void (*invalid_free_fn)(void*), + bool use_hint, size_t size_hint) { + ThreadCache* heap = NULL; + heap = ThreadCache::GetCacheIfPresent(); + if (LIKELY(heap)) { + do_free_helper(ptr, invalid_free_fn, heap, true, use_hint, size_hint); + } else { + do_free_helper(ptr, invalid_free_fn, heap, false, use_hint, size_hint); + } +} + +// The default "do_free" that uses the default callback. +ALWAYS_INLINE void do_free(void* ptr) { + return do_free_with_callback(ptr, &InvalidFree, false, 0); +} + +// NOTE: some logic here is duplicated in GetOwnership (above), for +// speed. If you change this function, look at that one too. +inline size_t GetSizeWithCallback(const void* ptr, + size_t (*invalid_getsize_fn)(const void*)) { + if (ptr == NULL) + return 0; + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + size_t cl = Static::pageheap()->GetSizeClassIfCached(p); + if (cl != 0) { + return Static::sizemap()->ByteSizeForClass(cl); + } else { + const Span *span = Static::pageheap()->GetDescriptor(p); + if (UNLIKELY(span == NULL)) { // means we do not own this memory + return (*invalid_getsize_fn)(ptr); + } else if (span->sizeclass != 0) { + Static::pageheap()->CacheSizeClass(p, span->sizeclass); + return Static::sizemap()->ByteSizeForClass(span->sizeclass); + } else { + return span->length << kPageShift; + } + } +} + +// This lets you call back to a given function pointer if ptr is invalid. 
+// It is used primarily by windows code which wants a specialized callback. +ALWAYS_INLINE void* do_realloc_with_callback( + void* old_ptr, size_t new_size, + void (*invalid_free_fn)(void*), + size_t (*invalid_get_size_fn)(const void*)) { + // Get the size of the old entry + const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); + + // Reallocate if the new size is larger than the old size, + // or if the new size is significantly smaller than the old size. + // We do hysteresis to avoid resizing ping-pongs: + // . If we need to grow, grow to max(new_size, old_size * 1.X) + // . Don't shrink unless new_size < old_size * 0.Y + // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5. + const size_t lower_bound_to_grow = old_size + old_size / 4ul; + const size_t upper_bound_to_shrink = old_size / 2ul; + if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) { + // Need to reallocate. + void* new_ptr = NULL; + + if (new_size > old_size && new_size < lower_bound_to_grow) { + new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow); + } + if (new_ptr == NULL) { + // Either new_size is not a tiny increment, or last do_malloc failed. + new_ptr = do_malloc_or_cpp_alloc(new_size); + } + if (UNLIKELY(new_ptr == NULL)) { + return NULL; + } + MallocHook::InvokeNewHook(new_ptr, new_size); + memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); + MallocHook::InvokeDeleteHook(old_ptr); + // We could use a variant of do_free() that leverages the fact + // that we already know the sizeclass of old_ptr. The benefit + // would be small, so don't bother. + do_free_with_callback(old_ptr, invalid_free_fn, false, 0); + return new_ptr; + } else { + // We still need to call hooks to report the updated size: + MallocHook::InvokeDeleteHook(old_ptr); + MallocHook::InvokeNewHook(old_ptr, new_size); + return old_ptr; + } +} + +ALWAYS_INLINE void* do_realloc(void* old_ptr, size_t new_size) { + return do_realloc_with_callback(old_ptr, new_size, + &InvalidFree, &InvalidGetSizeForRealloc); +} + +// For use by exported routines below that want specific alignments +// +// Note: this code can be slow for alignments > 16, and can +// significantly fragment memory. The expectation is that +// memalign/posix_memalign/valloc/pvalloc will not be invoked very +// often. This requirement simplifies our implementation and allows +// us to tune for expected allocation patterns. +void* do_memalign(size_t align, size_t size) { + ASSERT((align & (align - 1)) == 0); + ASSERT(align > 0); + if (size + align < size) return NULL; // Overflow + + // Fall back to malloc if we would already align this memory access properly. + if (align <= AlignmentForSize(size)) { + void* p = do_malloc(size); + ASSERT((reinterpret_cast<uintptr_t>(p) % align) == 0); + return p; + } + + if (UNLIKELY(Static::pageheap() == NULL)) ThreadCache::InitModule(); + + // Allocate at least one byte to avoid boundary conditions below + if (size == 0) size = 1; + + if (size <= kMaxSize && align < kPageSize) { + // Search through acceptable size classes looking for one with + // enough alignment. This depends on the fact that + // InitSizeClasses() currently produces several size classes that + // are aligned at powers of two. We will waste time and space if + // we miss in the size class array, but that is deemed acceptable + // since memalign() should be used rarely. 
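+ // (A worked example of the search below, assuming the default size + // classes include 48 and 64: for size == 40 and align == 64 the scan + // skips the 48-byte class, since 48 % 64 != 0, and settles on the + // 64-byte class, which is correctly aligned.)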
+ int cl = Static::sizemap()->SizeClass(size); + while (cl < kNumClasses && + ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) { + cl++; + } + if (cl < kNumClasses) { + ThreadCache* heap = ThreadCache::GetCache(); + size = Static::sizemap()->class_to_size(cl); + return CheckedMallocResult(heap->Allocate(size, cl)); + } + } + + // We will allocate directly from the page heap + SpinLockHolder h(Static::pageheap_lock()); + + if (align <= kPageSize) { + // Any page-level allocation will be fine + // TODO: We could put the rest of this page in the appropriate + // TODO: cache but it does not seem worth it. + Span* span = Static::pageheap()->New(tcmalloc::pages(size)); + return UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span); + } + + // Allocate extra pages and carve off an aligned portion + const Length alloc = tcmalloc::pages(size + align); + Span* span = Static::pageheap()->New(alloc); + if (UNLIKELY(span == NULL)) return NULL; + + // Skip starting portion so that we end up aligned + Length skip = 0; + while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { + skip++; + } + ASSERT(skip < alloc); + if (skip > 0) { + Span* rest = Static::pageheap()->Split(span, skip); + Static::pageheap()->Delete(span); + span = rest; + } + + // Skip trailing portion that we do not need to return + const Length needed = tcmalloc::pages(size); + ASSERT(span->length >= needed); + if (span->length > needed) { + Span* trailer = Static::pageheap()->Split(span, needed); + Static::pageheap()->Delete(trailer); + } + return SpanToMallocResult(span); +} + +// Helpers for use by exported routines below: + +inline void do_malloc_stats() { + PrintStats(1); +} + +inline int do_mallopt(int cmd, int value) { + return 1; // Indicates error +} + +#ifdef HAVE_STRUCT_MALLINFO +inline struct mallinfo do_mallinfo() { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + + // Just some of the fields are filled in. + struct mallinfo info; + memset(&info, 0, sizeof(info)); + + // Unfortunately, the struct contains "int" fields, so some of the + // size values will be truncated. + info.arena = static_cast<int>(stats.pageheap.system_bytes); + info.fsmblks = static_cast<int>(stats.thread_bytes + + stats.central_bytes + + stats.transfer_bytes); + info.fordblks = static_cast<int>(stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); + info.uordblks = static_cast<int>(stats.pageheap.system_bytes + - stats.thread_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes); + + return info; +} +#endif // HAVE_STRUCT_MALLINFO + +inline void* cpp_alloc(size_t size, bool nothrow) { + void* p = do_malloc(size); + if (LIKELY(p)) { + return p; + } + return handle_oom(retry_malloc, reinterpret_cast<void *>(size), + true, nothrow); +} + +} // end unnamed namespace + +// As promised, the definition of this function, declared above. +size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) { + if (ptr == NULL) + return 0; + ASSERT(TCMallocImplementation::GetOwnership(ptr) + != TCMallocImplementation::kNotOwned); + return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); +} + +void TCMallocImplementation::MarkThreadBusy() { + // Allocate to force the creation of a thread cache, but avoid + // invoking any hooks.
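+ // (do_malloc/do_free are called directly, rather than tc_malloc/tc_free, + // because the tc_* entry points are the ones that invoke the MallocHook + // callbacks; see the exported routines below.)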
+ do_free(do_malloc(0)); +} + +//------------------------------------------------------------------- +// Exported routines +//------------------------------------------------------------------- + +extern "C" PERFTOOLS_DLL_DECL const char* tc_version( + int* major, int* minor, const char** patch) PERFTOOLS_THROW { + if (major) *major = TC_VERSION_MAJOR; + if (minor) *minor = TC_VERSION_MINOR; + if (patch) *patch = TC_VERSION_PATCH; + return TC_VERSION_STRING; +} + +// This function behaves similarly to MSVC's _set_new_mode. +// If flag is 0 (default), calls to malloc will behave normally. +// If flag is 1, calls to malloc will behave like calls to new, +// and the std_new_handler will be invoked on failure. +// Returns the previous mode. +extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW { + int old_mode = tc_new_mode; + tc_new_mode = flag; + return old_mode; +} + +#ifndef TCMALLOC_USING_DEBUGALLOCATION // debugallocation.cc defines its own + +#if defined(__GNUC__) && defined(__ELF__) && !defined(TCMALLOC_NO_ALIASES) +#define TC_ALIAS(name) __attribute__((alias(#name))) +#endif + +// CAVEAT: The code structure below ensures that MallocHook methods are always +// called from the stack frame of the invoked allocation function. +// heap-checker.cc depends on this to start a stack trace from +// the call to the (de)allocation function. + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW { + void* result = do_malloc_or_cpp_alloc(size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW { + MallocHook::InvokeDeleteHook(ptr); + do_free(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW { + if ((reinterpret_cast<uintptr_t>(ptr) & (kPageSize-1)) == 0) { + tc_free(ptr); + return; + } + MallocHook::InvokeDeleteHook(ptr); + do_free_with_callback(ptr, &InvalidFree, true, size); +} + +#ifdef TC_ALIAS + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) throw() + TC_ALIAS(tc_free_sized); +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) throw() + TC_ALIAS(tc_free_sized); + +#else + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) throw() { + tc_free_sized(p, size); +} +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) throw() { + tc_free_sized(p, size); +} + +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n, + size_t elem_size) PERFTOOLS_THROW { + if (ThreadCache::IsUseEmergencyMalloc()) { + return tcmalloc::EmergencyCalloc(n, elem_size); + } + void* result = do_calloc(n, elem_size); + MallocHook::InvokeNewHook(result, n * elem_size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + MallocHook::InvokeDeleteHook(ptr); + do_free(ptr); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr, + size_t new_size) PERFTOOLS_THROW { + if (old_ptr == NULL) { + void* result = do_malloc_or_cpp_alloc(new_size); + MallocHook::InvokeNewHook(result, new_size); + return result; + } + if (new_size == 0) { + MallocHook::InvokeDeleteHook(old_ptr); + do_free(old_ptr); + return NULL; + } + if (UNLIKELY(tcmalloc::IsEmergencyPtr(old_ptr))) { + return tcmalloc::EmergencyRealloc(old_ptr, new_size); + } + return do_realloc(old_ptr, new_size); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { + void* p = 
cpp_alloc(size, false); + // We keep this next instruction out of cpp_alloc for a reason: when + // it's in, and new just calls cpp_alloc, the optimizer may fold the + // new call into cpp_alloc, which messes up our whole section-based + // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc + // isn't the last thing this fn calls, and prevents the folding. + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_THROW { + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + MallocHook::InvokeDeleteHook(p); + do_free(p); +} +#endif + +// Standard C++ library implementations define and use this +// (via ::operator delete(ptr, nothrow)). +// But it's really the same as normal delete, so we just do the same thing. +extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + MallocHook::InvokeDeleteHook(p); + do_free(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) +#ifdef TC_ALIAS +TC_ALIAS(tc_new); +#else +{ + void* p = cpp_alloc(size, false); + // We keep this next instruction out of cpp_alloc for a reason: when + // it's in, and new just calls cpp_alloc, the optimizer may fold the + // new call into cpp_alloc, which messes up our whole section-based + // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc + // isn't the last thing this fn calls, and prevents the folding. + MallocHook::InvokeNewHook(p, size); + return p; +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + PERFTOOLS_THROW +#ifdef TC_ALIAS +TC_ALIAS(tc_new_nothrow); +#else +{ + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + MallocHook::InvokeDeleteHook(p); + do_free(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_THROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + MallocHook::InvokeDeleteHook(p); + do_free(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, + size_t size) PERFTOOLS_THROW { + void* result = do_memalign_or_cpp_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign( + void** result_ptr, size_t align, size_t size) PERFTOOLS_THROW { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + + void* result = do_memalign_or_cpp_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + if (UNLIKELY(result == NULL)) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +static size_t pagesize = 0; + +extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_THROW { + // Allocate page-aligned object of length >= size bytes + if (pagesize == 0) pagesize = getpagesize(); + void* result = do_memalign_or_cpp_memalign(pagesize, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_THROW { + // Round up size to a multiple of pagesize + 
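+ // (A worked example of the round-up below, assuming a 4096-byte page: + // size 1 -> 4096, size 4096 -> 4096, size 4097 -> 8192, computed by + // (size + pagesize - 1) & ~(pagesize - 1).)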
if (pagesize == 0) pagesize = getpagesize(); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + size = (size + pagesize - 1) & ~(pagesize - 1); + void* result = do_memalign_or_cpp_memalign(pagesize, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW { + do_malloc_stats(); +} + +extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW { + return do_mallopt(cmd, value); +} + +#ifdef HAVE_STRUCT_MALLINFO +extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_THROW { + return do_mallinfo(); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW { + return MallocExtension::instance()->GetAllocatedSize(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW { + void* result = do_malloc(size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +#endif // TCMALLOC_USING_DEBUGALLOCATION diff --git a/src/third_party/gperftools-2.5/src/tcmalloc.h b/src/third_party/gperftools-2.5/src/tcmalloc.h new file mode 100644 index 00000000000..2d64f4eb40b --- /dev/null +++ b/src/third_party/gperftools-2.5/src/tcmalloc.h @@ -0,0 +1,70 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Some obscure memory-allocation routines may not be declared on all +// systems. In those cases, we'll just declare them ourselves. +// This file is meant to be used only internally, for unittests. 
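+// An illustrative sketch of the intended use (assumed, for exposition): a +// unittest that includes this header can exercise these routines portably +// even when the system headers do not declare them, e.g. +// +// void* p = NULL; +// if (posix_memalign(&p, 64, 1000) == 0) free(p); +// free(valloc(1000));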
+ +#include <config.h> + +#ifndef _XOPEN_SOURCE +# define _XOPEN_SOURCE 600 // for posix_memalign +#endif +#include <stdlib.h> // for posix_memalign +// FreeBSD has malloc.h, but complains if you use it +#if defined(HAVE_MALLOC_H) && !defined(__FreeBSD__) +#include <malloc.h> // for memalign, valloc, pvalloc +#endif + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +#if !HAVE_CFREE_SYMBOL +extern "C" void cfree(void* ptr) __THROW; +#endif +#if !HAVE_POSIX_MEMALIGN_SYMBOL +extern "C" int posix_memalign(void** ptr, size_t align, size_t size) __THROW; +#endif +#if !HAVE_MEMALIGN_SYMBOL +extern "C" void* memalign(size_t __alignment, size_t __size) __THROW; +#endif +#if !HAVE_VALLOC_SYMBOL +extern "C" void* valloc(size_t __size) __THROW; +#endif +#if !HAVE_PVALLOC_SYMBOL +extern "C" void* pvalloc(size_t __size) __THROW; +#endif diff --git a/src/third_party/gperftools-2.5/src/tcmalloc_guard.h b/src/third_party/gperftools-2.5/src/tcmalloc_guard.h new file mode 100644 index 00000000000..84952bac2ea --- /dev/null +++ b/src/third_party/gperftools-2.5/src/tcmalloc_guard.h @@ -0,0 +1,49 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// We expose the TCMallocGuard class -- which initializes the tcmalloc +// allocator -- so classes that need to be sure tcmalloc is loaded +// before they do stuff -- notably heap-profiler -- can. To use this +// create a static TCMallocGuard instance at the top of a file where +// you need tcmalloc to be initialized before global constructors run. 
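+// A minimal sketch of that pattern (the variable name here is +// illustrative): +// +// #include "tcmalloc_guard.h" +// static TCMallocGuard ensure_tcmalloc_is_initialized;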
+ +#ifndef TCMALLOC_TCMALLOC_GUARD_H_ +#define TCMALLOC_TCMALLOC_GUARD_H_ + +class TCMallocGuard { + public: + TCMallocGuard(); + ~TCMallocGuard(); +}; + +#endif // TCMALLOC_TCMALLOC_GUARD_H_ diff --git a/src/third_party/gperftools-2.5/src/third_party/valgrind.h b/src/third_party/gperftools-2.5/src/third_party/valgrind.h new file mode 100644 index 00000000000..577c59ab0cd --- /dev/null +++ b/src/third_party/gperftools-2.5/src/third_party/valgrind.h @@ -0,0 +1,3924 @@ +/* -*- c -*- + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (valgrind.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2008 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. 
Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + +#include <stdarg.h> + +/* Nb: this file might be included in a file compiled with -ansi. So + we can't use C++ style "//" comments nor the "asm" keyword (instead + use "__asm__"). */ + +/* Derive some tags indicating what the target platform is. Note + that in this file we're using the compiler's CPP symbols for + identifying architectures, which are different to the ones we use + within the rest of Valgrind. Note, __powerpc__ is active for both + 32 and 64-bit PPC, whereas __powerpc64__ is only active for the + latter (on Linux, that is). */ +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#if !defined(_AIX) && defined(__i386__) +# define PLAT_x86_linux 1 +#elif !defined(_AIX) && defined(__x86_64__) +# define PLAT_amd64_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__) +# define PLAT_ppc32_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__) +# define PLAT_ppc64_linux 1 +#elif defined(_AIX) && defined(__64BIT__) +# define PLAT_ppc64_aix5 1 +#elif defined(_AIX) && !defined(__64BIT__) +# define PLAT_ppc32_aix5 1 +#endif + + +/* If we're not compiling for our target platform, don't generate + any inline asms. */ +#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \ + && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \ + && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5) +# if !defined(NVALGRIND) +# define NVALGRIND 1 +# endif +#endif + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ +/* in here of use to end-users -- skip to the next section. */ +/* ------------------------------------------------------------------ */ + +#if defined(NVALGRIND) + +/* Define NVALGRIND to completely remove the Valgrind magic sequence + from the compiled code (analogous to NDEBUG's effects on + assert()) */ +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { \ + (_zzq_rlval) = (_zzq_default); \ + } + +#else /* ! NVALGRIND */ + +/* The following defines the magic code sequences which the JITter + spots and handles magically. Don't look too closely at them as + they will rot your brain. + + The assembly code sequences for all architectures is in this one + file. This is because this file must be stand-alone, and we don't + want to have multiple files. + + For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default + value gets put in the return slot, so that everything works when + this is executed not under Valgrind. Args are passed in a memory + block, and so there's no intrinsic limit to the number that could + be passed, but it's currently five. + + The macro args are: + _zzq_rlval result lvalue + _zzq_default default value (result returned when running on real CPU) + _zzq_request request code + _zzq_arg1..5 request params + + The other two macros are used to support function wrapping, and are + a lot simpler. 
VALGRIND_GET_NR_CONTEXT returns the value of the + guest's NRADDR pseudo-register and whatever other information is + needed to safely run the call original from the wrapper: on + ppc64-linux, the R2 value at the divert point is also needed. This + information is abstracted into a user-visible type, OrigFn. + + VALGRIND_CALL_NOREDIR_* behaves the same as the following on the + guest, but guarantees that the branch instruction will not be + redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: + branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a + complete inline asm, since it needs to be combined with more magic + inline asm stuff to be useful. +*/ + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" +#endif /* PLAT_x86_linux */ + +/* ------------------------ amd64-linux ------------------------ */ + +#if defined(PLAT_amd64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[6]; \ + register unsigned long long int _zzq_result __asm__("r3"); \ + register unsigned long long int* _zzq_ptr __asm__("r4"); \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1" \ + : "=r" (_zzq_result) \ + : "0" (_zzq_default), "r" (_zzq_ptr) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr __asm__("r3"); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + unsigned int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[7]; \ + register unsigned int _zzq_result; \ + register unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "lwz 3, 24(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
+                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  {          unsigned long long int  _zzq_args[7];                \
+    register unsigned long long int  _zzq_result;                 \
+    register unsigned long long int* _zzq_ptr;                    \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    _zzq_args[6] = (unsigned long long int)(_zzq_default);        \
+    _zzq_ptr = _zzq_args;                                         \
+    __asm__ volatile("mr 4,%1\n\t"                                \
+                     "ld 3, 48(4)\n\t"                            \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = client_request ( %R4 ) */           \
+                     "or 1,1,1\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (_zzq_result)                         \
+                     : "b" (_zzq_ptr)                             \
+                     : "r3", "r4", "cc", "memory");               \
+    _zzq_rlval = _zzq_result;                                     \
+  }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    register unsigned long long int __addr;                       \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR */                     \
+                     "or 2,2,2\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "r3", "cc", "memory"                       \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR_GPR2 */                \
+                     "or 4,4,4\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "r3", "cc", "memory"                       \
+                    );                                            \
+    _zzq_orig->r2 = __addr;                                       \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir *%R11 */       \
+                     "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_aix5 */
+
+/* Insert assembly code for other platforms here... */
+
+#endif /* NVALGRIND */
+
+
+/* ------------------------------------------------------------------ */
+/* PLATFORM SPECIFICS for FUNCTION WRAPPING.  This is all very        */
+/* ugly.  It's the least-worst tradeoff I can think of.               */
+/* ------------------------------------------------------------------ */
+
+/* This section defines magic (a.k.a. appalling-hack) macros for
+   making guaranteed-no-redirection calls, so as to get from function
+   wrappers to the functions they are wrapping.  The whole point is to
+   construct standard call sequences, but to do the call itself with a
+   special no-redirect call pseudo-instruction that the JIT
+   understands and handles specially.  This section is long and
+   repetitious, and I can't see a way to make it shorter.
+
+   The naming scheme is as follows:
+
+      CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
+
+   'W' stands for "word" and 'v' for "void".  Hence there are
+   different macros for calling arity 0, 1, 2, 3, 4, etc, functions,
+   and for each, the possibility of returning a word-typed result, or
+   no result.
+*/
+
+/* Use these to write the name of your wrapper.  NOTE: duplicates
+   VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
+
+#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname)                    \
+   _vgwZU_##soname##_##fnname
+
+#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname)                    \
+   _vgwZZ_##soname##_##fnname
+
+/* Use this macro from within a wrapper function to collect the
+   context (address and possibly other info) of the original function.
+   Once you have that you can then use it in one of the CALL_FN_
+   macros.  The type of the argument _lval is OrigFn.
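+
+   For illustration only: a wrapper for a hypothetical function
+   "int foo ( int, int )" exported by an object with soname
+   "libfoo.so.0" might, following the scheme above, look like this
+   ("Zd" encodes a '.'):
+
+      int I_WRAP_SONAME_FNNAME_ZU(libfooZdsoZd0,foo) ( int x, int y )
+      {
+         int    r;
+         OrigFn fn;
+         VALGRIND_GET_ORIG_FN(fn);    // collect foo's context
+         CALL_FN_W_WW(r, fn, x, y);   // call foo with no redirection
+         return r;
+      }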
*/ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Derivatives of the main macros below, for calling functions + returning void. */ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $4, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $8, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $12, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + 
volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $20, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $24, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $28, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ 
+ volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $36, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $40, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ 
+      _argvec[2] = (unsigned long)(arg2);                        \
+      _argvec[3] = (unsigned long)(arg3);                        \
+      _argvec[4] = (unsigned long)(arg4);                        \
+      _argvec[5] = (unsigned long)(arg5);                        \
+      _argvec[6] = (unsigned long)(arg6);                        \
+      _argvec[7] = (unsigned long)(arg7);                        \
+      _argvec[8] = (unsigned long)(arg8);                        \
+      _argvec[9] = (unsigned long)(arg9);                        \
+      _argvec[10] = (unsigned long)(arg10);                      \
+      _argvec[11] = (unsigned long)(arg11);                      \
+      __asm__ volatile(                                          \
+         "pushl 44(%%eax)\n\t"                                   \
+         "pushl 40(%%eax)\n\t"                                   \
+         "pushl 36(%%eax)\n\t"                                   \
+         "pushl 32(%%eax)\n\t"                                   \
+         "pushl 28(%%eax)\n\t"                                   \
+         "pushl 24(%%eax)\n\t"                                   \
+         "pushl 20(%%eax)\n\t"                                   \
+         "pushl 16(%%eax)\n\t"                                   \
+         "pushl 12(%%eax)\n\t"                                   \
+         "pushl 8(%%eax)\n\t"                                    \
+         "pushl 4(%%eax)\n\t"                                    \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */           \
+         VALGRIND_CALL_NOREDIR_EAX                               \
+         "addl $44, %%esp\n"                                     \
+         : /*out*/ "=a" (_res)                                   \
+         : /*in*/ "a" (&_argvec[0])                              \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS         \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,      \
+                                  arg6,arg7,arg8,arg9,arg10,     \
+                                  arg11,arg12)                   \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[13];                        \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)(arg1);                        \
+      _argvec[2] = (unsigned long)(arg2);                        \
+      _argvec[3] = (unsigned long)(arg3);                        \
+      _argvec[4] = (unsigned long)(arg4);                        \
+      _argvec[5] = (unsigned long)(arg5);                        \
+      _argvec[6] = (unsigned long)(arg6);                        \
+      _argvec[7] = (unsigned long)(arg7);                        \
+      _argvec[8] = (unsigned long)(arg8);                        \
+      _argvec[9] = (unsigned long)(arg9);                        \
+      _argvec[10] = (unsigned long)(arg10);                      \
+      _argvec[11] = (unsigned long)(arg11);                      \
+      _argvec[12] = (unsigned long)(arg12);                      \
+      __asm__ volatile(                                          \
+         "pushl 48(%%eax)\n\t"                                   \
+         "pushl 44(%%eax)\n\t"                                   \
+         "pushl 40(%%eax)\n\t"                                   \
+         "pushl 36(%%eax)\n\t"                                   \
+         "pushl 32(%%eax)\n\t"                                   \
+         "pushl 28(%%eax)\n\t"                                   \
+         "pushl 24(%%eax)\n\t"                                   \
+         "pushl 20(%%eax)\n\t"                                   \
+         "pushl 16(%%eax)\n\t"                                   \
+         "pushl 12(%%eax)\n\t"                                   \
+         "pushl 8(%%eax)\n\t"                                    \
+         "pushl 4(%%eax)\n\t"                                    \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */           \
+         VALGRIND_CALL_NOREDIR_EAX                               \
+         "addl $48, %%esp\n"                                     \
+         : /*out*/ "=a" (_res)                                   \
+         : /*in*/ "a" (&_argvec[0])                              \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS         \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#endif /* PLAT_x86_linux */
+
+/* ------------------------ amd64-linux ------------------------ */
+
+#if defined(PLAT_amd64_linux)
+
+/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi",      \
+                            "rdi", "r8", "r9", "r10", "r11"
+
+/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
+   long) == 8. */
+
+/* NB 9 Sept 07.  There is a nasty kludge here in all these CALL_FN_
+   macros.  In order not to trash the stack redzone, we need to drop
+   %rsp by 128 before the hidden call, and restore afterwards.  The
+   nastiness is that it is only by luck that the stack still appears
+   to be unwindable during the hidden call - since then the behaviour
+   of any routine using this macro does not match what the CFI data
+   says.  Sigh.
+
+   Why is this important?  Imagine that a wrapper has a stack-allocated
+   local and passes a pointer to it to the hidden call.  Because gcc
+   does not know about the hidden call, it may allocate that local in
+   the redzone.  Unfortunately the hidden call may then trash it
+   before it comes to use it.
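+
+   (A hypothetical sketch of that hazard, for illustration only:
+
+      unsigned long r;
+      int local;                  // gcc may well place this in the redzone
+      ...
+      CALL_FN_W_W(r, fn, (unsigned long)&local);
+      ... subsequent reads of "local" may now see trash ...
+
+   Without the %rsp adjustment, the hidden call's own stack writes
+   could land on "local" before the wrapper comes to use it.)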
So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. + + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX 
\
+         "addq $128,%%rsp\n\t"                                   \
+         : /*out*/ "=a" (_res)                                   \
+         : /*in*/ "a" (&_argvec[0])                              \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS         \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)       \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[6];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)(arg1);                        \
+      _argvec[2] = (unsigned long)(arg2);                        \
+      _argvec[3] = (unsigned long)(arg3);                        \
+      _argvec[4] = (unsigned long)(arg4);                        \
+      _argvec[5] = (unsigned long)(arg5);                        \
+      __asm__ volatile(                                          \
+         "subq $128,%%rsp\n\t"                                   \
+         "movq 40(%%rax), %%r8\n\t"                              \
+         "movq 32(%%rax), %%rcx\n\t"                             \
+         "movq 24(%%rax), %%rdx\n\t"                             \
+         "movq 16(%%rax), %%rsi\n\t"                             \
+         "movq 8(%%rax), %%rdi\n\t"                              \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */           \
+         VALGRIND_CALL_NOREDIR_RAX                               \
+         "addq $128,%%rsp\n\t"                                   \
+         : /*out*/ "=a" (_res)                                   \
+         : /*in*/ "a" (&_argvec[0])                              \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS         \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)  \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[7];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)(arg1);                        \
+      _argvec[2] = (unsigned long)(arg2);                        \
+      _argvec[3] = (unsigned long)(arg3);                        \
+      _argvec[4] = (unsigned long)(arg4);                        \
+      _argvec[5] = (unsigned long)(arg5);                        \
+      _argvec[6] = (unsigned long)(arg6);                        \
+      __asm__ volatile(                                          \
+         "subq $128,%%rsp\n\t"                                   \
+         "movq 48(%%rax), %%r9\n\t"                              \
+         "movq 40(%%rax), %%r8\n\t"                              \
+         "movq 32(%%rax), %%rcx\n\t"                             \
+         "movq 24(%%rax), %%rdx\n\t"                             \
+         "movq 16(%%rax), %%rsi\n\t"                             \
+         "movq 8(%%rax), %%rdi\n\t"                              \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */           \
+         VALGRIND_CALL_NOREDIR_RAX                               \
+         "addq $128,%%rsp\n\t"                                   \
+         : /*out*/ "=a" (_res)                                   \
+         : /*in*/ "a" (&_argvec[0])                              \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS         \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                 arg7)                           \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[8];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)(arg1);                        \
+      _argvec[2] = (unsigned long)(arg2);                        \
+      _argvec[3] = (unsigned long)(arg3);                        \
+      _argvec[4] = (unsigned long)(arg4);                        \
+      _argvec[5] = (unsigned long)(arg5);                        \
+      _argvec[6] = (unsigned long)(arg6);                        \
+      _argvec[7] = (unsigned long)(arg7);                        \
+      __asm__ volatile(                                          \
+         "subq $128,%%rsp\n\t"                                   \
+         "pushq 56(%%rax)\n\t"                                   \
+         "movq 48(%%rax), %%r9\n\t"                              \
+         "movq 40(%%rax), %%r8\n\t"                              \
+         "movq 32(%%rax), %%rcx\n\t"                             \
+         "movq 24(%%rax), %%rdx\n\t"                             \
+         "movq 16(%%rax), %%rsi\n\t"                             \
+         "movq 8(%%rax), %%rdi\n\t"                              \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */           \
+         VALGRIND_CALL_NOREDIR_RAX                               \
+         "addq $8, %%rsp\n"                                      \
+         "addq $128,%%rsp\n\t"                                   \
+         : /*out*/ "=a" (_res)                                   \
+         : /*in*/ "a" (&_argvec[0])                              \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS         \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                 arg7,arg8)                      \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[9];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)(arg1);                        \
+      _argvec[2] = (unsigned long)(arg2);                        \
+      _argvec[3] = (unsigned long)(arg3);                        \
+      _argvec[4] = (unsigned
long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $16, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $24, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $32, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + 
_argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $40, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $48, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. 
*/ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } 
while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 
8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned 
long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
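+
+   Note on the layout used below: each _argvec[] carries a 3-word
+   header ahead of the arguments.  _argvec[0] is scratch space in
+   which the caller's r2 (TOC pointer) is saved across the call,
+   _argvec[1] holds the callee's own TOC pointer (OrigFn.r2), and
+   _argvec[2] holds the callee's entry point (OrigFn.nraddr);
+   arguments start at _argvec[3].  The asm is handed &_argvec[2] in
+   r11, which is why the TOC save/restore appears as
+   "std 2,-16(11)" / "ld 2,-16(11)" and the callee's TOC load as
+   "ld 2,-8(11)".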
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define 
CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, 
arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* 
use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand 
stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. 
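+
+   For instance, VG_EXPAND_FRAME_BY_trashes_r3(512) below expands to
+
+      addi 1,1,-512     (drop the stack pointer by 512 bytes)
+      lwz  3,512(1)     (fetch the saved back-chain word)
+      stw  3,0(1)       (re-plant it at the bottom of the new frame)
+
+   which is why unwinding through the expanded frame still works.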
*/ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "lwz 3," #_n_fr "(1)\n\t" \ + "stw 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + 
"stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; 
\ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" 
(_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : 
/*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,68(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 
24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "ld 3," #_n_fr "(1)\n\t" \ + "std 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned + long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) 
\ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 
24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned 
long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + 
_argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = 
(unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_aix5 */ + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ +/* */ +/* ------------------------------------------------------------------ */ + +/* Some request codes. There are many more of these, but most are not + exposed to end-user view. These are the public ones, all of the + form 0x1000 + small_number. + + Core ones are in the range 0x00000000--0x0000ffff. The non-public + ones start at 0x2000. +*/ + +/* These macros are used by tools -- they must be public, but don't + embed them into other programs. */ +#define VG_USERREQ_TOOL_BASE(a,b) \ + ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) \ + (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. */ +typedef + enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, + + /* These allow any function to be called from the simulated + CPU but run on the real CPU. Nb: the first arg passed to + the function is always the ThreadId of the running + thread! So CLIENT_CALL0 actually requires a 1 arg + function, etc. */ + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, + + /* Can be useful in regression testing suites -- eg. can + send Valgrind's output to /dev/null and still count + errors. */ + VG_USERREQ__COUNT_ERRORS = 0x1201, + + /* These are useful and can be interpreted by any tool that + tracks malloc() et al, by using vg_replace_malloc.c. */ + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support. 
*/
+          VG_USERREQ__CREATE_MEMPOOL   = 0x1303,
+          VG_USERREQ__DESTROY_MEMPOOL  = 0x1304,
+          VG_USERREQ__MEMPOOL_ALLOC    = 0x1305,
+          VG_USERREQ__MEMPOOL_FREE     = 0x1306,
+          VG_USERREQ__MEMPOOL_TRIM     = 0x1307,
+          VG_USERREQ__MOVE_MEMPOOL     = 0x1308,
+          VG_USERREQ__MEMPOOL_CHANGE   = 0x1309,
+          VG_USERREQ__MEMPOOL_EXISTS   = 0x130a,
+
+          /* Allow printfs to valgrind log. */
+          VG_USERREQ__PRINTF           = 0x1401,
+          VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
+
+          /* Stack support. */
+          VG_USERREQ__STACK_REGISTER   = 0x1501,
+          VG_USERREQ__STACK_DEREGISTER = 0x1502,
+          VG_USERREQ__STACK_CHANGE     = 0x1503
+   } Vg_ClientRequest;
+
+#if !defined(__GNUC__)
+#  define __extension__ /* */
+#endif
+
+/* Returns the number of Valgrinds this code is running under.  That
+   is, 0 if running natively, 1 if running under Valgrind, 2 if
+   running under Valgrind which is running under another Valgrind,
+   etc. */
+#define RUNNING_ON_VALGRIND  __extension__                        \
+   ({unsigned int _qzz_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */,          \
+                               VG_USERREQ__RUNNING_ON_VALGRIND,   \
+                               0, 0, 0, 0, 0);                    \
+    _qzz_res;                                                     \
+   })
+
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)         \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__DISCARD_TRANSLATIONS,  \
+                               _qzz_addr, _qzz_len, 0, 0, 0);     \
+   }
+
+
+/* These requests are for getting Valgrind itself to print something,
+   possibly with a backtrace.  This is a really ugly hack. */
+
+#if defined(NVALGRIND)
+
+#  define VALGRIND_PRINTF(...)
+#  define VALGRIND_PRINTF_BACKTRACE(...)
+
+#else /* NVALGRIND */
+
+/* Modern GCC will optimize the static routine out if unused, and the
+   unused attribute suppresses warnings about it. */
+static int VALGRIND_PRINTF(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF(const char *format, ...)
+{
+   unsigned long _qzz_res;
+   va_list vargs;
+   va_start(vargs, format);
+   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF,
+                              (unsigned long)format, (unsigned long)vargs,
+                              0, 0, 0);
+   va_end(vargs);
+   return (int)_qzz_res;
+}
+
+static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+{
+   unsigned long _qzz_res;
+   va_list vargs;
+   va_start(vargs, format);
+   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE,
+                              (unsigned long)format, (unsigned long)vargs,
+                              0, 0, 0);
+   va_end(vargs);
+   return (int)_qzz_res;
+}
+
+#endif /* NVALGRIND */
+
+
+/* These requests allow control to move from the simulated CPU to the
+   real CPU, calling an arbitrary function.
+
+   Note that the current ThreadId is inserted as the first argument.
+   So this call:
+
+     VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
+
+   requires f to have this signature:
+
+     Word f(Word tid, Word arg1, Word arg2)
+
+   where "Word" is a word-sized type.
+
+   Note that these client requests are not entirely reliable.  For example,
+   if you call a function with them that subsequently calls printf(),
+   there's a high chance Valgrind will crash.  Generally, your prospects of
+   these working are made higher if the called function does not refer to
+   any global variables, and does not refer to any libc or other functions
+   (printf et al).
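+
+   For instance (using "Word" informally, as above), a self-contained
+   helper such as
+
+     Word add2 ( Word tid, Word a, Word b ) { return a + b; }
+
+   can be run on the real CPU with
+
+     result = VALGRIND_NON_SIMD_CALL2(add2, 5, 7);
+
+   yielding 12 when running under Valgrind (and the default 0 when not),
+   since it touches no globals and no libc functions.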
Any kind of entanglement with libc or dynamic linking is + likely to have a bad outcome, for tricky reasons which we've grappled + with a lot in the past. +*/ +#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL0, \ + _qyy_fn, \ + 0, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL1, \ + _qyy_fn, \ + _qyy_arg1, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL2, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL3, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, \ + _qyy_arg3, 0); \ + _qyy_res; \ + }) + + +/* Counts the number of errors that have been recorded by a tool. Nb: + the tool must record the errors with VG_(maybe_record_error)() or + VG_(unique_error)() for them to be counted. */ +#define VALGRIND_COUNT_ERRORS \ + __extension__ \ + ({unsigned int _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__COUNT_ERRORS, \ + 0, 0, 0, 0, 0); \ + _qyy_res; \ + }) + +/* Mark a block of memory as having been allocated by a malloc()-like + function. `addr' is the start of the usable block (ie. after any + redzone) `rzB' is redzone size if the allocator can apply redzones; + use '0' if not. Adding redzones makes it more likely Valgrind will spot + block overruns. `is_zeroed' indicates if the memory is zeroed, as it is + for calloc(). Put it immediately after the point where a block is + allocated. + + If you're using Memcheck: If you're allocating memory via superblocks, + and then handing out small chunks of each superblock, if you don't have + redzones on your small blocks, it's worth marking the superblock with + VALGRIND_MAKE_MEM_NOACCESS when it's created, so that block overruns are + detected. But if you can put redzones on, it's probably better to not do + this, so that messages for small overruns are described in terms of the + small block rather than the superblock (but if you have a big overrun + that skips over a redzone, you could miss an error this way). See + memcheck/tests/custom_alloc.c for an example. + + WARNING: if your allocator uses malloc() or 'new' to allocate + superblocks, rather than mmap() or brk(), this will not work properly -- + you'll likely get assertion failures during leak detection. This is + because Valgrind doesn't like seeing overlapping heap blocks. Sorry. + + Nb: block must be freed via a free()-like function specified + with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */ +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MALLOCLIKE_BLOCK, \ + addr, sizeB, rzB, is_zeroed, 0); \ + } + +/* Mark a block of memory as having been freed by a free()-like function. + `rzB' is redzone size; it must match that given to + VALGRIND_MALLOCLIKE_BLOCK. Memory not freed will be detected by the leak + checker. 
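+
+   As a minimal sketch (get_chunk/put_chunk stand in for your allocator's
+   own internals; they are not a real API):
+
+     void* my_alloc(size_t n) {
+        void* p = get_chunk(n);
+        VALGRIND_MALLOCLIKE_BLOCK(p, n, 0, 0);  /* no redzone, not zeroed */
+        return p;
+     }
+     void my_free(void* p) {
+        put_chunk(p);
+        VALGRIND_FREELIKE_BLOCK(p, 0);          /* rzB matches the 0 above */
+     }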
Put it immediately after the point where the block is freed. */
+#define VALGRIND_FREELIKE_BLOCK(addr, rzB)                        \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__FREELIKE_BLOCK,        \
+                               addr, rzB, 0, 0, 0);               \
+   }
+
+/* Create a memory pool. */
+#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed)             \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__CREATE_MEMPOOL,        \
+                               pool, rzB, is_zeroed, 0, 0);       \
+   }
+
+/* Destroy a memory pool. */
+#define VALGRIND_DESTROY_MEMPOOL(pool)                            \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__DESTROY_MEMPOOL,       \
+                               pool, 0, 0, 0, 0);                 \
+   }
+
+/* Associate a piece of memory with a memory pool. */
+#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size)                  \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_ALLOC,         \
+                               pool, addr, size, 0, 0);           \
+   }
+
+/* Disassociate a piece of memory from a memory pool. */
+#define VALGRIND_MEMPOOL_FREE(pool, addr)                         \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_FREE,          \
+                               pool, addr, 0, 0, 0);              \
+   }
+
+/* Disassociate any pieces outside a particular range. */
+#define VALGRIND_MEMPOOL_TRIM(pool, addr, size)                   \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_TRIM,          \
+                               pool, addr, size, 0, 0);           \
+   }
+
+/* Move a memory pool's anchor address from poolA to poolB. */
+#define VALGRIND_MOVE_MEMPOOL(poolA, poolB)                       \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MOVE_MEMPOOL,          \
+                               poolA, poolB, 0, 0, 0);            \
+   }
+
+/* Resize and/or move a piece associated with a memory pool. */
+#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size)         \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_CHANGE,        \
+                               pool, addrA, addrB, size, 0);      \
+   }
+
+/* Return 1 if a mempool exists, else 0. */
+#define VALGRIND_MEMPOOL_EXISTS(pool)                             \
+   ({unsigned int _qzz_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_EXISTS,        \
+                               pool, 0, 0, 0, 0);                 \
+    _qzz_res;                                                     \
+   })
+
+/* Mark a piece of memory as being a stack. Returns a stack id. */
+#define VALGRIND_STACK_REGISTER(start, end)                       \
+   ({unsigned int _qzz_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__STACK_REGISTER,        \
+                               start, end, 0, 0, 0);              \
+    _qzz_res;                                                     \
+   })
+
+/* Unmark the piece of memory associated with a stack id as being a
+   stack. */
+#define VALGRIND_STACK_DEREGISTER(id)                             \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__STACK_DEREGISTER,      \
+                               id, 0, 0, 0, 0);                   \
+   }
+
+/* Change the start and end address of the stack id. */
+#define VALGRIND_STACK_CHANGE(id, start, end)                     \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__STACK_CHANGE,          \
+                               id, start, end, 0, 0);             \
+   }
+
+
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64_linux
+#undef PLAT_ppc32_aix5
+#undef PLAT_ppc64_aix5
+
+#endif /* __VALGRIND_H */
diff --git a/src/third_party/gperftools-2.5/src/thread_cache.cc b/src/third_party/gperftools-2.5/src/thread_cache.cc
new file mode 100644
index 00000000000..ef1f435e0c6
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/thread_cache.cc
@@ -0,0 +1,479 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Ken Ashcraft <opensource@google.com>
+
+#include <config.h>
+#include "thread_cache.h"
+#include <errno.h>
+#include <string.h>                     // for memcpy
+#include <algorithm>                    // for max, min
+#include "base/commandlineflags.h"      // for EnvToInt64
+#include "base/spinlock.h"              // for SpinLockHolder
+#include "getenv_safe.h"                // for TCMallocGetenvSafe
+#include "central_freelist.h"           // for CentralFreeListPadded
+#include "maybe_threads.h"
+
+using std::min;
+using std::max;
+
+// Note: this is initialized manually in InitModule to ensure that
+// it's configured at the right time.
+//
+// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
+//              EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
+//                         kDefaultOverallThreadCacheSize),
+//              "Bound on the total amount of bytes allocated to "
+//              "thread caches. This bound is not strict, so it is possible "
+//              "for the cache to go over this bound in certain circumstances. "
+//              "Maximum value of this flag is capped to 1 GB.");
+
+
+namespace tcmalloc {
+
+static bool phinited = false;
+
+volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
+size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
+ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
+PageHeapAllocator<ThreadCache> threadcache_allocator;
+ThreadCache* ThreadCache::thread_heaps_ = NULL;
+int ThreadCache::thread_heap_count_ = 0;
+ThreadCache* ThreadCache::next_memory_steal_ = NULL;
+#ifdef HAVE_TLS
+__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_
+    ATTR_INITIAL_EXEC
+    = {0, 0};
+#endif
+bool ThreadCache::tsd_inited_ = false;
+pthread_key_t ThreadCache::heap_key_;
+
+void ThreadCache::Init(pthread_t tid) {
+  size_ = 0;
+
+  max_size_ = 0;
+  IncreaseCacheLimitLocked();
+  if (max_size_ == 0) {
+    // There isn't enough memory to go around.  Just give the minimum to
+    // this thread.
+    max_size_ = kMinThreadCacheSize;
+
+    // Take unclaimed_cache_space_ negative.
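+    // (Going negative records that the overall budget is oversubscribed;
+    // as the flag comment above says, the bound is not strict, so this
+    // is tolerated rather than repaired immediately.)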
+ unclaimed_cache_space_ -= kMinThreadCacheSize; + ASSERT(unclaimed_cache_space_ < 0); + } + + next_ = NULL; + prev_ = NULL; + tid_ = tid; + in_setspecific_ = false; + for (size_t cl = 0; cl < kNumClasses; ++cl) { + list_[cl].Init(); + } + + uint32_t sampler_seed; + memcpy(&sampler_seed, &tid, sizeof(sampler_seed)); + sampler_.Init(sampler_seed); +} + +void ThreadCache::Cleanup() { + // Put unused memory back into central cache + for (int cl = 0; cl < kNumClasses; ++cl) { + if (list_[cl].length() > 0) { + ReleaseToCentralCache(&list_[cl], cl, list_[cl].length()); + } + } +} + +// Remove some objects of class "cl" from central cache and add to thread heap. +// On success, return the first object for immediate use; otherwise return NULL. +void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) { + FreeList* list = &list_[cl]; + ASSERT(list->empty()); + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + + const int num_to_move = min<int>(list->max_length(), batch_size); + void *start, *end; + int fetch_count = Static::central_cache()[cl].RemoveRange( + &start, &end, num_to_move); + + ASSERT((start == NULL) == (fetch_count == 0)); + if (--fetch_count >= 0) { + size_ += byte_size * fetch_count; + list->PushRange(fetch_count, SLL_Next(start), end); + } + + // Increase max length slowly up to batch_size. After that, + // increase by batch_size in one shot so that the length is a + // multiple of batch_size. + if (list->max_length() < batch_size) { + list->set_max_length(list->max_length() + 1); + } else { + // Don't let the list get too long. In 32 bit builds, the length + // is represented by a 16 bit int, so we need to watch out for + // integer overflow. + int new_length = min<int>(list->max_length() + batch_size, + kMaxDynamicFreeListLength); + // The list's max_length must always be a multiple of batch_size, + // and kMaxDynamicFreeListLength is not necessarily a multiple + // of batch_size. + new_length -= new_length % batch_size; + ASSERT(new_length % batch_size == 0); + list->set_max_length(new_length); + } + return start; +} + +void ThreadCache::ListTooLong(FreeList* list, size_t cl) { + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + ReleaseToCentralCache(list, cl, batch_size); + + // If the list is too long, we need to transfer some number of + // objects to the central cache. Ideally, we would transfer + // num_objects_to_move, so the code below tries to make max_length + // converge on num_objects_to_move. + + if (list->max_length() < batch_size) { + // Slow start the max_length so we don't overreserve. + list->set_max_length(list->max_length() + 1); + } else if (list->max_length() > batch_size) { + // If we consistently go over max_length, shrink max_length. If we don't + // shrink it, some amount of memory will always stay in this freelist. + list->set_length_overages(list->length_overages() + 1); + if (list->length_overages() > kMaxOverages) { + ASSERT(list->max_length() > batch_size); + list->set_max_length(list->max_length() - batch_size); + list->set_length_overages(0); + } + } +} + +// Remove some objects of class "cl" from thread heap and add to central cache +void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) { + ASSERT(src == &list_[cl]); + if (N > src->length()) N = src->length(); + size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl); + + // We return prepackaged chains of the correct size to the central cache. + // TODO: Use the same format internally in the thread caches? 
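+  // Example for the batching below (batch_size = 32 is illustrative; the
+  // real value is num_objects_to_move(cl)): with N = 70, the loop returns
+  // two full batches of 32, and the final InsertRange after the loop
+  // returns the remaining 6 objects.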
int batch_size = Static::sizemap()->num_objects_to_move(cl);
+  while (N > batch_size) {
+    void *tail, *head;
+    src->PopRange(batch_size, &head, &tail);
+    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
+    N -= batch_size;
+  }
+  void *tail, *head;
+  src->PopRange(N, &head, &tail);
+  Static::central_cache()[cl].InsertRange(head, tail, N);
+  size_ -= delta_bytes;
+}
+
+// Release idle memory to the central cache
+void ThreadCache::Scavenge() {
+  // If the low-water mark for the free list is L, it means we would
+  // not have had to allocate anything from the central cache even if
+  // we had reduced the free list size by L.  We aim to get closer to
+  // that situation by dropping L/2 nodes from the free list.  This
+  // may not release much memory, but if so we will call scavenge again
+  // pretty soon and the low-water marks will be high on that call.
+  for (int cl = 0; cl < kNumClasses; cl++) {
+    FreeList* list = &list_[cl];
+    const int lowmark = list->lowwatermark();
+    if (lowmark > 0) {
+      const int drop = (lowmark > 1) ? lowmark/2 : 1;
+      ReleaseToCentralCache(list, cl, drop);
+
+      // Shrink the max length if it isn't used.  Only shrink down to
+      // batch_size -- if the thread was active enough to get the max_length
+      // above batch_size, it will likely be that active again.  If
+      // max_length shrinks below batch_size, the thread will have to
+      // go through the slow-start behavior again.  The slow-start is useful
+      // mainly for threads that stay relatively idle for their entire
+      // lifetime.
+      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
+      if (list->max_length() > batch_size) {
+        list->set_max_length(
+            max<int>(list->max_length() - batch_size, batch_size));
+      }
+    }
+    list->clear_lowwatermark();
+  }
+
+  IncreaseCacheLimit();
+}
+
+void ThreadCache::IncreaseCacheLimit() {
+  SpinLockHolder h(Static::pageheap_lock());
+  IncreaseCacheLimitLocked();
+}
+
+void ThreadCache::IncreaseCacheLimitLocked() {
+  if (unclaimed_cache_space_ > 0) {
+    // Possibly make unclaimed_cache_space_ negative.
+    unclaimed_cache_space_ -= kStealAmount;
+    max_size_ += kStealAmount;
+    return;
+  }
+  // Don't hold pageheap_lock too long.  Try to steal from 10 other
+  // threads before giving up.  The i < 10 condition also prevents an
+  // infinite loop in case none of the existing thread heaps are
+  // suitable places to steal from.
+  for (int i = 0; i < 10;
+       ++i, next_memory_steal_ = next_memory_steal_->next_) {
+    // Reached the end of the linked list.  Start at the beginning.
+    if (next_memory_steal_ == NULL) {
+      ASSERT(thread_heaps_ != NULL);
+      next_memory_steal_ = thread_heaps_;
+    }
+    if (next_memory_steal_ == this ||
+        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
+      continue;
+    }
+    next_memory_steal_->max_size_ -= kStealAmount;
+    max_size_ += kStealAmount;
+
+    next_memory_steal_ = next_memory_steal_->next_;
+    return;
+  }
+}
+
+int ThreadCache::GetSamplePeriod() {
+  return sampler_.GetSamplePeriod();
+}
+
+void ThreadCache::InitModule() {
+  SpinLockHolder h(Static::pageheap_lock());
+  if (!phinited) {
+    const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES");
+    if (tcb) {
+      set_overall_thread_cache_size(strtoll(tcb, NULL, 10));
+    }
+    Static::InitStaticVars();
+    threadcache_allocator.Init();
+    phinited = 1;
+  }
+}
+
+void ThreadCache::InitTSD() {
+  ASSERT(!tsd_inited_);
+  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
+  tsd_inited_ = true;
+
+#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
+  // We may have used a fake pthread_t for the main thread.  Fix it.
+  pthread_t zero;
+  memset(&zero, 0, sizeof(zero));
+  SpinLockHolder h(Static::pageheap_lock());
+  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
+    if (h->tid_ == zero) {
+      h->tid_ = pthread_self();
+    }
+  }
+#endif
+}
+
+ThreadCache* ThreadCache::CreateCacheIfNecessary() {
+  // Initialize per-thread data if necessary
+  ThreadCache* heap = NULL;
+  {
+    SpinLockHolder h(Static::pageheap_lock());
+    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
+    // calling pthread routines (even pthread_self) too early could
+    // cause a segfault.  Since we can call pthreads quite early, we
+    // have to protect against that in such situations by making a
+    // 'fake' pthread.  This is not ideal since it doesn't work well
+    // when linking tcmalloc statically with apps that create threads
+    // before main, so we only do it if we have to.
+#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
+    pthread_t me;
+    if (!tsd_inited_) {
+      memset(&me, 0, sizeof(me));
+    } else {
+      me = pthread_self();
+    }
+#else
+    const pthread_t me = pthread_self();
+#endif
+
+    // This may be a recursive malloc call from pthread_setspecific()
+    // In that case, the heap for this thread has already been created
+    // and added to the linked list.  So we search for that first.
+    for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
+      if (h->tid_ == me) {
+        heap = h;
+        break;
+      }
+    }
+
+    if (heap == NULL) heap = NewHeap(me);
+  }
+
+  // We call pthread_setspecific() outside the lock because it may
+  // call malloc() recursively.  We check for the recursive call using
+  // the "in_setspecific_" flag so that we can avoid calling
+  // pthread_setspecific() if we are already inside pthread_setspecific().
+  if (!heap->in_setspecific_ && tsd_inited_) {
+    heap->in_setspecific_ = true;
+    perftools_pthread_setspecific(heap_key_, heap);
+#ifdef HAVE_TLS
+    // Also keep a copy in __thread for faster retrieval
+    threadlocal_data_.heap = heap;
+    SetMinSizeForSlowPath(kMaxSize + 1);
+#endif
+    heap->in_setspecific_ = false;
+  }
+  return heap;
+}
+
+ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
+  // Create the heap and add it to the linked list
+  ThreadCache *heap = threadcache_allocator.New();
+  heap->Init(tid);
+  heap->next_ = thread_heaps_;
+  heap->prev_ = NULL;
+  if (thread_heaps_ != NULL) {
+    thread_heaps_->prev_ = heap;
+  } else {
+    // This is the only thread heap at the moment.
+ ASSERT(next_memory_steal_ == NULL); + next_memory_steal_ = heap; + } + thread_heaps_ = heap; + thread_heap_count_++; + return heap; +} + +void ThreadCache::BecomeIdle() { + if (!tsd_inited_) return; // No caches yet + ThreadCache* heap = GetThreadHeap(); + if (heap == NULL) return; // No thread cache to remove + if (heap->in_setspecific_) return; // Do not disturb the active caller + + heap->in_setspecific_ = true; + perftools_pthread_setspecific(heap_key_, NULL); +#ifdef HAVE_TLS + // Also update the copy in __thread + threadlocal_data_.heap = NULL; + SetMinSizeForSlowPath(0); +#endif + heap->in_setspecific_ = false; + if (GetThreadHeap() == heap) { + // Somehow heap got reinstated by a recursive call to malloc + // from pthread_setspecific. We give up in this case. + return; + } + + // We can now get rid of the heap + DeleteCache(heap); +} + +void ThreadCache::BecomeTemporarilyIdle() { + ThreadCache* heap = GetCacheIfPresent(); + if (heap) + heap->Cleanup(); +} + +void ThreadCache::DestroyThreadCache(void* ptr) { + // Note that "ptr" cannot be NULL since pthread promises not + // to invoke the destructor on NULL values, but for safety, + // we check anyway. + if (ptr == NULL) return; +#ifdef HAVE_TLS + // Prevent fast path of GetThreadHeap() from returning heap. + threadlocal_data_.heap = NULL; + SetMinSizeForSlowPath(0); +#endif + DeleteCache(reinterpret_cast<ThreadCache*>(ptr)); +} + +void ThreadCache::DeleteCache(ThreadCache* heap) { + // Remove all memory from heap + heap->Cleanup(); + + // Remove from linked list + SpinLockHolder h(Static::pageheap_lock()); + if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; + if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; + if (thread_heaps_ == heap) thread_heaps_ = heap->next_; + thread_heap_count_--; + + if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_; + if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_; + unclaimed_cache_space_ += heap->max_size_; + + threadcache_allocator.Delete(heap); +} + +void ThreadCache::RecomputePerThreadCacheSize() { + // Divide available space across threads + int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1; + size_t space = overall_thread_cache_size_ / n; + + // Limit to allowed range + if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; + if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; + + double ratio = space / max<double>(1, per_thread_cache_size_); + size_t claimed = 0; + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + // Increasing the total cache size should not circumvent the + // slow-start growth of max_size_. 
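+    // (Consequently, growing the overall size typically leaves ratio >= 1.0,
+    // so existing caches keep their current max_size_ and only the unclaimed
+    // pool grows; only a shrinking overall size scales every cache down.)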
+ if (ratio < 1.0) { + h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); + } + claimed += h->max_size_; + } + unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; + per_thread_cache_size_ = space; +} + +void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + *total_bytes += h->Size(); + if (class_count) { + for (int cl = 0; cl < kNumClasses; ++cl) { + class_count[cl] += h->freelist_length(cl); + } + } + } +} + +void ThreadCache::set_overall_thread_cache_size(size_t new_size) { + // Clip the value to a reasonable range + if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; + if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB + overall_thread_cache_size_ = new_size; + + RecomputePerThreadCacheSize(); +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.5/src/thread_cache.h b/src/third_party/gperftools-2.5/src/thread_cache.h new file mode 100644 index 00000000000..445a0b57edd --- /dev/null +++ b/src/third_party/gperftools-2.5/src/thread_cache.h @@ -0,0 +1,480 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_THREAD_CACHE_H_ +#define TCMALLOC_THREAD_CACHE_H_ + +#include <config.h> +#ifdef HAVE_PTHREAD +#include <pthread.h> // for pthread_t, pthread_key_t +#endif +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint32_t, uint64_t +#endif +#include <sys/types.h> // for ssize_t +#include "base/commandlineflags.h" +#include "common.h" +#include "linked_list.h" +#include "maybe_threads.h" +#include "page_heap_allocator.h" +#include "sampler.h" +#include "static_vars.h" + +#include "common.h" // for SizeMap, kMaxSize, etc +#include "internal_logging.h" // for ASSERT, etc +#include "linked_list.h" // for SLL_Pop, SLL_PopRange, etc +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "sampler.h" // for Sampler +#include "static_vars.h" // for Static + +DECLARE_int64(tcmalloc_sample_parameter); + +namespace tcmalloc { + +//------------------------------------------------------------------- +// Data kept per thread +//------------------------------------------------------------------- + +class ThreadCache { + public: +#ifdef HAVE_TLS + enum { have_tls = true }; +#else + enum { have_tls = false }; +#endif + + // All ThreadCache objects are kept in a linked list (for stats collection) + ThreadCache* next_; + ThreadCache* prev_; + + void Init(pthread_t tid); + void Cleanup(); + + // Accessors (mostly just for printing stats) + int freelist_length(size_t cl) const { return list_[cl].length(); } + + // Total byte size in cache + size_t Size() const { return size_; } + + // Allocate an object of the given size and class. The size given + // must be the same as the size of the class in the size map. + void* Allocate(size_t size, size_t cl); + void Deallocate(void* ptr, size_t size_class); + + void Scavenge(); + + int GetSamplePeriod(); + + // Record allocation of "k" bytes. Return true iff allocation + // should be sampled + bool SampleAllocation(size_t k); + + static void InitModule(); + static void InitTSD(); + static ThreadCache* GetThreadHeap(); + static ThreadCache* GetCache(); + static ThreadCache* GetCacheIfPresent(); + static ThreadCache* GetCacheWhichMustBePresent(); + static ThreadCache* CreateCacheIfNecessary(); + static void BecomeIdle(); + static void BecomeTemporarilyIdle(); + static size_t MinSizeForSlowPath(); + static void SetMinSizeForSlowPath(size_t size); + static void SetUseEmergencyMalloc(); + static void ResetUseEmergencyMalloc(); + static bool IsUseEmergencyMalloc(); + + static bool IsFastPathAllowed() { return MinSizeForSlowPath() != 0; } + + // Return the number of thread heaps in use. + static inline int HeapsInUse(); + + // Adds to *total_bytes the total number of bytes used by all thread heaps. + // Also, if class_count is not NULL, it must be an array of size kNumClasses, + // and this function will increment each element of class_count by the number + // of items in all thread-local freelists of the corresponding size class. + // REQUIRES: Static::pageheap_lock is held. + static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count); + + // Sets the total thread cache size to new_size, recomputing the + // individual thread cache sizes as necessary. + // REQUIRES: Static::pageheap lock is held. 
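+  // (new_size is clipped to the range [kMinThreadCacheSize, 1GB] before
+  // the per-thread sizes are recomputed; see thread_cache.cc.)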
+  static void set_overall_thread_cache_size(size_t new_size);
+  static size_t overall_thread_cache_size() {
+    return overall_thread_cache_size_;
+  }
+
+ private:
+  class FreeList {
+   private:
+    void* list_;           // Linked list of nodes
+
+#ifdef _LP64
+    // On 64-bit hardware, manipulating 16-bit values may be slightly slow.
+    uint32_t length_;      // Current length.
+    uint32_t lowater_;     // Low water mark for list length.
+    uint32_t max_length_;  // Dynamic max list length based on usage.
+    // Tracks the number of times a deallocation has caused
+    // length_ > max_length_.  After the kMaxOverages'th time, max_length_
+    // shrinks and length_overages_ is reset to zero.
+    uint32_t length_overages_;
+#else
+    // If we aren't using 64-bit pointers then pack these into less space.
+    uint16_t length_;
+    uint16_t lowater_;
+    uint16_t max_length_;
+    uint16_t length_overages_;
+#endif
+
+   public:
+    void Init() {
+      list_ = NULL;
+      length_ = 0;
+      lowater_ = 0;
+      max_length_ = 1;
+      length_overages_ = 0;
+    }
+
+    // Return current length of list
+    size_t length() const {
+      return length_;
+    }
+
+    // Return the maximum length of the list.
+    size_t max_length() const {
+      return max_length_;
+    }
+
+    // Set the maximum length of the list.  If 'new_max' < length(), the
+    // client is responsible for removing objects from the list.
+    void set_max_length(size_t new_max) {
+      max_length_ = new_max;
+    }
+
+    // Return the number of times that length() has gone over max_length().
+    size_t length_overages() const {
+      return length_overages_;
+    }
+
+    void set_length_overages(size_t new_count) {
+      length_overages_ = new_count;
+    }
+
+    // Is list empty?
+    bool empty() const {
+      return list_ == NULL;
+    }
+
+    // Low-water mark management
+    int lowwatermark() const { return lowater_; }
+    void clear_lowwatermark() { lowater_ = length_; }
+
+    void Push(void* ptr) {
+      SLL_Push(&list_, ptr);
+      length_++;
+    }
+
+    void* Pop() {
+      ASSERT(list_ != NULL);
+      length_--;
+      if (length_ < lowater_) lowater_ = length_;
+      return SLL_Pop(&list_);
+    }
+
+    void* Next() {
+      return SLL_Next(&list_);
+    }
+
+    void PushRange(int N, void *start, void *end) {
+      SLL_PushRange(&list_, start, end);
+      length_ += N;
+    }
+
+    void PopRange(int N, void **start, void **end) {
+      SLL_PopRange(&list_, N, start, end);
+      ASSERT(length_ >= N);
+      length_ -= N;
+      if (length_ < lowater_) lowater_ = length_;
+    }
+  };
+
+  // Gets and returns an object from the central cache, and, if possible,
+  // also adds some objects of that size class to this thread cache.
+  void* FetchFromCentralCache(size_t cl, size_t byte_size);
+
+  // Releases some number of items from src.  Adjusts the list's max_length
+  // to eventually converge on num_objects_to_move(cl).
+  void ListTooLong(FreeList* src, size_t cl);
+
+  // Releases N items from this thread cache.
+  void ReleaseToCentralCache(FreeList* src, size_t cl, int N);
+
+  // Increase max_size_ by reducing unclaimed_cache_space_ or by
+  // reducing the max_size_ of some other thread.  In both cases,
+  // the delta is kStealAmount.
+  void IncreaseCacheLimit();
+  // Same as above but requires Static::pageheap_lock() is held.
+  void IncreaseCacheLimitLocked();
+
+  // If TLS is available, we also store a copy of the per-thread object
+  // in a __thread variable since __thread variables are faster to read
+  // than pthread_getspecific().  We still need pthread_setspecific()
+  // because __thread variables provide no way to run cleanup code when
+  // a thread is destroyed.
+  // We also give a hint to the compiler to use the "initial exec" TLS
+  // model.
This is faster than the default TLS model, at the cost that
+  // you cannot dlopen this library.  (To see the difference, look at
+  // the CPU use of __tls_get_addr with and without this attribute.)
+  // Since we don't really use dlopen in google code -- and using dlopen
+  // on a malloc replacement is asking for trouble in any case -- that's
+  // a good tradeoff for us.
+#ifdef HAVE___ATTRIBUTE__
+#define ATTR_INITIAL_EXEC __attribute__ ((tls_model ("initial-exec")))
+#else
+#define ATTR_INITIAL_EXEC
+#endif
+
+#ifdef HAVE_TLS
+  struct ThreadLocalData {
+    ThreadCache* heap;
+    // min_size_for_slow_path is 0 if heap is NULL or kMaxSize + 1 otherwise.
+    // The latter is the common case and allows allocation to be faster
+    // than it would be otherwise: typically a single branch will
+    // determine that the requested allocation is no more than kMaxSize
+    // and we can then proceed, knowing that global and thread-local tcmalloc
+    // state is initialized.
+    size_t min_size_for_slow_path;
+
+    bool use_emergency_malloc;
+    size_t old_min_size_for_slow_path;
+  };
+  static __thread ThreadLocalData threadlocal_data_ ATTR_INITIAL_EXEC;
+#endif
+
+  // Thread-specific key.  Initialization here is somewhat tricky
+  // because some Linux startup code invokes malloc() before it
+  // is in a good enough state to handle pthread_key_create().
+  // Therefore, we use TSD keys only after tsd_inited is set to true.
+  // Until then, we use a slow path to get the heap object.
+  static bool tsd_inited_;
+  static pthread_key_t heap_key_;
+
+  // Linked list of heap objects.  Protected by Static::pageheap_lock.
+  static ThreadCache* thread_heaps_;
+  static int thread_heap_count_;
+
+  // A pointer to one of the objects in thread_heaps_.  Represents the next
+  // ThreadCache from which a thread that is over its max_size_ should steal
+  // cache space (kStealAmount at a time).  Round-robin through all of the
+  // objects in thread_heaps_.  Protected by Static::pageheap_lock.
+  static ThreadCache* next_memory_steal_;
+
+  // Overall thread cache size.  Protected by Static::pageheap_lock.
+  static size_t overall_thread_cache_size_;
+
+  // Global per-thread cache size.  Writes are protected by
+  // Static::pageheap_lock.  Reads are done without any locking, which should be
+  // fine as long as size_t can be written atomically and we don't place
+  // invariants between this variable and other pieces of state.
+  static volatile size_t per_thread_cache_size_;
+
+  // Represents overall_thread_cache_size_ minus the sum of max_size_
+  // across all ThreadCaches.  Protected by Static::pageheap_lock.
+  static ssize_t unclaimed_cache_space_;
+
+  // This class is laid out with the most frequently used fields
+  // first so that hot elements are placed on the same cache line.
+
+  size_t size_;                         // Combined size of data
+  size_t max_size_;                     // size_ > max_size_ --> Scavenge()
+
+  // We sample allocations, biased by the size of the allocation
+  Sampler sampler_;                     // A sampler
+
+  FreeList list_[kNumClasses];          // Array indexed by size-class
+
+  pthread_t tid_;                       // Which thread owns it
+  bool in_setspecific_;                 // In call to pthread_setspecific?
+
+  // Allocate a new heap. REQUIRES: Static::pageheap_lock is held.
+  static ThreadCache* NewHeap(pthread_t tid);
+
+  // Use only as pthread thread-specific destructor function.
+  static void DestroyThreadCache(void* ptr);
+
+  static void DeleteCache(ThreadCache* heap);
+  static void RecomputePerThreadCacheSize();
+
+  // Ensure that this class is cacheline-aligned.
This is critical for + // performance, as false sharing would negate many of the benefits + // of a per-thread cache. +} CACHELINE_ALIGNED; + +// Allocator for thread heaps +// This is logically part of the ThreadCache class, but MSVC, at +// least, does not like using ThreadCache as a template argument +// before the class is fully defined. So we put it outside the class. +extern PageHeapAllocator<ThreadCache> threadcache_allocator; + +inline int ThreadCache::HeapsInUse() { + return threadcache_allocator.inuse(); +} + +inline bool ThreadCache::SampleAllocation(size_t k) { +#ifndef NO_TCMALLOC_SAMPLES + return UNLIKELY(FLAGS_tcmalloc_sample_parameter > 0) && sampler_.SampleAllocation(k); +#else + return false; +#endif +} + +inline void* ThreadCache::Allocate(size_t size, size_t cl) { + ASSERT(size <= kMaxSize); + ASSERT(size == Static::sizemap()->ByteSizeForClass(cl)); + + FreeList* list = &list_[cl]; + if (UNLIKELY(list->empty())) { + return FetchFromCentralCache(cl, size); + } + size_ -= size; + return list->Pop(); +} + +inline void ThreadCache::Deallocate(void* ptr, size_t cl) { + FreeList* list = &list_[cl]; + size_ += Static::sizemap()->ByteSizeForClass(cl); + ssize_t size_headroom = max_size_ - size_ - 1; + + // This catches back-to-back frees of allocs in the same size + // class. A more comprehensive (and expensive) test would be to walk + // the entire freelist. But this might be enough to find some bugs. + ASSERT(ptr != list->Next()); + + list->Push(ptr); + ssize_t list_headroom = + static_cast<ssize_t>(list->max_length()) - list->length(); + + // There are two relatively uncommon things that require further work. + // In the common case we're done, and in that case we need a single branch + // because of the bitwise-or trick that follows. + if (UNLIKELY((list_headroom | size_headroom) < 0)) { + if (list_headroom < 0) { + ListTooLong(list, cl); + } + if (size_ >= max_size_) Scavenge(); + } +} + +inline ThreadCache* ThreadCache::GetThreadHeap() { +#ifdef HAVE_TLS + return threadlocal_data_.heap; +#else + return reinterpret_cast<ThreadCache *>( + perftools_pthread_getspecific(heap_key_)); +#endif +} + +inline ThreadCache* ThreadCache::GetCacheWhichMustBePresent() { +#ifdef HAVE_TLS + ASSERT(threadlocal_data_.heap); + return threadlocal_data_.heap; +#else + ASSERT(perftools_pthread_getspecific(heap_key_)); + return reinterpret_cast<ThreadCache *>( + perftools_pthread_getspecific(heap_key_)); +#endif +} + +inline ThreadCache* ThreadCache::GetCache() { + ThreadCache* ptr = NULL; + if (!tsd_inited_) { + InitModule(); + } else { + ptr = GetThreadHeap(); + } + if (ptr == NULL) ptr = CreateCacheIfNecessary(); + return ptr; +} + +// In deletion paths, we do not try to create a thread-cache. This is +// because we may be in the thread destruction code and may have +// already cleaned up the cache for this thread. 
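+// A minimal caller sketch (hypothetical names; the real call sites live in
+// the allocator's fast paths):
+//   ThreadCache* heap = ThreadCache::GetCacheIfPresent();
+//   if (heap != NULL) heap->Deallocate(ptr, cl);
+//   else ...release through a path that does not need a thread cache...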
+inline ThreadCache* ThreadCache::GetCacheIfPresent() {
+#ifndef HAVE_TLS
+  if (!tsd_inited_) return NULL;
+#endif
+  return GetThreadHeap();
+}
+
+inline size_t ThreadCache::MinSizeForSlowPath() {
+#ifdef HAVE_TLS
+  return threadlocal_data_.min_size_for_slow_path;
+#else
+  return 0;
+#endif
+}
+
+inline void ThreadCache::SetMinSizeForSlowPath(size_t size) {
+#ifdef HAVE_TLS
+  threadlocal_data_.min_size_for_slow_path = size;
+#endif
+}
+
+inline void ThreadCache::SetUseEmergencyMalloc() {
+#ifdef HAVE_TLS
+  threadlocal_data_.old_min_size_for_slow_path = threadlocal_data_.min_size_for_slow_path;
+  threadlocal_data_.min_size_for_slow_path = 0;
+  threadlocal_data_.use_emergency_malloc = true;
+#endif
+}
+
+inline void ThreadCache::ResetUseEmergencyMalloc() {
+#ifdef HAVE_TLS
+  threadlocal_data_.min_size_for_slow_path = threadlocal_data_.old_min_size_for_slow_path;
+  threadlocal_data_.use_emergency_malloc = false;
+#endif
+}
+
+inline bool ThreadCache::IsUseEmergencyMalloc() {
+#if defined(HAVE_TLS) && defined(ENABLE_EMERGENCY_MALLOC)
+  return UNLIKELY(threadlocal_data_.use_emergency_malloc);
+#else
+  return false;
+#endif
+}
+
+
+}  // namespace tcmalloc
+
+#endif  // TCMALLOC_THREAD_CACHE_H_
diff --git a/src/third_party/gperftools-2.5/src/windows/TODO b/src/third_party/gperftools-2.5/src/windows/TODO
new file mode 100644
index 00000000000..708ec237ac4
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/windows/TODO
@@ -0,0 +1,86 @@
+* Get heap-profile-table.cc using DeleteMatchingFiles
+* Get heap-profile-table.cc using FillProcSelfMaps, DumpProcSelfMaps
+* Play around with ExperimentalGetStackTrace
+* Support the windows-level memory-allocation functions?  See
+  /home/build/googleclient/earth/client/tools/memorytracking/client/memorytrace/src/memorytrace.cpp
+  /home/build/googleclient/total_recall/common/sitestep/*
+  http://www.internals.com/articles/apispy/apispy.htm
+  http://www.wheaty.net/APISPY32.zip
+* Verify /proc/xxx/maps:
+  http://www.geocities.com/wah_java_dotnet/procmap/index.html
+* Figure out how to edit the executable IAT so tcmalloc.dll is loaded first
+* Use QueryPerformanceCounter instead of GetTickCount() (also for sparsehash)
+
+----
+More info on windows-level memory-allocation functions:
+   C runtime malloc
+   LocalAlloc
+   GlobalAlloc
+   HeapAlloc
+   VirtualAlloc
+   mmap stuff
+
+malloc, LocalAlloc and GlobalAlloc call HeapAlloc, which calls
+VirtualAlloc when needed, which calls VirtualAllocEx (the __sbrk equiv?)
+
+siggi sez: If you want to do a generic job, you probably need to
+preserve the semantics of all of these Win32 calls:
+   Heap32First
+   Heap32ListFirst
+   Heap32ListNext
+   Heap32Next
+   HeapAlloc
+   HeapCompact
+   HeapCreate
+   HeapCreateTagsW
+   HeapDestroy
+   HeapExtend
+   HeapFree
+   HeapLock
+   HeapQueryInformation
+   HeapQueryTagW
+   HeapReAlloc
+   HeapSetInformation
+   HeapSize
+   HeapSummary
+   HeapUnlock
+   HeapUsage
+   HeapValidate
+   HeapWalk
+
+kernel32.dll export functions and nt.dll export functions:
+  http://www.shorthike.com/svn/trunk/tools_win32/dm/lib/kernel32.def
+  http://undocumented.ntinternals.net/
+
+You can edit the executable IAT to have the patching DLL be the
+first one loaded.
+
+The most complete way to intercept system calls is to patch the functions
+(not the IAT).
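+
+(Patching a function's preamble catches every caller; editing a module's
+IAT only redirects calls made through that one module's import table, so
+calls resolved another way -- e.g. via GetProcAddress -- slip through.)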
+
+Microsoft has some built-in routines for heap-checking:
+  http://support.microsoft.com/kb/268343
+
+----
+Itimer replacement:
+  http://msdn2.microsoft.com/en-us/library/ms712713.aspx
+
+----
+Changes I've had to make to the project file:
+
+0) When creating the project file, click on "no autogenerated files"
+
+--- For each project:
+1) Alt-F7 -> General -> [pulldown "all configurations" ] -> Output Directory -> $(SolutionDir)$(ConfigurationName)
+2) Alt-F7 -> General -> [pulldown "all configurations" ] -> Intermediate Directory -> $(ConfigurationName)
+
+--- For each .cc file:
+1) Alt-F7 -> C/C++ -> General -> [pulldown "all configurations"] -> Additional Include Directories --> src/windows + src/
+2) Alt-F7 -> C/C++ -> Code Generation -> Runtime Library -> Multi-threaded, debug/release, DLL or not
+
+--- For DLL:
+3) Alt-F7 -> Linker -> Input -> [pulldown "all configurations" ] -> Module Definition File -> src\windows\vc7and8.def
+--- For binaries depending on a DLL:
+3) Right-click on project -> Project Dependencies -> [add dll]
+--- For static binaries (not depending on a DLL):
+3) Alt-F7 -> C/C++ -> Command Line -> [pulldown "all configurations"] -> /D PERFTOOLS_DLL_DECL=
diff --git a/src/third_party/gperftools-2.5/src/windows/addr2line-pdb.c b/src/third_party/gperftools-2.5/src/windows/addr2line-pdb.c
new file mode 100644
index 00000000000..5c65a0357e5
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/windows/addr2line-pdb.c
@@ -0,0 +1,163 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: David Vitek
+ *
+ * Dump function addresses using Microsoft debug symbols.  This works
+ * on PDB files.  Note that this program will download symbols to
+ * c:\websymbols without asking.
+ */ + +#define WIN32_LEAN_AND_MEAN +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include <stdio.h> +#include <stdlib.h> + +#include <windows.h> +#include <dbghelp.h> + +#define SEARCH_CAP (1024*1024) +#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols" + +void usage() { + fprintf(stderr, "usage: " + "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n"); + fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n"); +} + +int main(int argc, char *argv[]) { + DWORD error; + HANDLE process; + ULONG64 module_base; + int i; + char* search; + char buf[256]; /* Enough to hold one hex address, I trust! */ + int rv = 0; + /* We may add SYMOPT_UNDNAME if --demangle is specified: */ + DWORD symopts = SYMOPT_DEFERRED_LOADS | SYMOPT_DEBUG | SYMOPT_LOAD_LINES; + char* filename = "a.out"; /* The default if -e isn't specified */ + int print_function_name = 0; /* Set to 1 if -f is specified */ + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--functions") == 0 || strcmp(argv[i], "-f") == 0) { + print_function_name = 1; + } else if (strcmp(argv[i], "--demangle") == 0 || + strcmp(argv[i], "-C") == 0) { + symopts |= SYMOPT_UNDNAME; + } else if (strcmp(argv[i], "-e") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, "FATAL ERROR: -e must be followed by a filename\n"); + return 1; + } + filename = argv[i+1]; + i++; /* to skip over filename too */ + } else if (strcmp(argv[i], "--help") == 0) { + usage(); + exit(0); + } else { + usage(); + exit(1); + } + } + + process = GetCurrentProcess(); + + if (!SymInitialize(process, NULL, FALSE)) { + error = GetLastError(); + fprintf(stderr, "SymInitialize returned error : %d\n", error); + return 1; + } + + search = malloc(SEARCH_CAP); + if (SymGetSearchPath(process, search, SEARCH_CAP)) { + if (strlen(search) + sizeof(";" WEBSYM) > SEARCH_CAP) { + fprintf(stderr, "Search path too long\n"); + SymCleanup(process); + return 1; + } + strcat(search, ";" WEBSYM); + } else { + error = GetLastError(); + fprintf(stderr, "SymGetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + strcpy(search, WEBSYM); /* Use a default value */ + } + if (!SymSetSearchPath(process, search)) { + error = GetLastError(); + fprintf(stderr, "SymSetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + } + + SymSetOptions(symopts); + module_base = SymLoadModuleEx(process, NULL, filename, NULL, 0, 0, NULL, 0); + if (!module_base) { + /* SymLoadModuleEx failed */ + error = GetLastError(); + fprintf(stderr, "SymLoadModuleEx returned error : %d for %s\n", + error, filename); + SymCleanup(process); + return 1; + } + + buf[sizeof(buf)-1] = '\0'; /* Just to be safe */ + while (fgets(buf, sizeof(buf)-1, stdin)) { + /* GNU addr2line seems to just do a strtol and ignore any + * weird characters it gets, so we will too. 
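+     * (With base 16, _strtoui64 also accepts an optional "0x" prefix, so
+     * inputs like "00401000" and "0x401000" parse to the same address.)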
+ */ + unsigned __int64 addr = _strtoui64(buf, NULL, 16); + ULONG64 buffer[(sizeof(SYMBOL_INFO) + + MAX_SYM_NAME*sizeof(TCHAR) + + sizeof(ULONG64) - 1) + / sizeof(ULONG64)]; + PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)buffer; + IMAGEHLP_LINE64 line; + DWORD dummy; + pSymbol->SizeOfStruct = sizeof(SYMBOL_INFO); + pSymbol->MaxNameLen = MAX_SYM_NAME; + if (print_function_name) { + if (SymFromAddr(process, (DWORD64)addr, NULL, pSymbol)) { + printf("%s\n", pSymbol->Name); + } else { + printf("??\n"); + } + } + line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); + if (SymGetLineFromAddr64(process, (DWORD64)addr, &dummy, &line)) { + printf("%s:%d\n", line.FileName, (int)line.LineNumber); + } else { + printf("??:0\n"); + } + } + SymUnloadModule64(process, module_base); + SymCleanup(process); + return rv; +} diff --git a/src/third_party/gperftools-2.5/src/windows/auto_testing_hook.h b/src/third_party/gperftools-2.5/src/windows/auto_testing_hook.h new file mode 100644 index 00000000000..fc2b71013e9 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/auto_testing_hook.h @@ -0,0 +1,156 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Utility for using SideStep with unit tests. + +#ifndef CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_ +#define CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_ + +#include "base/basictypes.h" +#include "base/logging.h" +#include "preamble_patcher.h" + +#define SIDESTEP_CHK(x) CHECK(x) +#define SIDESTEP_EXPECT_TRUE(x) SIDESTEP_CHK(x) + +namespace sidestep { + +// Same trick as common/scope_cleanup.h ScopeGuardImplBase +class AutoTestingHookBase { + public: + virtual ~AutoTestingHookBase() {} +}; + +// This is the typedef you normally use for the class, e.g. +// +// AutoTestingHook hook = MakeTestingHook(TargetFunc, HookTargetFunc); +// +// The 'hook' variable will then be destroyed when it goes out of scope. +// +// NOTE: You must not hold this type as a member of another class. 
Its +// destructor will not get called. +typedef const AutoTestingHookBase& AutoTestingHook; + +// This is the class you must use when holding a hook as a member of another +// class, e.g. +// +// public: +// AutoTestingHookHolder holder_; +// MyClass() : my_hook_holder(MakeTestingHookHolder(Target, Hook)) {} +class AutoTestingHookHolder { + public: + explicit AutoTestingHookHolder(AutoTestingHookBase* hook) : hook_(hook) {} + ~AutoTestingHookHolder() { delete hook_; } + private: + AutoTestingHookHolder() {} // disallow + AutoTestingHookBase* hook_; +}; + +// This class helps patch a function, then unpatch it when the object exits +// scope, and also maintains the pointer to the original function stub. +// +// To enable use of the class without having to explicitly provide the +// type of the function pointers (and instead only providing it +// implicitly) we use the same trick as ScopeGuard (see +// common/scope_cleanup.h) uses, so to create a hook you use the MakeHook +// function rather than a constructor. +// +// NOTE: This function is only safe for e.g. unit tests and _not_ for +// production code. See PreamblePatcher class for details. +template <typename T> +class AutoTestingHookImpl : public AutoTestingHookBase { + public: + static AutoTestingHookImpl<T> MakeTestingHook(T target_function, + T replacement_function, + bool do_it) { + return AutoTestingHookImpl<T>(target_function, replacement_function, do_it); + } + + static AutoTestingHookImpl<T>* MakeTestingHookHolder(T target_function, + T replacement_function, + bool do_it) { + return new AutoTestingHookImpl<T>(target_function, + replacement_function, do_it); + } + + ~AutoTestingHookImpl() { + if (did_it_) { + SIDESTEP_CHK(SIDESTEP_SUCCESS == PreamblePatcher::Unpatch( + (void*)target_function_, (void*)replacement_function_, + (void*)original_function_)); + } + } + + // Returns a pointer to the original function. To use this method you will + // have to explicitly create an AutoTestingHookImpl of the specific + // function pointer type (i.e. not use the AutoTestingHook typedef). + T original_function() { + return original_function_; + } + + private: + AutoTestingHookImpl(T target_function, T replacement_function, bool do_it) + : target_function_(target_function), + original_function_(NULL), + replacement_function_(replacement_function), + did_it_(do_it) { + if (do_it) { + SIDESTEP_CHK(SIDESTEP_SUCCESS == PreamblePatcher::Patch(target_function, + replacement_function, + &original_function_)); + } + } + + T target_function_; // always valid + T original_function_; // always valid + T replacement_function_; // always valid + bool did_it_; // Remember if we did it or not... 
+}; + +template <typename T> +inline AutoTestingHookImpl<T> MakeTestingHook(T target, + T replacement, + bool do_it) { + return AutoTestingHookImpl<T>::MakeTestingHook(target, replacement, do_it); +} + +template <typename T> +inline AutoTestingHookImpl<T> MakeTestingHook(T target, T replacement) { + return AutoTestingHookImpl<T>::MakeTestingHook(target, replacement, true); +} + +template <typename T> +inline AutoTestingHookImpl<T>* MakeTestingHookHolder(T target, T replacement) { + return AutoTestingHookImpl<T>::MakeTestingHookHolder(target, replacement, + true); +} + +}; // namespace sidestep + +#endif // CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_ diff --git a/src/third_party/gperftools-2.5/src/windows/config.h b/src/third_party/gperftools-2.5/src/windows/config.h new file mode 100644 index 00000000000..6bbeb1afb1a --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/config.h @@ -0,0 +1,320 @@ +/* A manual version of config.h fit for windows machines. + * + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +/* Sometimes we accidentally #include this config.h instead of the one + in .. -- this is particularly true for msys/mingw, which uses the + unix config.h but also runs code in the windows directory. + */ +#ifdef __MINGW32__ +#include "../config.h" +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#endif + +#ifndef GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +/* used by tcmalloc.h */ +#define GPERFTOOLS_CONFIG_H_ + +/* define this if you are linking tcmalloc statically and overriding the + * default allocators. + * For instructions on how to use this mode, see + * http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + */ +#undef WIN32_OVERRIDE_ALLOCATORS + +/* Define to 1 if your libc has a snprintf implementation */ +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_SNPRINTF 1 +#else +#undef HAVE_SNPRINTF +#endif + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +#undef HAVE_BUILTIN_STACK_POINTER + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +#undef HAVE_CONFLICT_SIGNAL_H + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +#undef HAVE_CYGWIN_SIGNAL_H + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#undef HAVE_DECL_CFREE + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#undef HAVE_DECL_MEMALIGN + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#undef HAVE_DECL_POSIX_MEMALIGN + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#undef HAVE_DECL_PVALLOC + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#undef HAVE_DECL_UNAME + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#undef HAVE_DECL_VALLOC + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if the system has the type `Elf32_Versym'. */ +#undef HAVE_ELF32_VERSYM + +/* Define to 1 if you have the <execinfo.h> header file. */ +#undef HAVE_EXECINFO_H + +/* Define to 1 if you have the <fcntl.h> header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the <features.h> header file. */ +#undef HAVE_FEATURES_H + +/* Define to 1 if you have the `geteuid' function. */ +#undef HAVE_GETEUID + +/* Define to 1 if you have the `getpagesize' function. 
*/ +#define HAVE_GETPAGESIZE 1 /* we define it in windows/port.cc */ + +/* Define to 1 if you have the <glob.h> header file. */ +#undef HAVE_GLOB_H + +/* Define to 1 if you have the <grp.h> header file. */ +#undef HAVE_GRP_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <libunwind.h> header file. */ +#undef HAVE_LIBUNWIND_H + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#undef HAVE_LINUX_PTRACE_H + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <malloc/malloc.h> header file. */ +#undef HAVE_MALLOC_MALLOC_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. */ +#undef HAVE_MMAP + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#undef HAVE_POLL_H + +/* define if libc has program_invocation_name */ +#undef HAVE_PROGRAM_INVOCATION_NAME + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Define to 1 if you have the <pwd.h> header file. */ +#undef HAVE_PWD_H + +/* Define to 1 if you have the `sbrk' function. */ +#undef HAVE_SBRK + +/* Define to 1 if you have the <sched.h> header file. */ +#undef HAVE_SCHED_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_STDINT_H 1 +#else +#undef HAVE_STDINT_H +#endif + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#undef HAVE_STRUCT_MALLINFO + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#undef HAVE_SYS_CDEFS_H + +/* Define to 1 if you have the <sys/malloc.h> header file. */ +#undef HAVE_SYS_MALLOC_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#undef HAVE_SYS_PRCTL_H + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#undef HAVE_SYS_RESOURCE_H + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#undef HAVE_SYS_SYSCALL_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* <sys/ucontext.h> is broken on redhat 7 */ +#undef HAVE_SYS_UCONTEXT_H + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +#undef HAVE_UCONTEXT_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the <unwind.h> header file. */ +#undef HAVE_UNWIND_H + +/* Define to 1 if you have the <valgrind.h> header file. */ +#undef HAVE_VALGRIND_H + +/* define if your compiler has __attribute__ */ +#undef HAVE___ATTRIBUTE__ + +/* Define to 1 if compiler supports __environ */ +#undef HAVE___ENVIRON + +/* Define to 1 if the system has the type `__int64'. 
*/ +#define HAVE___INT64 1 + +/* prefix where we look for installed files */ +#undef INSTALL_PREFIX + +/* Define to 1 if int32_t is equivalent to intptr_t */ +#undef INT32_EQUALS_INTPTR + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + +/* Define to 'volatile' if __malloc_hook is declared volatile */ +#undef MALLOC_HOOK_MAYBE_VOLATILE + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + +/* Name of package */ +#define PACKAGE "gperftools" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "gperftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "gperftools 2.5" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "gperftools" + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.5" + +/* How to access the PC from a struct ucontext */ +#undef PC_FROM_UCONTEXT + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. */ +#ifndef PERFTOOLS_DLL_DECL +# define PERFTOOLS_IS_A_DLL 1 /* not set if you're statically linking */ +# define PERFTOOLS_DLL_DECL __declspec(dllexport) +# define PERFTOOLS_DLL_DECL_FOR_UNITTESTS __declspec(dllimport) +#endif + +/* printf format code for printing a size_t and ssize_t */ +#define PRIdS "Id" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIuS "Iu" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIxS "Ix" + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Version number of package */ +#undef VERSION + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +// --------------------------------------------------------------------- +// Extra stuff not found in config.h.in + +// This must be defined before the windows.h is included. We need at +// least 0x0400 for mutex.h to have access to TryLock, and at least +// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. +// (This latter is an optimization we could take out if need be.) +#ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0501 +#endif + +// We want to make sure not to ever try to #include heap-checker.h +#define NO_HEAP_CHECK 1 + +// TODO(csilvers): include windows/port.h in every relevant source file instead? 
+#include "windows/port.h" + +#endif /* GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ */ diff --git a/src/third_party/gperftools-2.5/src/windows/get_mangled_names.cc b/src/third_party/gperftools-2.5/src/windows/get_mangled_names.cc new file mode 100644 index 00000000000..08bd03be6fb --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/get_mangled_names.cc @@ -0,0 +1,65 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein (opensource@google.com) + +// When you are porting perftools to a new compiler or architecture +// (win64 vs win32) for instance, you'll need to change the mangled +// symbol names for operator new and friends at the top of +// patch_functions.cc. This file helps you do that. +// +// It does this by defining these functions with the proper signature. +// All you need to do is compile this file and the run dumpbin on it. +// (See http://msdn.microsoft.com/en-us/library/5x49w699.aspx for more +// on dumpbin). To do this in MSVC, use the MSVC commandline shell: +// http://msdn.microsoft.com/en-us/library/ms235639(VS.80).aspx) +// +// The run: +// cl /c get_mangled_names.cc +// dumpbin /symbols get_mangled_names.obj +// +// It will print out the mangled (and associated unmangled) names of +// the 8 symbols you need to put at the top of patch_functions.cc + +#include <sys/types.h> // for size_t +#include <new> // for nothrow_t + +static char m; // some dummy memory so new doesn't return NULL. 
+ +void* operator new(size_t size) { return &m; } +void operator delete(void* p) throw() { } +void* operator new[](size_t size) { return &m; } +void operator delete[](void* p) throw() { } + +void* operator new(size_t size, const std::nothrow_t&) throw() { return &m; } +void operator delete(void* p, const std::nothrow_t&) throw() { } +void* operator new[](size_t size, const std::nothrow_t&) throw() { return &m; } +void operator delete[](void* p, const std::nothrow_t&) throw() { } diff --git a/src/third_party/gperftools-2.5/src/windows/gperftools/tcmalloc.h b/src/third_party/gperftools-2.5/src/windows/gperftools/tcmalloc.h new file mode 100644 index 00000000000..1140a65f939 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/gperftools/tcmalloc.h @@ -0,0 +1,139 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR 2 +#define TC_VERSION_MINOR 5 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "gperftools 2.5" + +#ifdef __cplusplus +#define PERFTOOLS_THROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_THROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_THROW +# endif +#endif + +#ifndef PERFTOOLS_DLL_DECL +#define PERFTOOLS_DLL_DECL_DEFINED +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +namespace std { +struct nothrow_t; +} + +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). 
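+   * A minimal call sketch:
+   *   int major, minor;
+   *   const char* patch;
+   *   const char* full = tc_version(&major, &minor, &patch);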
+ */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW; + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). + * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; +} +#endif + +/* We're only un-defining those for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_THROW + +#ifdef PERFTOOLS_DLL_DECL_DEFINED +#undef PERFTOOLS_DLL_DECL +#undef PERFTOOLS_DLL_DECL_DEFINED +#endif + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.5/src/windows/gperftools/tcmalloc.h.in b/src/third_party/gperftools-2.5/src/windows/gperftools/tcmalloc.h.in new file mode 100644 index 00000000000..66bbdb835f6 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/gperftools/tcmalloc.h.in @@ -0,0 +1,139 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR @TC_VERSION_MAJOR@ +#define TC_VERSION_MINOR @TC_VERSION_MINOR@ +#define TC_VERSION_PATCH "@TC_VERSION_PATCH@" +#define TC_VERSION_STRING "gperftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@" + +#ifdef __cplusplus +#define PERFTOOLS_THROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_THROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_THROW +# endif +#endif + +#ifndef PERFTOOLS_DLL_DECL +#define PERFTOOLS_DLL_DECL_DEFINED +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +namespace std { +struct nothrow_t; +} + +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). + */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_THROW; + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_THROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) throw(); + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_THROW; +} +#endif + +/* We're only un-defining those for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_THROW + +#ifdef PERFTOOLS_DLL_DECL_DEFINED +#undef PERFTOOLS_DLL_DECL +#undef PERFTOOLS_DLL_DECL_DEFINED +#endif + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.5/src/windows/ia32_modrm_map.cc b/src/third_party/gperftools-2.5/src/windows/ia32_modrm_map.cc new file mode 100644 index 00000000000..f1f1906289c --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/ia32_modrm_map.cc @@ -0,0 +1,121 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Table of relevant information about how to decode the ModR/M byte. + * Based on information in the IA-32 Intel® Architecture + * Software Developer’s Manual Volume 2: Instruction Set Reference. 
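+ *
+ * Each entry below reads, roughly, as: { is a displacement encoded
+ * after the ModR/M byte?, is a SIB byte present?, size of that
+ * displacement }.  For example, { true, false, OS_BYTE } means an
+ * 8-bit displacement and no SIB byte.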
+ */ + +#include "mini_disassembler.h" +#include "mini_disassembler_types.h" + +namespace sidestep { + +const ModrmEntry MiniDisassembler::s_ia16_modrm_map_[] = { +// mod == 00 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { true, false, OS_WORD }, + /* r/m == 111 */ { false, false, OS_ZERO }, +// mod == 01 + /* r/m == 000 */ { true, false, OS_BYTE }, + /* r/m == 001 */ { true, false, OS_BYTE }, + /* r/m == 010 */ { true, false, OS_BYTE }, + /* r/m == 011 */ { true, false, OS_BYTE }, + /* r/m == 100 */ { true, false, OS_BYTE }, + /* r/m == 101 */ { true, false, OS_BYTE }, + /* r/m == 110 */ { true, false, OS_BYTE }, + /* r/m == 111 */ { true, false, OS_BYTE }, +// mod == 10 + /* r/m == 000 */ { true, false, OS_WORD }, + /* r/m == 001 */ { true, false, OS_WORD }, + /* r/m == 010 */ { true, false, OS_WORD }, + /* r/m == 011 */ { true, false, OS_WORD }, + /* r/m == 100 */ { true, false, OS_WORD }, + /* r/m == 101 */ { true, false, OS_WORD }, + /* r/m == 110 */ { true, false, OS_WORD }, + /* r/m == 111 */ { true, false, OS_WORD }, +// mod == 11 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO } +}; + +const ModrmEntry MiniDisassembler::s_ia32_modrm_map_[] = { +// mod == 00 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, true, OS_ZERO }, + /* r/m == 101 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO }, +// mod == 01 + /* r/m == 000 */ { true, false, OS_BYTE }, + /* r/m == 001 */ { true, false, OS_BYTE }, + /* r/m == 010 */ { true, false, OS_BYTE }, + /* r/m == 011 */ { true, false, OS_BYTE }, + /* r/m == 100 */ { true, true, OS_BYTE }, + /* r/m == 101 */ { true, false, OS_BYTE }, + /* r/m == 110 */ { true, false, OS_BYTE }, + /* r/m == 111 */ { true, false, OS_BYTE }, +// mod == 10 + /* r/m == 000 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 001 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 010 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 011 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 100 */ { true, true, OS_DOUBLE_WORD }, + /* r/m == 101 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 110 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 111 */ { true, false, OS_DOUBLE_WORD }, +// mod == 11 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO }, +}; + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.5/src/windows/ia32_opcode_map.cc b/src/third_party/gperftools-2.5/src/windows/ia32_opcode_map.cc new file mode 100644 index 00000000000..ba6a79e3d19 --- /dev/null +++ 
b/src/third_party/gperftools-2.5/src/windows/ia32_opcode_map.cc @@ -0,0 +1,1219 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Opcode decoding maps. Based on the IA-32 Intel® Architecture + * Software Developer’s Manual Volume 2: Instruction Set Reference. Idea + * for how to lay out the tables in memory taken from the implementation + * in the Bastard disassembly environment. 
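+ *
+ * Rough key to the entries that follow: { table index (0 means the
+ * entry is terminal, nonzero is an index to another decoding table),
+ * instruction type, up to three operand descriptors (addressing
+ * mode | operand type), mnemonic (0 if none), whether decoding
+ * changes under a prefix, and the alternate decodings for the F2h,
+ * F3h, and 66h prefixes }.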
+ */ + +#include "mini_disassembler.h" + +namespace sidestep { + +/* +* This is the first table to be searched; the first field of each +* Opcode in the table is either 0 to indicate you're in the +* right table, or an index to the correct table, in the global +* map g_pentiumOpcodeMap +*/ +const Opcode s_first_opcode_byte[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF */ { 1, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x10 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x11 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x12 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x13 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x14 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x15 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x16 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x17 */ { 0, IT_GENERIC, AM_REGISTER | 
OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x18 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x19 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1E */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1F */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x20 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x21 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x22 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x23 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x24 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x25 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x26 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x27 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "daa", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x28 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x29 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "das", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x30 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 
0x31 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x32 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x33 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x34 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x35 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x36 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x37 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aaa", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x38 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x39 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aas", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#ifdef _M_X64 + /* REX Prefixes in 64-bit mode. 
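+     In 32-bit builds these same bytes (40h-4Fh) decode instead as
+     the one-byte inc/dec instructions in the #else branch below.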
*/ + /* 0x40 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#else + /* 0x40 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, 
IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#endif + /* 0x50 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x51 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x52 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x53 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x54 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x55 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x56 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x57 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x58 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x59 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5A */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5B */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5C */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x60 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "pushad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x61 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "popad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x62 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_A, AM_NOT_USED, "bound", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 
} },
+ /* 0x63 */ { 0, IT_GENERIC, AM_E | OT_W, AM_G | OT_W, AM_NOT_USED, "arpl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x64 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x65 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x66 */ { 0, IT_PREFIX_OPERAND, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x67 */ { 0, IT_PREFIX_ADDRESS, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x68 */ { 0, IT_GENERIC, AM_I | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x69 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_I | OT_V, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x6A */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x6B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_I | OT_B, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x6C */ { 0, IT_GENERIC, AM_Y | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "insb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x6D */ { 0, IT_GENERIC, AM_Y | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "insd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x6E */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_X | OT_B, AM_NOT_USED, "outsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x6F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_X | OT_V, AM_NOT_USED, "outsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x70 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x71 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x72 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x73 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x74 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x75 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x76 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x77 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "ja", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x78 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "js", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x79 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x7A */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x7B */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x7C */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x7D */ { 0,
IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7E */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7F */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x80 */ { 2, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x81 */ { 3, IT_REFERENCE, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x82 */ { 4, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x83 */ { 5, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x84 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x85 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x86 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x87 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x88 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x89 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8C */ { 0, IT_GENERIC, AM_E | OT_W, AM_S | OT_W, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8D */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_ADDRESS_MODE_M, AM_NOT_USED, "lea", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8E */ { 0, IT_GENERIC, AM_S | OT_W, AM_E | OT_W, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8F */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x90 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "nop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x91 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x92 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x93 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x94 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x95 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x96 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, 
/* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x97 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x98 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cwde", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x99 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cdq", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9A */ { 0, IT_JUMP, AM_A | OT_P, AM_NOT_USED, AM_NOT_USED, "callf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9B */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wait", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9C */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "pushfd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9D */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "popfd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9E */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sahf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lahf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA0 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_O | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA1 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_O | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA2 */ { 0, IT_GENERIC, AM_O | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA3 */ { 0, IT_GENERIC, AM_O | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA4 */ { 0, IT_GENERIC, AM_X | OT_B, AM_Y | OT_B, AM_NOT_USED, "movsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA5 */ { 0, IT_GENERIC, AM_X | OT_V, AM_Y | OT_V, AM_NOT_USED, "movsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA6 */ { 0, IT_GENERIC, AM_X | OT_B, AM_Y | OT_B, AM_NOT_USED, "cmpsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA7 */ { 0, IT_GENERIC, AM_X | OT_V, AM_Y | OT_V, AM_NOT_USED, "cmpsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA8 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAA */ { 0, IT_GENERIC, AM_Y | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "stosb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAB */ { 0, IT_GENERIC, AM_Y | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "stosd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_X| OT_B, AM_NOT_USED, "lodsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_X| OT_V, AM_NOT_USED, "lodsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAE */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_Y | OT_B, AM_NOT_USED, "scasb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_Y | OT_V, AM_NOT_USED, "scasd", false, /* F2h */ { 0 }, /* F3h */ 
{ 0 }, /* 66h */ { 0 } }, + /* 0xB0 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB1 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB2 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB3 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#ifdef _M_X64 + /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#else + /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, 
+#endif + /* 0xC0 */ { 6, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC1 */ { 7, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC2 */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC3 */ { 0, IT_RETURN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC4 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_P, AM_NOT_USED, "les", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC5 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_P, AM_NOT_USED, "lds", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC8 */ { 0, IT_GENERIC, AM_I | OT_W, AM_I | OT_B, AM_NOT_USED, "enter", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "leave", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCA */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "retf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCB */ { 0, IT_RETURN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "retf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCC */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "int3", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCD */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "int", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCE */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "into", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCF */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "iret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD0 */ { 8, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD1 */ { 9, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD2 */ { 10, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD3 */ { 11, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD4 */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "aam", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD5 */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "aad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD7 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "xlat", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + // The following 8 lines would be references to the FPU tables, but we currently + // do not support the FPU instructions in this disassembler. 
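+  // They are marked IT_UNKNOWN below, which should make the
+  // disassembler report failure on FPU opcodes rather than
+  // silently mis-decoding them.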
+ + /* 0xD8 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD9 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDA */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDB */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDC */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDD */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDE */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDF */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + + /* 0xE0 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loopnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE1 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loopz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE2 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE3 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jcxz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE6 */ { 0, IT_GENERIC, AM_I | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE7 */ { 0, IT_GENERIC, AM_I | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE8 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE9 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEA */ { 0, IT_JUMP, AM_A | OT_P, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEB */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_REGISTER | OT_W, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xED */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_W, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEE */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEF */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_REGISTER | OT_V, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF0 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lock:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 
66h */ { 0 } }, + /* 0xF2 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "repne:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF3 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rep:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF4 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "hlt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF5 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cmc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF6 */ { 12, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF7 */ { 13, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF8 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "stc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFA */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cli", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFB */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFC */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFD */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "std", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFE */ { 14, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFF */ { 15, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f[] = { + /* 0x0 */ { 16, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 17, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "lar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "lsl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "invd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wbinvd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ud2", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 
66h */ { 0 } }, + /* 0xC */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x10 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "movups", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "movsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "movss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "movupd" } }, + /* 0x11 */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movups", true, + /* F2h */ { 0, IT_GENERIC, AM_W | OT_SD, AM_V | OT_SD, AM_NOT_USED, "movsd" }, + /* F3h */ { 0, IT_GENERIC, AM_W | OT_SS, AM_V | OT_SS, AM_NOT_USED, "movss" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movupd" } }, + /* 0x12 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhlps" }, // only one of ... + /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhlps" }, // ...these two is correct, Intel doesn't specify which + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_S, AM_NOT_USED, "movlpd" } }, + /* 0x13 */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movlps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movlpd" } }, + /* 0x14 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_Q, AM_NOT_USED, "unpcklps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_Q, AM_NOT_USED, "unpcklpd" } }, + /* 0x15 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_Q, AM_NOT_USED, "unpckhps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_Q, AM_NOT_USED, "unpckhpd" } }, + /* 0x16 */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movhps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlhps" }, // only one of... 
+ /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlhps" }, // ...these two is correct, Intel doesn't specify which + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movhpd" } }, + /* 0x17 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhpd" } }, + /* 0x18 */ { 18, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x19 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1A */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1B */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1C */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1D */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1E */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1F */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x20 */ { 0, IT_GENERIC, AM_R | OT_D, AM_C | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x21 */ { 0, IT_GENERIC, AM_R | OT_D, AM_D | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x22 */ { 0, IT_GENERIC, AM_C | OT_D, AM_R | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x23 */ { 0, IT_GENERIC, AM_D | OT_D, AM_R | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x24 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x25 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x26 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x27 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x28 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "movaps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "movapd" } }, + /* 0x29 */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movaps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movapd" } }, + /* 0x2A */ { 0, IT_GENERIC, AM_V | OT_PS, AM_Q | OT_Q, AM_NOT_USED, "cvtpi2ps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_E | OT_D, AM_NOT_USED, "cvtsi2sd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_E | OT_D, AM_NOT_USED, "cvtsi2ss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_Q | OT_DQ, AM_NOT_USED, "cvtpi2pd" } }, + /* 0x2B */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movntps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movntpd" } }, + /* 0x2C */ { 0, IT_GENERIC, AM_Q | 
OT_Q, AM_W | OT_PS, AM_NOT_USED, "cvttps2pi", true, + /* F2h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SD, AM_NOT_USED, "cvttsd2si" }, + /* F3h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SS, AM_NOT_USED, "cvttss2si" }, + /* 66h */ { 0, IT_GENERIC, AM_Q | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvttpd2pi" } }, + /* 0x2D */ { 0, IT_GENERIC, AM_Q | OT_Q, AM_W | OT_PS, AM_NOT_USED, "cvtps2pi", true, + /* F2h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SD, AM_NOT_USED, "cvtsd2si" }, + /* F3h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SS, AM_NOT_USED, "cvtss2si" }, + /* 66h */ { 0, IT_GENERIC, AM_Q | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvtpd2pi" } }, + /* 0x2E */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "ucomiss", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "ucomisd" } }, + /* 0x2F */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_SS, AM_NOT_USED, "comiss", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "comisd" } }, + /* 0x30 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wrmsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x31 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdtsc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x32 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdmsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x33 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdpmc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x34 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sysenter", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x35 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sysexit", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x36 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x37 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x38 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x39 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3A */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3B */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3C */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "movnti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3D */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3E */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3F */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x40 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, 
AM_NOT_USED, "cmovc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmova", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x50 */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_PS, AM_NOT_USED, "movmskps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_PD, AM_NOT_USED, "movmskpd" } }, + /* 0x51 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "sqrtps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "sqrtsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "sqrtss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "sqrtpd" } }, + /* 0x52 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "rsqrtps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "rsqrtss" }, + /* 66h */ { 0 } }, + /* 0x53 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "rcpps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "rcpss" }, + /* 66h */ { 0 } }, + /* 0x54 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "andps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "andpd" } }, + /* 0x55 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "andnps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "andnpd" } }, + /* 0x56 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "orps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "orpd" } }, + /* 0x57 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "xorps", true, 
+ /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "xorpd" } }, + /* 0x58 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "addps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "addsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "addss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "addpd" } }, + /* 0x59 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "mulps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "mulsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "mulss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "mulpd" } }, + /* 0x5A */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PS, AM_NOT_USED, "cvtps2pd", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "cvtsd2ss" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "cvtss2sd" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PD, AM_NOT_USED, "cvtpd2ps" } }, + /* 0x5B */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_DQ, AM_NOT_USED, "cvtdq2ps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PS, AM_NOT_USED, "cvttps2dq" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PS, AM_NOT_USED, "cvtps2dq" } }, + /* 0x5C */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "subps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "subsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "subss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "subpd" } }, + /* 0x5D */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "minps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "minsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "minss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "minpd" } }, + /* 0x5E */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "divps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "divsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "divss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "divpd" } }, + /* 0x5F */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "maxps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "maxsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "maxss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "maxpd" } }, + /* 0x60 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpcklbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklbw" } }, + /* 0x61 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpcklwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklwd" } }, + /* 0x62 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckldq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpckldq" } }, + /* 0x63 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packsswb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, 
AM_NOT_USED, "packsswb" } }, + /* 0x64 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtb" } }, + /* 0x65 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtw" } }, + /* 0x66 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtd" } }, + /* 0x67 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packuswb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "packuswb" } }, + /* 0x68 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhbw" } }, + /* 0x69 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhwd" } }, + /* 0x6A */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhdq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhdq" } }, + /* 0x6B */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packssdw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "packssdw" } }, + /* 0x6C */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklqdq" } }, + /* 0x6D */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklqdq" } }, + /* 0x6E */ { 0, IT_GENERIC, AM_P | OT_D, AM_E | OT_D, AM_NOT_USED, "movd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_E | OT_D, AM_NOT_USED, "movd" } }, + /* 0x6F */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "movq", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "movdqu" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "movdqa" } }, + /* 0x70 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_I | OT_B, "pshuf", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshuflw" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshufhw" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshufd" } }, + /* 0x71 */ { 19, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x72 */ { 20, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x73 */ { 21, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x74 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W 
| OT_DQ, AM_NOT_USED, "pcmpeqb" } }, + /* 0x75 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpeqw" } }, + /* 0x76 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpeqd" } }, + /* 0x77 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "emms", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + // The following six opcodes are escapes into the MMX stuff, which this disassembler does not support. + /* 0x78 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x79 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7A */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7B */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7C */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7D */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + /* 0x7E */ { 0, IT_GENERIC, AM_E | OT_D, AM_P | OT_D, AM_NOT_USED, "movd", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movq" }, + /* 66h */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_DQ, AM_NOT_USED, "movd" } }, + /* 0x7F */ { 0, IT_GENERIC, AM_Q | OT_Q, AM_P | OT_Q, AM_NOT_USED, "movq", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movdqu" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movdqa" } }, + /* 0x80 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x81 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x82 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x83 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x84 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x85 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x86 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x87 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "ja", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x88 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "js", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x89 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8A */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8B */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jpo", 
false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8C */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8D */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8E */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8F */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x90 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "seto", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x91 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x92 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x93 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x94 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x95 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x96 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x97 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "seta", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x98 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "sets", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x99 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9A */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9B */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9C */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9D */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9E */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9F */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA0 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA1 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA2 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cpuid", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "bt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B, "shld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | 
OT_B | AM_REGISTER, "shld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA6 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA7 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA8 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA9 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAA */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rsm", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAB */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "bts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAC */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B, "shrd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAD */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B | AM_REGISTER, "shrd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAE */ { 22, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAF */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "cmpxchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "cmpxchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB2 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lss", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "btr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB4 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lfs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB5 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lgs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB6 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_B, AM_NOT_USED, "movzx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB7 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "movzx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB8 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ud1", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 23, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "btc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "bsf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "bsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_B, AM_NOT_USED, "movsx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_G 
| OT_V, AM_E | OT_W, AM_NOT_USED, "movsx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xC0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xadd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xC1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "xadd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xC2 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_I | OT_B, "cmpps", true,
+ /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_I | OT_B, "cmpsd" },
+ /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_I | OT_B, "cmpss" },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_I | OT_B, "cmppd" } },
+ /* 0xC3 */ { 0, IT_GENERIC, AM_E | OT_D, AM_G | OT_D, AM_NOT_USED, "movnti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xC4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_E | OT_D, AM_I | OT_B, "pinsrw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_E | OT_D, AM_I | OT_B, "pinsrw" } },
+ /* 0xC5 */ { 0, IT_GENERIC, AM_G | OT_D, AM_P | OT_Q, AM_I | OT_B, "pextrw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_G | OT_D, AM_V | OT_DQ, AM_I | OT_B, "pextrw" } },
+ /* 0xC6 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_I | OT_B, "shufps", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_I | OT_B, "shufpd" } },
+ /* 0xC7 */ { 24, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xC8 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xC9 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xCA */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xCB */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xCC */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xCD */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xCE */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xCF */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xD0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xD1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrlw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrlw" } },
+ /* 0xD2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrld", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrld" } },
+ /* 0xD3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrlq", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrlq" } },
+ /* 0xD4 */ { 0, IT_GENERIC, AM_P | OT_Q,
AM_Q | OT_Q, AM_NOT_USED, "paddq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddq" } }, + /* 0xD5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmullw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmullw" } }, + /* 0xD6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "unused without prefix", true, + /* F2h */ { 0, IT_GENERIC, AM_P | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movdq2q" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_Q | OT_Q, AM_NOT_USED, "movq2dq" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movq" } }, + /* 0xD7 */ { 0, IT_GENERIC, AM_G | OT_D, AM_P | OT_Q, AM_NOT_USED, "pmovmskb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_G | OT_D, AM_V | OT_DQ, AM_NOT_USED, "pmovmskb" } }, + /* 0xD8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubusb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubusb" } }, + /* 0xD9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubusw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubusw" } }, + /* 0xDA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pminub", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pminub" } }, + /* 0xDB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pand", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pand" } }, + /* 0xDC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddusb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddusb" } }, + /* 0xDD */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddusw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddusw" } }, + /* 0xDE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaxub", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaxub" } }, + /* 0xDF */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pandn", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pandn" } }, + /* 0xE0 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pavgb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pavgb" } }, + /* 0xE1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psraw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrqw" } }, + /* 0xE2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrad", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrad" } }, + /* 0xE3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pavgw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pavgw" } }, + /* 0xE4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmulhuw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, 
AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmulhuw" } }, + /* 0xE5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmulhuw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmulhw" } }, + /* 0xE6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvtpd2dq" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_DQ, AM_NOT_USED, "cvtdq2pd" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvttpd2dq" } }, + /* 0xE7 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movntq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movntdq" } }, + /* 0xE8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubsb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubsb" } }, + /* 0xE9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubsw" } }, + /* 0xEA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pminsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pminsw" } }, + /* 0xEB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "por", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "por" } }, + /* 0xEC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddsb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddsb" } }, + /* 0xED */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddsw" } }, + /* 0xEE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaxsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaxsw" } }, + /* 0xEF */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pxor", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pxor" } }, + /* 0xF0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psllw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psllw" } }, + /* 0xF2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pslld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pslld" } }, + /* 0xF3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psllq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psllq" } }, + /* 0xF4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmuludq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmuludq" } }, + /* 0xF5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaddwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, 
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaddwd" } }, + /* 0xF6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psadbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psadbw" } }, + /* 0xF7 */ { 0, IT_GENERIC, AM_P | OT_PI, AM_Q | OT_PI, AM_NOT_USED, "maskmovq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "maskmovdqu" } }, + /* 0xF8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubb" } }, + /* 0xF9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubw" } }, + /* 0xFA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubd" } }, + /* 0xFB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubq" } }, + /* 0xFC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddb" } }, + /* 0xFD */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddw" } }, + /* 0xFE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddd" } }, + /* 0xFF */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f00[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "sldt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "str", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "lldt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "ltr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "verr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "verw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f01[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "sgdt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "sidt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_M | 
OT_S, AM_NOT_USED, AM_NOT_USED, "lgdt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "lidt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "smsw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "lmsw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_M | OT_B, AM_NOT_USED, AM_NOT_USED, "invlpg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f18[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_M | OT_ADDRESS_MODE_M, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f71[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrlw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrlw" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psraw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psraw" } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psllw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psllw" } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f72[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 
} }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrld" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrad", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrad" } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "pslld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslld" } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f73[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrlq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrlq" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psllq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psllq" } }, + /* 0x7 */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslldq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslldq" } }, +}; + +const Opcode s_opcode_byte_after_0fae[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "fxsave", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "fxrstor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ldmxcsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "stmxcsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "mfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clflush/sfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +}; + +const Opcode s_opcode_byte_after_0fba[] = { + /* 0x0 */ { 0, IT_UNUSED, 
AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "bt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "bts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "btr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "btc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0fc7[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_M | OT_Q, AM_NOT_USED, AM_NOT_USED, "cmpxch8b", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_80[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_81[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* 
F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_82[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_83[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_c0[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_c1[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rol", 
false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d0[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d1[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d2[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 
}, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d3[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_f6[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "not", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "neg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, OT_B | AM_REGISTER, AM_E | OT_B, AM_NOT_USED, "mul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, OT_B | AM_REGISTER, AM_E | OT_B, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_E | OT_B, AM_NOT_USED, "div", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_E | OT_B, AM_NOT_USED, "idiv", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_f7[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, 
+ /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "not", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "neg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "mul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "div", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "idiv", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_fe[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_ff[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_JUMP, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_JUMP, AM_E | OT_P, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_JUMP, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_JUMP, AM_E | OT_P, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +/* +* A table of all the other tables, containing some extra information, e.g. +* how to mask out the byte we're looking at. 
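+* For example, each of the ModR/M-extension tables listed below uses shift 3
+* and mask 0x07, which isolates the reg field (bits 5..3) of the ModR/M byte
+* and uses it to select among the eight /0../7 opcode extensions.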
+*/ +const OpcodeTable MiniDisassembler::s_ia32_opcode_map_[]={ + // One-byte opcodes and jumps to larger + /* 0 */ {s_first_opcode_byte, 0, 0xff, 0, 0xff}, + // Two-byte opcodes (second byte) + /* 1 */ {s_opcode_byte_after_0f, 0, 0xff, 0, 0xff}, + // Start of tables for opcodes using ModR/M bits as extension + /* 2 */ {s_opcode_byte_after_80, 3, 0x07, 0, 0x07}, + /* 3 */ {s_opcode_byte_after_81, 3, 0x07, 0, 0x07}, + /* 4 */ {s_opcode_byte_after_82, 3, 0x07, 0, 0x07}, + /* 5 */ {s_opcode_byte_after_83, 3, 0x07, 0, 0x07}, + /* 6 */ {s_opcode_byte_after_c0, 3, 0x07, 0, 0x07}, + /* 7 */ {s_opcode_byte_after_c1, 3, 0x07, 0, 0x07}, + /* 8 */ {s_opcode_byte_after_d0, 3, 0x07, 0, 0x07}, + /* 9 */ {s_opcode_byte_after_d1, 3, 0x07, 0, 0x07}, + /* 10 */ {s_opcode_byte_after_d2, 3, 0x07, 0, 0x07}, + /* 11 */ {s_opcode_byte_after_d3, 3, 0x07, 0, 0x07}, + /* 12 */ {s_opcode_byte_after_f6, 3, 0x07, 0, 0x07}, + /* 13 */ {s_opcode_byte_after_f7, 3, 0x07, 0, 0x07}, + /* 14 */ {s_opcode_byte_after_fe, 3, 0x07, 0, 0x01}, + /* 15 */ {s_opcode_byte_after_ff, 3, 0x07, 0, 0x07}, + /* 16 */ {s_opcode_byte_after_0f00, 3, 0x07, 0, 0x07}, + /* 17 */ {s_opcode_byte_after_0f01, 3, 0x07, 0, 0x07}, + /* 18 */ {s_opcode_byte_after_0f18, 3, 0x07, 0, 0x07}, + /* 19 */ {s_opcode_byte_after_0f71, 3, 0x07, 0, 0x07}, + /* 20 */ {s_opcode_byte_after_0f72, 3, 0x07, 0, 0x07}, + /* 21 */ {s_opcode_byte_after_0f73, 3, 0x07, 0, 0x07}, + /* 22 */ {s_opcode_byte_after_0fae, 3, 0x07, 0, 0x07}, + /* 23 */ {s_opcode_byte_after_0fba, 3, 0x07, 0, 0x07}, + /* 24 */ {s_opcode_byte_after_0fc7, 3, 0x07, 0, 0x01} +}; + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.5/src/windows/mingw.h b/src/third_party/gperftools-2.5/src/windows/mingw.h new file mode 100644 index 00000000000..0586e624ce1 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/mingw.h @@ -0,0 +1,72 @@ +/* -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * ---
+ * Author: Craig Silverstein
+ *
+ * MinGW is an interesting mix of unix and windows. We use a normal
+ * configure script, but still need the windows port.h to define some
+ * stuff that MinGW doesn't support, like pthreads.
+ */
+
+#ifndef GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_
+#define GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_
+
+#ifdef __MINGW32__
+
+// Older versions of the mingw msvcrt don't define _aligned_malloc
+#if __MSVCRT_VERSION__ < 0x0700
+# define PERFTOOLS_NO_ALIGNED_MALLOC 1
+#endif
+
+// This must be defined before windows.h is included. We need at
+// least 0x0400 for mutex.h to have access to TryLock, and at least
+// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
+// (This latter is an optimization we could take out if need be.)
+#ifndef _WIN32_WINNT
+# define _WIN32_WINNT 0x0501
+#endif
+
+#define HAVE_SNPRINTF 1
+
+// Some mingw distributions have a pthreads wrapper, but it doesn't
+// work as well as native windows spinlocks (at least for us). So
+// pretend the pthreads wrapper doesn't exist, even when it does.
+#ifndef HAVE_PTHREAD_DESPITE_ASKING_FOR
+#undef HAVE_PTHREAD
+#endif
+
+#define HAVE_PID_T
+
+#include "windows/port.h"
+
+#endif /* __MINGW32__ */
+
+#endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */
diff --git a/src/third_party/gperftools-2.5/src/windows/mini_disassembler.cc b/src/third_party/gperftools-2.5/src/windows/mini_disassembler.cc
new file mode 100644
index 00000000000..0c620047cec
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/windows/mini_disassembler.cc
@@ -0,0 +1,432 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ *
+ * Implementation of MiniDisassembler.
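+ *
+ * A minimal usage sketch (illustrative only; 'code' stands in for any
+ * readable unsigned char* pointing at instruction bytes):
+ *
+ *   sidestep::MiniDisassembler disassembler;
+ *   unsigned int instruction_bytes = 0;
+ *   sidestep::InstructionType type =
+ *       disassembler.Disassemble(code, instruction_bytes);
+ *
+ * On success, type is e.g. IT_JUMP or IT_GENERIC, and instruction_bytes
+ * has been incremented by the length of the instruction.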
+ */
+
+#include "mini_disassembler.h"
+
+namespace sidestep {
+
+MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
+                                   bool address_default_is_32_bits)
+    : operand_default_is_32_bits_(operand_default_is_32_bits),
+      address_default_is_32_bits_(address_default_is_32_bits) {
+  Initialize();
+}
+
+MiniDisassembler::MiniDisassembler()
+    : operand_default_is_32_bits_(true),
+      address_default_is_32_bits_(true) {
+  Initialize();
+}
+
+InstructionType MiniDisassembler::Disassemble(
+    unsigned char* start_byte,
+    unsigned int& instruction_bytes) {
+  // Clean up any state from previous invocations.
+  Initialize();
+
+  // Start by processing any prefixes.
+  unsigned char* current_byte = start_byte;
+  unsigned int size = 0;
+  InstructionType instruction_type = ProcessPrefixes(current_byte, size);
+
+  if (IT_UNKNOWN == instruction_type)
+    return instruction_type;
+
+  current_byte += size;
+  size = 0;
+
+  // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
+  // and address_is_32_bits_ flags are correctly set.
+
+  instruction_type = ProcessOpcode(current_byte, 0, size);
+
+  // Check for errors while processing the instruction.
+  if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
+    return IT_UNKNOWN;
+  }
+
+  current_byte += size;
+
+  // Invariant: operand_bytes_ indicates the total size of operands
+  // specified by the opcode and/or ModR/M byte and/or SIB byte.
+  // current_byte points to the first byte after the ModR/M byte, or after
+  // the SIB byte if it is present (i.e. the first byte of any operands
+  // encoded in the instruction).
+
+  // We get the total length of any prefixes, the opcode, and the ModR/M and
+  // SIB bytes if present, by taking the difference of the original starting
+  // address and the current byte (which points to the first byte of the
+  // operands if present, or to the first byte of the next instruction if
+  // they are not). Adding the count of bytes in the operands encoded in
+  // the instruction gives us the full length of the instruction in bytes.
+  instruction_bytes += operand_bytes_ + (current_byte - start_byte);
+
+  // Return the instruction type, which was set by ProcessOpcode().
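+  //
+  // (Illustrative walk-through, not part of the original source: for
+  // 'add eax, 1', encoded as 83 C0 01, there are no prefixes, the opcode
+  // and ModR/M bytes account for two bytes, and the imm8 operand adds one
+  // more, so instruction_bytes is incremented by 3.)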
+ return instruction_type_; +} + +void MiniDisassembler::Initialize() { + operand_is_32_bits_ = operand_default_is_32_bits_; + address_is_32_bits_ = address_default_is_32_bits_; +#ifdef _M_X64 + operand_default_support_64_bits_ = true; +#else + operand_default_support_64_bits_ = false; +#endif + operand_is_64_bits_ = false; + operand_bytes_ = 0; + have_modrm_ = false; + should_decode_modrm_ = false; + instruction_type_ = IT_UNKNOWN; + got_f2_prefix_ = false; + got_f3_prefix_ = false; + got_66_prefix_ = false; +} + +InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, + unsigned int& size) { + InstructionType instruction_type = IT_GENERIC; + const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; + + switch (opcode.type_) { + case IT_PREFIX_ADDRESS: + address_is_32_bits_ = !address_default_is_32_bits_; + goto nochangeoperand; + case IT_PREFIX_OPERAND: + operand_is_32_bits_ = !operand_default_is_32_bits_; + nochangeoperand: + case IT_PREFIX: + + if (0xF2 == (*start_byte)) + got_f2_prefix_ = true; + else if (0xF3 == (*start_byte)) + got_f3_prefix_ = true; + else if (0x66 == (*start_byte)) + got_66_prefix_ = true; + else if (operand_default_support_64_bits_ && (*start_byte) & 0x48) + operand_is_64_bits_ = true; + + instruction_type = opcode.type_; + size ++; + // we got a prefix, so add one and check next byte + ProcessPrefixes(start_byte + 1, size); + default: + break; // not a prefix byte + } + + return instruction_type; +} + +InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, + unsigned int table_index, + unsigned int& size) { + const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table + unsigned char current_byte = (*start_byte) >> table.shift_; + current_byte = current_byte & table.mask_; // Mask out the bits we will use + + // Check whether the byte we have is inside the table we have. + if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { + instruction_type_ = IT_UNKNOWN; + return instruction_type_; + } + + const Opcode& opcode = table.table_[current_byte]; + if (IT_UNUSED == opcode.type_) { + // This instruction is not used by the IA-32 ISA, so we indicate + // this to the user. Probably means that we were pointed to + // a byte in memory that was not the start of an instruction. + instruction_type_ = IT_UNUSED; + return instruction_type_; + } else if (IT_REFERENCE == opcode.type_) { + // We are looking at an opcode that has more bytes (or is continued + // in the ModR/M byte). Recursively find the opcode definition in + // the table for the opcode's next byte. + size++; + ProcessOpcode(start_byte + 1, opcode.table_index_, size); + return instruction_type_; + } + + const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode; + if (opcode.is_prefix_dependent_) { + if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f2_prefix_; + } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f3_prefix_; + } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_66_prefix_; + } + } + + // Inv: The opcode type is known. + instruction_type_ = specific_opcode->type_; + + // Let's process the operand types to see if we have any immediate + // operands, and/or a ModR/M byte. 
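+  // (Each operand flag packs an AddressingMethod and an OperandType from
+  // mini_disassembler_types.h; for example, AM_I | OT_B in the opcode map
+  // describes a one-byte immediate operand.)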
+ + ProcessOperand(specific_opcode->flag_dest_); + ProcessOperand(specific_opcode->flag_source_); + ProcessOperand(specific_opcode->flag_aux_); + + // Inv: We have processed the opcode and incremented operand_bytes_ + // by the number of bytes of any operands specified by the opcode + // that are stored in the instruction (not registers etc.). Now + // we need to return the total number of bytes for the opcode and + // for the ModR/M or SIB bytes if they are present. + + if (table.mask_ != 0xff) { + if (have_modrm_) { + // we're looking at a ModR/M byte so we're not going to + // count that into the opcode size + ProcessModrm(start_byte, size); + return IT_GENERIC; + } else { + // need to count the ModR/M byte even if it's just being + // used for opcode extension + size++; + return IT_GENERIC; + } + } else { + if (have_modrm_) { + // The ModR/M byte is the next byte. + size++; + ProcessModrm(start_byte + 1, size); + return IT_GENERIC; + } else { + size++; + return IT_GENERIC; + } + } +} + +bool MiniDisassembler::ProcessOperand(int flag_operand) { + bool succeeded = true; + if (AM_NOT_USED == flag_operand) + return succeeded; + + // Decide what to do based on the addressing mode. + switch (flag_operand & AM_MASK) { + // No ModR/M byte indicated by these addressing modes, and no + // additional (e.g. immediate) parameters. + case AM_A: // Direct address + case AM_F: // EFLAGS register + case AM_X: // Memory addressed by the DS:SI register pair + case AM_Y: // Memory addressed by the ES:DI register pair + case AM_IMPLICIT: // Parameter is implicit, occupies no space in + // instruction + break; + + // There is a ModR/M byte but it does not necessarily need + // to be decoded. + case AM_C: // reg field of ModR/M selects a control register + case AM_D: // reg field of ModR/M selects a debug register + case AM_G: // reg field of ModR/M selects a general register + case AM_P: // reg field of ModR/M selects an MMX register + case AM_R: // mod field of ModR/M may refer only to a general register + case AM_S: // reg field of ModR/M selects a segment register + case AM_T: // reg field of ModR/M selects a test register + case AM_V: // reg field of ModR/M selects a 128-bit XMM register + have_modrm_ = true; + break; + + // In these addressing modes, there is a ModR/M byte and it needs to be + // decoded. No other (e.g. immediate) params than indicated in ModR/M. + case AM_E: // Operand is either a general-purpose register or memory, + // specified by ModR/M byte + case AM_M: // ModR/M byte will refer only to memory + case AM_Q: // Operand is either an MMX register or memory (complex + // evaluation), specified by ModR/M byte + case AM_W: // Operand is either a 128-bit XMM register or memory (complex + // eval), specified by ModR/M byte + have_modrm_ = true; + should_decode_modrm_ = true; + break; + + // These addressing modes specify an immediate or an offset value + // directly, so we need to look at the operand type to see how many + // bytes. + case AM_I: // Immediate data. + case AM_J: // Jump to offset. + case AM_O: // Operand is at offset. + switch (flag_operand & OT_MASK) { + case OT_B: // Byte regardless of operand-size attribute. + operand_bytes_ += OS_BYTE; + break; + case OT_C: // Byte or word, depending on operand-size attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_WORD; + else + operand_bytes_ += OS_BYTE; + break; + case OT_D: // Doubleword, regardless of operand-size attribute. 
operand_bytes_ += OS_DOUBLE_WORD;
+          break;
+        case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
+          operand_bytes_ += OS_DOUBLE_QUAD_WORD;
+          break;
+        case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
+                    // attribute.
+          if (operand_is_32_bits_)
+            operand_bytes_ += OS_48_BIT_POINTER;
+          else
+            operand_bytes_ += OS_32_BIT_POINTER;
+          break;
+        case OT_PS:  // 128-bit packed single-precision floating-point data.
+          operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
+          break;
+        case OT_Q:  // Quadword, regardless of operand-size attribute.
+          operand_bytes_ += OS_QUAD_WORD;
+          break;
+        case OT_S:  // 6-byte pseudo-descriptor.
+          operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
+          break;
+        case OT_SD:  // Scalar Double-Precision Floating-Point Value
+        case OT_PD:  // Unaligned packed double-precision floating point value
+          operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
+          break;
+        case OT_SS:
+          // Scalar element of a 128-bit packed single-precision
+          // floating value.
+          // We don't need floating-point support, so simply report
+          // failure here.
+          succeeded = false;
+          break;
+        case OT_V:  // Word, doubleword or quadword, depending on operand-size
+                    // attribute.
+          if (operand_is_64_bits_ && flag_operand & AM_I &&
+              flag_operand & IOS_64)
+            operand_bytes_ += OS_QUAD_WORD;
+          else if (operand_is_32_bits_)
+            operand_bytes_ += OS_DOUBLE_WORD;
+          else
+            operand_bytes_ += OS_WORD;
+          break;
+        case OT_W:  // Word, regardless of operand-size attribute.
+          operand_bytes_ += OS_WORD;
+          break;
+
+        // Can safely ignore these.
+        case OT_A:  // Two one-word operands in memory or two double-word
+                    // operands in memory
+        case OT_PI:  // Quadword MMX technology register (e.g. mm0)
+        case OT_SI:  // Doubleword integer register (e.g., eax)
+          break;
+
+        default:
+          break;
+      }
+      break;
+
+    default:
+      break;
+  }
+
+  return succeeded;
+}
+
+bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
+                                    unsigned int& size) {
+  // If we don't need to decode, we just return the size of the ModR/M
+  // byte (there is never a SIB byte in this case).
+  if (!should_decode_modrm_) {
+    size++;
+    return true;
+  }
+
+  // We never care about the reg field, only the combination of the mod
+  // and r/m fields, so let's start by packing those fields together into
+  // 5 bits.
+  unsigned char modrm = (*start_byte);
+  unsigned char mod = modrm & 0xC0;  // isolate the top two bits (the mod field)
+  modrm = modrm & 0x07;  // isolate the bottom three bits (the r/m field)
+  mod = mod >> 3;  // shift the mod field to the right place
+  modrm = mod | modrm;  // combine the r/m and mod fields as discussed
+  mod = mod >> 3;  // shift the mod field to bits 2..0
+
+  // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
+  // in bits 2..0, and mod contains the mod field in bits 2..0
+
+  const ModrmEntry* modrm_entry = 0;
+  if (address_is_32_bits_)
+    modrm_entry = &s_ia32_modrm_map_[modrm];
+  else
+    modrm_entry = &s_ia16_modrm_map_[modrm];
+
+  // Invariant: modrm_entry points to information that we need to decode
+  // the ModR/M byte.
+
+  // Add to the count of operand bytes, if the ModR/M byte indicates
+  // that some operands are encoded in the instruction.
+  if (modrm_entry->is_encoded_in_instruction_)
+    operand_bytes_ += modrm_entry->operand_size_;
+
+  // Process the SIB byte if necessary, and return the count
+  // of ModR/M and SIB bytes.
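+  // (For instance, ModR/M byte 0x44 has mod == 01 and r/m == 100; in
+  // 32-bit addressing this means a SIB byte follows and a one-byte
+  // displacement is encoded in the instruction.)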
+  if (modrm_entry->use_sib_byte_) {
+    size++;
+    return ProcessSib(start_byte + 1, mod, size);
+  } else {
+    size++;
+    return true;
+  }
+}
+
+bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
+                                  unsigned char mod,
+                                  unsigned int& size) {
+  // get the base field from bits 2..0 of the SIB byte
+  unsigned char sib_base = (*start_byte) & 0x07;
+  if (0x05 == sib_base) {
+    switch (mod) {
+      case 0x00:  // mod == 00
+      case 0x02:  // mod == 10
+        operand_bytes_ += OS_DOUBLE_WORD;
+        break;
+      case 0x01:  // mod == 01
+        operand_bytes_ += OS_BYTE;
+        break;
+      case 0x03:  // mod == 11
+        // According to the IA-32 docs, there does not seem to be a disp
+        // value for this value of mod
+      default:
+        break;
+    }
+  }
+
+  size++;
+  return true;
+}
+
+}; // namespace sidestep
diff --git a/src/third_party/gperftools-2.5/src/windows/mini_disassembler.h b/src/third_party/gperftools-2.5/src/windows/mini_disassembler.h
new file mode 100644
index 00000000000..93bdc0632ff
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/windows/mini_disassembler.h
@@ -0,0 +1,198 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ *
+ * Definition of MiniDisassembler.
+ */
+
+#ifndef GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_
+#define GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_
+
+#include "config.h"
+#include <windows.h>
+#include "mini_disassembler_types.h"
+
+// compatibility shim
+#include "base/logging.h"
+#define SIDESTEP_ASSERT(cond) RAW_DCHECK(cond, #cond)
+#define SIDESTEP_LOG(msg) RAW_VLOG(1, msg)
+
+namespace sidestep {
+
+// This small disassembler is very limited
+// in its functionality, and in fact does only the bare minimum required by the
+// preamble patching utility. It may be useful for other purposes, however.
+//
+// The limitations include at least the following:
+//  -# No support for coprocessor opcodes, MMX, etc.
+//  -# No machine-readable identification of opcodes or decoding of
+//     assembly parameters.
The name of the opcode (as a string) is given,
+//     however, to aid debugging.
+//
+// You may ask what this little disassembler actually does, then? The answer
+// is that it does the following, which is exactly what the patching utility
+// needs:
+//  -# Indicates if an opcode is a jump (any kind) or a return (any kind),
+//     because this is important for the patching utility to determine if
+//     a function is too short, or there are jumps too early in it, for it
+//     to be preamble patched.
+//  -# The opcode length is always calculated, so that the patching utility
+//     can figure out where the next instruction starts, and whether it
+//     already has enough instructions to replace with the absolute jump
+//     to the patching code.
+//
+// The usage is quite simple; just create a MiniDisassembler and use its
+// Disassemble() method.
+//
+// If you would like to extend this disassembler, please refer to the
+// IA-32 Intel® Architecture Software Developer’s Manual Volume 2:
+// Instruction Set Reference for information about operand decoding
+// etc.
+class PERFTOOLS_DLL_DECL MiniDisassembler {
+ public:
+
+  // Creates a new instance and sets defaults.
+  //
+  // @param operand_default_32_bits If true, the default operand size is
+  // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits.
+  // @param address_default_32_bits If true, the default address size is
+  // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits.
+  MiniDisassembler(bool operand_default_32_bits,
+                   bool address_default_32_bits);
+
+  // Equivalent to MiniDisassembler(true, true);
+  MiniDisassembler();
+
+  // Attempts to disassemble a single instruction starting from the
+  // address in memory that start points to.
+  //
+  // @param start Address where disassembly should start.
+  // @param instruction_bytes Variable that will be <b>incremented</b> by
+  // the length in bytes of the instruction.
+  // @return IT_JUMP, IT_RETURN or IT_GENERIC on success, IT_UNKNOWN if
+  // unable to disassemble, or IT_UNUSED if this seems to be an unused
+  // opcode. In the last two (error) cases, instruction_bytes is left
+  // unchanged.
+  //
+  // @post This instance of the disassembler is ready to be used again,
+  // with unchanged defaults from creation time.
+  InstructionType Disassemble(unsigned char* start, unsigned int& instruction_bytes);
+
+ private:
+
+  // Makes the disassembler ready for reuse.
+  void Initialize();
+
+  // Sets the flags for address and operand sizes based on any prefix
+  // bytes found, incrementing size by the number of prefix bytes.
+  // @return The instruction type of the prefix, or IT_GENERIC if the
+  // byte is not a prefix.
+  InstructionType ProcessPrefixes(unsigned char* start, unsigned int& size);
+
+  // Sets the flag for whether we have ModR/M, and increments
+  // operand_bytes_ if any are specified by the opcode directly;
+  // size is incremented by the number of opcode bytes.
+  // @return The decoded instruction type (also stored in instruction_type_).
+  InstructionType ProcessOpcode(unsigned char* start,
+                                unsigned int table,
+                                unsigned int& size);
+
+  // Checks the type of the supplied operand. Increments
+  // operand_bytes_ if it directly indicates an immediate etc.
+  // operand. Sets have_modrm_ if the operand requires
+  // a ModR/M byte.
+  bool ProcessOperand(int flag_operand);
+
+  // Increments operand_bytes_ by the size specified by ModR/M and
+  // by SIB if present.
+  // @return true on success; size is incremented by one for the ModR/M
+  // byte and by one more if a SIB byte is present.
+  bool ProcessModrm(unsigned char* start, unsigned int& size);
+
+  // Processes the SIB byte that start points to.
+  // @param start Pointer to the SIB byte.
+  // @param mod The mod field from the ModR/M byte.
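+  // @param size Variable that will be incremented by the number of
+  //    bytes processed.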
+  // @return true to indicate success (a SIB byte is always one byte).
+  bool ProcessSib(unsigned char* start, unsigned char mod, unsigned int& size);
+
+  // The instruction type we have decoded from the opcode.
+  InstructionType instruction_type_;
+
+  // Counts the number of bytes occupied by operands in
+  // the current instruction (note: we don't care about how large
+  // operands stored in registers etc. are).
+  unsigned int operand_bytes_;
+
+  // True iff there is a ModR/M byte in this instruction.
+  bool have_modrm_;
+
+  // True iff we need to decode the ModR/M byte (sometimes it just
+  // points to a register; we can tell by the addressing mode).
+  bool should_decode_modrm_;
+
+  // Current operand size is 32 bits if true, 16 bits if false.
+  bool operand_is_32_bits_;
+
+  // Default operand size is 32 bits if true, 16 bits if false.
+  bool operand_default_is_32_bits_;
+
+  // Current address size is 32 bits if true, 16 bits if false.
+  bool address_is_32_bits_;
+
+  // Default address size is 32 bits if true, 16 bits if false.
+  bool address_default_is_32_bits_;
+
+  // Determines if 64-bit operands are supported (x64).
+  bool operand_default_support_64_bits_;
+
+  // Current operand size is 64 bits if true, 32 bits if false.
+  bool operand_is_64_bits_;
+
+  // Huge opcode table based on the IA-32 manual, defined
+  // in ia32_opcode_map.cc
+  static const OpcodeTable s_ia32_opcode_map_[];
+
+  // Somewhat smaller table to help with decoding ModR/M bytes
+  // when 16-bit addressing mode is being used. Defined in
+  // ia32_modrm_map.cc
+  static const ModrmEntry s_ia16_modrm_map_[];
+
+  // Somewhat smaller table to help with decoding ModR/M bytes
+  // when 32-bit addressing mode is being used. Defined in
+  // ia32_modrm_map.cc
+  static const ModrmEntry s_ia32_modrm_map_[];
+
+  // Indicators of whether we got certain prefixes that certain
+  // silly Intel instructions depend on in nonstandard ways for
+  // their behaviors.
+  bool got_f2_prefix_, got_f3_prefix_, got_66_prefix_;
+};
+
+}; // namespace sidestep
+
+#endif  // GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_
diff --git a/src/third_party/gperftools-2.5/src/windows/mini_disassembler_types.h b/src/third_party/gperftools-2.5/src/windows/mini_disassembler_types.h
new file mode 100644
index 00000000000..06d475504e4
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/windows/mini_disassembler_types.h
@@ -0,0 +1,237 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ *
+ * Several simple types used by the disassembler and some of the patching
+ * mechanisms.
+ */
+
+#ifndef GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_
+#define GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_
+
+namespace sidestep {
+
+// Categories of instructions that we care about
+enum InstructionType {
+  // This opcode is not used
+  IT_UNUSED,
+  // This disassembler does not recognize this opcode (error)
+  IT_UNKNOWN,
+  // This is not an instruction but a reference to another table
+  IT_REFERENCE,
+  // This byte is a prefix byte that we can ignore
+  IT_PREFIX,
+  // This is a prefix byte that switches to the nondefault address size
+  IT_PREFIX_ADDRESS,
+  // This is a prefix byte that switches to the nondefault operand size
+  IT_PREFIX_OPERAND,
+  // A jump or call instruction
+  IT_JUMP,
+  // A return instruction
+  IT_RETURN,
+  // Any other type of instruction (in this case we don't care what it is)
+  IT_GENERIC,
+};
+
+// Lists IA-32 operand sizes in multiples of 8 bits
+enum OperandSize {
+  OS_ZERO = 0,
+  OS_BYTE = 1,
+  OS_WORD = 2,
+  OS_DOUBLE_WORD = 4,
+  OS_QUAD_WORD = 8,
+  OS_DOUBLE_QUAD_WORD = 16,
+  OS_32_BIT_POINTER = 32/8,
+  OS_48_BIT_POINTER = 48/8,
+  OS_SINGLE_PRECISION_FLOATING = 32/8,
+  OS_DOUBLE_PRECISION_FLOATING = 64/8,
+  OS_DOUBLE_EXTENDED_PRECISION_FLOATING = 80/8,
+  OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING = 128/8,
+  OS_PSEUDO_DESCRIPTOR = 6
+};
+
+// Operand addressing methods from the IA-32 manual. The AM_MASK value
+// is a mask for the rest. The other enumeration values are named for the
+// names given to the addressing methods in the manual, e.g. AM_D is for
+// the D addressing method.
+//
+// The reason we use a full 4 bytes and a mask is that we need to combine
+// these flags with the OperandType flags to store the details
+// on the operand in a single integer.
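+//
+// For example (illustrative), the opcode tables combine these as
+// AM_E | OT_V: an operand that is either a general-purpose register or
+// memory (the E addressing method) whose width follows the operand-size
+// attribute (the v operand type).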
+enum AddressingMethod {
+  AM_NOT_USED = 0,        // This operand is not used for this instruction
+  AM_MASK = 0x00FF0000,   // Mask for the rest of the values in this enumeration
+  AM_A = 0x00010000,      // A addressing type
+  AM_C = 0x00020000,      // C addressing type
+  AM_D = 0x00030000,      // D addressing type
+  AM_E = 0x00040000,      // E addressing type
+  AM_F = 0x00050000,      // F addressing type
+  AM_G = 0x00060000,      // G addressing type
+  AM_I = 0x00070000,      // I addressing type
+  AM_J = 0x00080000,      // J addressing type
+  AM_M = 0x00090000,      // M addressing type
+  AM_O = 0x000A0000,      // O addressing type
+  AM_P = 0x000B0000,      // P addressing type
+  AM_Q = 0x000C0000,      // Q addressing type
+  AM_R = 0x000D0000,      // R addressing type
+  AM_S = 0x000E0000,      // S addressing type
+  AM_T = 0x000F0000,      // T addressing type
+  AM_V = 0x00100000,      // V addressing type
+  AM_W = 0x00110000,      // W addressing type
+  AM_X = 0x00120000,      // X addressing type
+  AM_Y = 0x00130000,      // Y addressing type
+  AM_REGISTER = 0x00140000, // Specific register is always used as this op
+  AM_IMPLICIT = 0x00150000, // An implicit, fixed value is used
+};
+
+// Operand types from the IA-32 manual. The OT_MASK value is
+// a mask for the rest. The rest of the values are named for the
+// names given to these operand types in the manual, e.g. OT_PS
+// is for the ps operand type in the manual.
+//
+// The reason we use a full 4 bytes and a mask is that we need
+// to combine these flags with the AddressingMethod flags to store the details
+// on the operand in a single integer.
+enum OperandType {
+  OT_MASK = 0xFF000000,
+  OT_A = 0x01000000,
+  OT_B = 0x02000000,
+  OT_C = 0x03000000,
+  OT_D = 0x04000000,
+  OT_DQ = 0x05000000,
+  OT_P = 0x06000000,
+  OT_PI = 0x07000000,
+  OT_PS = 0x08000000,  // actually unsupported (we don't know its size)
+  OT_Q = 0x09000000,
+  OT_S = 0x0A000000,
+  OT_SS = 0x0B000000,
+  OT_SI = 0x0C000000,
+  OT_V = 0x0D000000,
+  OT_W = 0x0E000000,
+  OT_SD = 0x0F000000,  // scalar double-precision floating-point value
+  OT_PD = 0x10000000,  // double-precision floating point
+  // dummy "operand type" for address mode M - which doesn't specify
+  // operand type
+  OT_ADDRESS_MODE_M = 0x80000000
+};
+
+// Flag that indicates if an immediate operand is 64-bits.
+//
+// The Intel 64 and IA-32 Architecture Software Developer's Manual currently
+// defines MOV as the only instruction supporting a 64-bit immediate operand.
+enum ImmediateOperandSize {
+  IOS_MASK = 0x0000F000,
+  IOS_DEFAULT = 0x0,
+  IOS_64 = 0x00001000
+};
+
+// Everything that's in an Opcode (see below) except the three
+// alternative opcode structs for different prefixes.
+struct SpecificOpcode {
+  // Index to continuation table, or 0 if this is the last
+  // byte in the opcode.
+  int table_index_;
+
+  // The opcode type
+  InstructionType type_;
+
+  // Description of the type of the dest, src and aux operands,
+  // put together from OperandType, AddressingMethod and
+  // ImmediateOperandSize flags.
+  int flag_dest_;
+  int flag_source_;
+  int flag_aux_;
+
+  // We indicate the mnemonic for debugging purposes
+  const char* mnemonic_;
+};
+
+// The information we keep in our tables about each of the different
+// valid instructions recognized by the IA-32 architecture.
+struct Opcode {
+  // Index to continuation table, or 0 if this is the last
+  // byte in the opcode.
+  int table_index_;
+
+  // The opcode type
+  InstructionType type_;
+
+  // Description of the type of the dest, src and aux operands,
+  // put together from an OperandType flag and an AddressingMethod
+  // flag.
+  int flag_dest_;
+  int flag_source_;
+  int flag_aux_;
+
+  // We indicate the mnemonic for debugging purposes
+  const char* mnemonic_;
+
+  // Alternative opcode info if certain prefixes are specified.
+  // In most cases, all of these are zeroed-out. Only used if
+  // is_prefix_dependent_ is true.
+  bool is_prefix_dependent_;
+  SpecificOpcode opcode_if_f2_prefix_;
+  SpecificOpcode opcode_if_f3_prefix_;
+  SpecificOpcode opcode_if_66_prefix_;
+};
+
+// Information about each table entry.
+struct OpcodeTable {
+  // Table of instruction entries
+  const Opcode* table_;
+  // How many bits to shift the byte right <b>before</b> applying mask
+  unsigned char shift_;
+  // Mask to apply to byte being looked at before comparing to table
+  unsigned char mask_;
+  // Minimum/maximum indexes in table.
+  unsigned char min_lim_;
+  unsigned char max_lim_;
+};
+
+// Information about each entry in table used to decode ModR/M byte.
+struct ModrmEntry {
+  // Is the operand encoded as bytes in the instruction (rather than,
+  // e.g., a register, in which case it's just encoded in the
+  // ModR/M byte)?
+  bool is_encoded_in_instruction_;
+
+  // Is there a SIB byte? In this case we always need to decode it.
+  bool use_sib_byte_;
+
+  // What is the size of the operand (only important if it's encoded
+  // in the instruction)?
+  OperandSize operand_size_;
+};
+
+}; // namespace sidestep
+
+#endif  // GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_
diff --git a/src/third_party/gperftools-2.5/src/windows/nm-pdb.c b/src/third_party/gperftools-2.5/src/windows/nm-pdb.c
new file mode 100644
index 00000000000..95a080d6859
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/windows/nm-pdb.c
@@ -0,0 +1,273 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: David Vitek
+ *
+ * Dump function addresses using Microsoft debug symbols. This works
+ * on PDB files. Note that this program will download symbols to
+ * c:\websymbols without asking.
+ */
+
+#define WIN32_LEAN_AND_MEAN
+#define _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_DEPRECATE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>   // for _strdup
+
+#include <windows.h>
+#include <dbghelp.h>
+
+// Unfortunately, there is no versioning info in dbghelp.h so I cannot
+// tell whether it has an old-style (circa VC7.1) IMAGEHLP_MODULE64
+// struct, with only a few fields, or a new-style (circa VC8)
+// IMAGEHLP_MODULE64, with lots of fields. These fields are just used
+// for debugging, so it's fine to just assume the smaller struct, but
+// for most people, using a modern MSVC, the full struct is available.
+// If you are one of those people and would like this extra debugging
+// info, you can uncomment the line below.
+//#define VC8_OR_ABOVE
+
+#define SEARCH_CAP (1024*1024)
+#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols"
+
+typedef struct {
+  char *name;
+  ULONG64 addr;
+  ULONG flags;
+} SYM;
+
+typedef struct {
+  ULONG64 module_base;
+  SYM *syms;
+  DWORD syms_len;
+  DWORD syms_cap;
+} SYM_CONTEXT;
+
+static int sym_cmp(const void *_s1, const void *_s2) {
+  const SYM *s1 = (const SYM *)_s1;
+  const SYM *s2 = (const SYM *)_s2;
+
+  if (s1->addr < s2->addr)
+    return -1;
+  if (s1->addr > s2->addr)
+    return 1;
+  return 0;
+}
+
+static BOOL CALLBACK EnumSymProc(PSYMBOL_INFO symbol_info,
+                                 ULONG symbol_size,
+                                 PVOID user_context) {
+  SYM_CONTEXT *ctx = (SYM_CONTEXT*)user_context;
+  if (symbol_info->Address < ctx->module_base ||
+      (symbol_info->Flags & SYMFLAG_TLSREL)) {
+    return TRUE;
+  }
+  if (ctx->syms_len == ctx->syms_cap) {
+    if (!ctx->syms_cap)
+      ctx->syms_cap++;
+    ctx->syms_cap *= 2;
+    ctx->syms = realloc(ctx->syms, sizeof(ctx->syms[0]) * ctx->syms_cap);
+  }
+  ctx->syms[ctx->syms_len].name = _strdup(symbol_info->Name);
+  ctx->syms[ctx->syms_len].addr = symbol_info->Address;
+  ctx->syms[ctx->syms_len].flags = symbol_info->Flags;
+  ctx->syms_len++;
+  return TRUE;
+}
+
+static void MaybePrint(const char* var, const char* description) {
+  if (var[0])
+    printf("%s: %s\n", description, var);
+}
+
+static void PrintAvailability(BOOL var, const char *description) {
+  printf("%s: %s\n", description, (var ? "Available" : "Not available"));
+}
+
+static void ShowSymbolInfo(HANDLE process, ULONG64 module_base) {
+  /* Get module information. */
+  IMAGEHLP_MODULE64 module_info;
+  BOOL getmoduleinfo_rv;
+  printf("Load Address: %I64x\n", module_base);
+  memset(&module_info, 0, sizeof(module_info));
+  module_info.SizeOfStruct = sizeof(module_info);
+  getmoduleinfo_rv = SymGetModuleInfo64(process, module_base, &module_info);
+  if (!getmoduleinfo_rv) {
+    printf("Error: SymGetModuleInfo64() failed. Error code: %u\n",
+           GetLastError());
+    return;
+  }
+  /* Display information about symbols, based on kind of symbol.
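+   * (The SymType values tested below come from dbghelp.h's SYM_TYPE
+   * enumeration.)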
*/ + switch (module_info.SymType) { + case SymNone: + printf(("No symbols available for the module.\n")); + break; + case SymExport: + printf(("Loaded symbols: Exports\n")); + break; + case SymCoff: + printf(("Loaded symbols: COFF\n")); + break; + case SymCv: + printf(("Loaded symbols: CodeView\n")); + break; + case SymSym: + printf(("Loaded symbols: SYM\n")); + break; + case SymVirtual: + printf(("Loaded symbols: Virtual\n")); + break; + case SymPdb: + printf(("Loaded symbols: PDB\n")); + break; + case SymDia: + printf(("Loaded symbols: DIA\n")); + break; + case SymDeferred: + printf(("Loaded symbols: Deferred\n")); /* not actually loaded */ + break; + default: + printf(("Loaded symbols: Unknown format.\n")); + break; + } + + MaybePrint("Image name", module_info.ImageName); + MaybePrint("Loaded image name", module_info.LoadedImageName); +#ifdef VC8_OR_ABOVE /* TODO(csilvers): figure out how to tell */ + MaybePrint("PDB file name", module_info.LoadedPdbName); + if (module_info.PdbUnmatched || module_info.DbgUnmatched) { + /* This can only happen if the debug information is contained in a + * separate file (.DBG or .PDB) + */ + printf(("Warning: Unmatched symbols.\n")); + } +#endif + + /* Contents */ +#ifdef VC8_OR_ABOVE /* TODO(csilvers): figure out how to tell */ + PrintAvailability("Line numbers", module_info.LineNumbers); + PrintAvailability("Global symbols", module_info.GlobalSymbols); + PrintAvailability("Type information", module_info.TypeInfo); +#endif +} + +void usage() { + fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n"); +} + +int main(int argc, char *argv[]) { + DWORD error; + HANDLE process; + ULONG64 module_base; + SYM_CONTEXT ctx; + int i; + char* search; + char* filename = NULL; + int rv = 0; + /* We may add SYMOPT_UNDNAME if --demangle is specified: */ + DWORD symopts = SYMOPT_DEFERRED_LOADS | SYMOPT_DEBUG; + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--demangle") == 0 || strcmp(argv[i], "-C") == 0) { + symopts |= SYMOPT_UNDNAME; + } else if (strcmp(argv[i], "--help") == 0) { + usage(); + exit(0); + } else { + break; + } + } + if (i != argc - 1) { + usage(); + exit(1); + } + filename = argv[i]; + + process = GetCurrentProcess(); + + if (!SymInitialize(process, NULL, FALSE)) { + error = GetLastError(); + fprintf(stderr, "SymInitialize returned error : %d\n", error); + return 1; + } + + search = malloc(SEARCH_CAP); + if (SymGetSearchPath(process, search, SEARCH_CAP)) { + if (strlen(search) + sizeof(";" WEBSYM) > SEARCH_CAP) { + fprintf(stderr, "Search path too long\n"); + SymCleanup(process); + return 1; + } + strcat(search, ";" WEBSYM); + } else { + error = GetLastError(); + fprintf(stderr, "SymGetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + strcpy(search, WEBSYM); /* Use a default value */ + } + if (!SymSetSearchPath(process, search)) { + error = GetLastError(); + fprintf(stderr, "SymSetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + } + + SymSetOptions(symopts); + module_base = SymLoadModuleEx(process, NULL, filename, NULL, 0, 0, NULL, 0); + if (!module_base) { + /* SymLoadModuleEx failed */ + error = GetLastError(); + fprintf(stderr, "SymLoadModuleEx returned error : %d for %s\n", + error, filename); + SymCleanup(process); + return 1; + } + + ShowSymbolInfo(process, module_base); + + memset(&ctx, 0, sizeof(ctx)); + ctx.module_base = module_base; + if (!SymEnumSymbols(process, module_base, NULL, EnumSymProc, &ctx)) { + error = GetLastError(); + 
fprintf(stderr, "SymEnumSymbols returned error: %d\n", error); + rv = 1; + } else { + DWORD j; + qsort(ctx.syms, ctx.syms_len, sizeof(ctx.syms[0]), sym_cmp); + for (j = 0; j < ctx.syms_len; j++) { + printf("%016I64x X %s\n", ctx.syms[j].addr, ctx.syms[j].name); + } + /* In a perfect world, maybe we'd clean up ctx's memory? */ + } + SymUnloadModule64(process, module_base); + SymCleanup(process); + return rv; +} diff --git a/src/third_party/gperftools-2.5/src/windows/override_functions.cc b/src/third_party/gperftools-2.5/src/windows/override_functions.cc new file mode 100644 index 00000000000..e7917d3a7b6 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/override_functions.cc @@ -0,0 +1,123 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Mike Belshe +// +// To link tcmalloc into a EXE or DLL statically without using the patching +// facility, we can take a stock libcmt and remove all the allocator functions. +// When we relink the EXE/DLL with the modified libcmt and tcmalloc, a few +// functions are missing. This file contains the additional overrides which +// are required in the VS2005 libcmt in order to link the modified libcmt. +// +// See also +// http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + +#include <config.h> + +#ifndef _WIN32 +# error You should only be including this file in a windows environment! 
+#endif + +#ifndef WIN32_OVERRIDE_ALLOCATORS +# error This file is intended for use when overriding allocators +#endif + +#include "tcmalloc.cc" + +extern "C" void* _recalloc(void* p, size_t n, size_t size) { + void* result = realloc(p, n * size); + if (result != NULL) // realloc can fail; don't memset through NULL + memset(result, 0, n * size); + return result; +} + +extern "C" void* _calloc_impl(size_t n, size_t size) { + return calloc(n, size); +} + +extern "C" size_t _msize(void* p) { + return MallocExtension::instance()->GetAllocatedSize(p); +} + +extern "C" intptr_t _get_heap_handle() { + return 0; +} + +// The CRT heap initialization stub. +extern "C" int _heap_init() { + // We intentionally leak this object. It lasts for the process + // lifetime. Trying to teardown at _heap_term() is so late that + // you can't do anything useful anyway. + new TCMallocGuard(); + return 1; +} + +// The CRT heap cleanup stub. +extern "C" void _heap_term() { +} + +extern "C" int _set_new_mode(int flag) { + return tc_set_new_mode(flag); +} + +#ifndef NDEBUG +#undef malloc +#undef free +#undef calloc +int _CrtDbgReport(int, const char*, int, const char*, const char*, ...) { + return 0; +} + +int _CrtDbgReportW(int, const wchar_t*, int, const wchar_t*, const wchar_t*, ...) { + return 0; +} + +int _CrtSetReportMode(int, int) { + return 0; +} + +extern "C" void* _malloc_dbg(size_t size, int , const char*, int) { + return malloc(size); +} + +extern "C" void _free_dbg(void* ptr, int) { + free(ptr); +} + +extern "C" void* _calloc_dbg(size_t n, size_t size, int, const char*, int) { + return calloc(n, size); +} +#endif // NDEBUG + +// We set this to 1 because part of the CRT uses a check of _crtheap != 0 +// to test whether the CRT has been initialized. Once we've ripped out +// the allocators from libcmt, we need to provide this definition so that +// the rest of the CRT is still usable. +extern "C" void* _crtheap = reinterpret_cast<void*>(1); diff --git a/src/third_party/gperftools-2.5/src/windows/patch_functions.cc b/src/third_party/gperftools-2.5/src/windows/patch_functions.cc new file mode 100644 index 00000000000..70771d2911b --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/patch_functions.cc @@ -0,0 +1,1088 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein +// +// The main purpose of this file is to patch the libc allocation +// routines (malloc and friends, but also _msize and other +// windows-specific libc-style routines). However, we also patch +// windows routines to do accounting. We do better at the former than +// the latter. Here are some comments from Paul Pluzhnikov about what +// it might take to do a really good job patching windows routines to +// keep track of memory usage: +// +// "You should intercept at least the following: +// HeapCreate HeapDestroy HeapAlloc HeapReAlloc HeapFree +// RtlCreateHeap RtlDestroyHeap RtlAllocateHeap RtlFreeHeap +// malloc calloc realloc free +// malloc_dbg calloc_dbg realloc_dbg free_dbg +// Some of these call the other ones (but not always), sometimes +// recursively (i.e. HeapCreate may call HeapAlloc on a different +// heap, IIRC)." +// +// Since Paul didn't mention VirtualAllocEx, he may not have even been +// considering all the mmap-like functions that windows has (or he may +// just be ignoring it because he's seen we already patch it). Of the +// above, we do not patch the *_dbg functions, and of the windows +// functions, we only patch HeapAlloc and HeapFree. +// +// The *_dbg functions come into play with /MDd, /MTd, and /MLd, +// probably. It may be ok to just turn off tcmalloc in those cases -- +// if the user wants the windows debug malloc, they probably don't +// want tcmalloc! We should also test with all of /MD, /MT, and /ML, +// which we're not currently doing. + +// TODO(csilvers): try to do better here? Paul does conclude: +// "Keeping track of all of this was a nightmare." + +#ifndef _WIN32 +# error You should only be including windows/patch_functions.cc in a windows environment! +#endif + +#include <config.h> + +#ifdef WIN32_OVERRIDE_ALLOCATORS +#error This file is intended for patching allocators - use override_functions.cc instead. +#endif + +// We use psapi. Non-MSVC systems will have to link this in themselves. +#ifdef _MSC_VER +#pragma comment(lib, "Psapi.lib") +#endif + +// Make sure we always use the 'old' names of the psapi functions. +#ifndef PSAPI_VERSION +#define PSAPI_VERSION 1 +#endif + +#include <windows.h> +#include <stdio.h> +#include <malloc.h> // for _msize and _expand +#include <psapi.h> // for EnumProcessModules, GetModuleInformation, etc. +#include <set> +#include <map> +#include <vector> +#include <base/logging.h> +#include "base/spinlock.h" +#include "gperftools/malloc_hook.h" +#include "malloc_hook-inl.h" +#include "preamble_patcher.h" + +// The maximum number of modules we allow to be in one executable +const int kMaxModules = 8182; + +// These are hard-coded, unfortunately. :-( They are also probably +// compiler specific. See get_mangled_names.cc, in this directory, +// for instructions on how to update these names for your compiler. 
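The constants below are the kind of thing the get_mangled_names.cc helper mentioned above regenerates. As a rough sketch of the general technique (hypothetical file name, not the gperftools tool itself): a translation unit that takes the address of each global allocation function forces external references to them, so their decorated names can be read back out of the object file with dumpbin /symbols and compared against the kMangled* strings.

// mangled_names_sketch.cc (hypothetical). Build with the target MSVC
// toolchain, then run `dumpbin /symbols mangled_names_sketch.obj` and
// look for the ??2@... / ??_U@... / ??3@... / ??_V@... entries.
#include <new>
#include <cstddef>

// Taking the functions' addresses forces the compiler to emit
// external references to them under their decorated names.
void* (*force_new)(std::size_t) = &::operator new;
void* (*force_new_array)(std::size_t) = &::operator new[];
void (*force_delete)(void*) = &::operator delete;
void (*force_delete_array)(void*) = &::operator delete[];

int main() { return 0; }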
+#ifdef _WIN64 +const char kMangledNew[] = "??2@YAPEAX_K@Z"; +const char kMangledNewArray[] = "??_U@YAPEAX_K@Z"; +const char kMangledDelete[] = "??3@YAXPEAX@Z"; +const char kMangledDeleteArray[] = "??_V@YAXPEAX@Z"; +const char kMangledNewNothrow[] = "??2@YAPEAX_KAEBUnothrow_t@std@@@Z"; +const char kMangledNewArrayNothrow[] = "??_U@YAPEAX_KAEBUnothrow_t@std@@@Z"; +const char kMangledDeleteNothrow[] = "??3@YAXPEAXAEBUnothrow_t@std@@@Z"; +const char kMangledDeleteArrayNothrow[] = "??_V@YAXPEAXAEBUnothrow_t@std@@@Z"; +#else +const char kMangledNew[] = "??2@YAPAXI@Z"; +const char kMangledNewArray[] = "??_U@YAPAXI@Z"; +const char kMangledDelete[] = "??3@YAXPAX@Z"; +const char kMangledDeleteArray[] = "??_V@YAXPAX@Z"; +const char kMangledNewNothrow[] = "??2@YAPAXIABUnothrow_t@std@@@Z"; +const char kMangledNewArrayNothrow[] = "??_U@YAPAXIABUnothrow_t@std@@@Z"; +const char kMangledDeleteNothrow[] = "??3@YAXPAXABUnothrow_t@std@@@Z"; +const char kMangledDeleteArrayNothrow[] = "??_V@YAXPAXABUnothrow_t@std@@@Z"; +#endif + +// This is an unused but exported symbol that we can use to tell the +// MSVC linker to bring in libtcmalloc, via the /INCLUDE linker flag. +// Without this, the linker will likely decide that libtcmalloc.dll +// doesn't add anything to the executable (since it does all its work +// through patching, which the linker can't see), and ignore it +// entirely. (The name 'tcmalloc' is already reserved for a +// namespace. I'd rather export a variable named "_tcmalloc", but I +// couldn't figure out how to get that to work. This function exports +// the symbol "__tcmalloc".) +extern "C" PERFTOOLS_DLL_DECL void _tcmalloc(); +void _tcmalloc() { } + +// This is the version needed for windows x64, which has a different +// decoration scheme which doesn't auto-add a leading underscore. +extern "C" PERFTOOLS_DLL_DECL void __tcmalloc(); +void __tcmalloc() { } + +namespace { // most everything here is in an unnamed namespace + +typedef void (*GenericFnPtr)(); + +using sidestep::PreamblePatcher; + +struct ModuleEntryCopy; // defined below + +// These functions are how we override the memory allocation +// functions, just like tcmalloc.cc and malloc_hook.cc do. + +// This is information about the routines we're patching, for a given +// module that implements libc memory routines. A single executable +// can have several libc implementations running about (in different +// .dll's), and we need to patch/unpatch them all. This defines +// everything except the new functions we're patching in, which +// are defined in LibcFunctions, below. +class LibcInfo { + public: + LibcInfo() { + memset(this, 0, sizeof(*this)); // easiest way to initialize the array + } + + bool patched() const { return is_valid(); } + void set_is_valid(bool b) { is_valid_ = b; } + // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx: + // "The load address of a module (lpBaseOfDll) is the same as the HMODULE + // value." + HMODULE hmodule() const { + return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_)); + } + + // Populates all the windows_fn_[] vars based on our module info. + // Returns false if windows_fn_ is all NULL's, because there's + // nothing to patch. Also populates the rest of the module_entry + // info, such as the module's name. 
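+ // (It may also return false when every function it finds is already claimed by another module's LibcInfo; the dedup passes in the implementation NULL out such duplicates.)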
+ bool PopulateWindowsFn(const ModuleEntryCopy& module_entry); + + protected: + void CopyFrom(const LibcInfo& that) { + if (this == &that) + return; + this->is_valid_ = that.is_valid_; + memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_)); + this->module_base_address_ = that.module_base_address_; + this->module_base_size_ = that.module_base_size_; + } + + enum { + kMalloc, kFree, kRealloc, kCalloc, + kNew, kNewArray, kDelete, kDeleteArray, + kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow, + // These are windows-only functions from malloc.h + k_Msize, k_Expand, + // A MS CRT "internal" function, implemented using _calloc_impl + k_CallocCrt, + kNumFunctions + }; + + // I'd like to put these together in a struct (perhaps in the + // subclass, so we can put in perftools_fn_ as well), but vc8 seems + // to have a bug where it doesn't initialize the struct properly if + // we try to take the address of a function that's not yet loaded + // from a dll, as is the common case for static_fn_. So we need + // each to be in its own array. :-( + static const char* const function_name_[kNumFunctions]; + + // This function is only used when statically linking the binary. + // In that case, loading malloc/etc from the dll (via + // PatchOneModule) won't work, since there are no dlls. Instead, + // you just want to be taking the address of malloc/etc directly. + // In the common, non-static-link case, these pointers will all be + // NULL, since this initializer runs before msvcrt.dll is loaded. + static const GenericFnPtr static_fn_[kNumFunctions]; + + // This is the address of the function we are going to patch + // (malloc, etc). Other info about the function is in the + // patch-specific subclasses, below. + GenericFnPtr windows_fn_[kNumFunctions]; + + // This is set to true when this structure is initialized (because + // we're patching a new library) and set to false when it's + // uninitialized (because we've freed that library). + bool is_valid_; + + const void *module_base_address_; + size_t module_base_size_; + + public: + // These shouldn't have to be public, since only subclasses of + // LibcInfo need it, but they do. Maybe something to do with + // templates. Shrug. I hide them down here so users won't see + // them. :-) (OK, I also need to define ctrgProcAddress late.) + bool is_valid() const { return is_valid_; } + GenericFnPtr windows_fn(int ifunction) const { + return windows_fn_[ifunction]; + } + // These three are needed by ModuleEntryCopy. + static const int ctrgProcAddress = kNumFunctions; + static GenericFnPtr static_fn(int ifunction) { + return static_fn_[ifunction]; + } + static const char* const function_name(int ifunction) { + return function_name_[ifunction]; + } +}; + +// Template trickiness: logically, a LibcInfo would include +// Windows_malloc_, origstub_malloc_, and Perftools_malloc_: for a +// given module, these three go together. And in fact, +// Perftools_malloc_ may need to call origstub_malloc_, which means we +// either need to change Perftools_malloc_ to take origstub_malloc_ as +// an argument -- unfortunately impossible since it needs to keep the +// same API as normal malloc -- or we need to write a different +// version of Perftools_malloc_ for each LibcInfo instance we create. +// We choose the second route, and use templates to implement it (we +// could have also used macros). So to get multiple versions +// of the struct, we say "struct<1> var1; struct<2> var2;". 
The price + we pay is some code duplication, and more annoying, each instance + of this var is a separate type. +template<int> class LibcInfoWithPatchFunctions : public LibcInfo { + public: + // me_info should have had PopulateWindowsFn() called on it, so the + // module_* vars and windows_fn_ are set up. + bool Patch(const LibcInfo& me_info); + void Unpatch(); + + private: + // This holds the original function contents after we patch the function. + // This has to be defined static in the subclass, because the perftools_fns + // reference origstub_fn_. + static GenericFnPtr origstub_fn_[kNumFunctions]; + + // This is the function we want to patch in + static const GenericFnPtr perftools_fn_[kNumFunctions]; + + static void* Perftools_malloc(size_t size) __THROW; + static void Perftools_free(void* ptr) __THROW; + static void* Perftools_realloc(void* ptr, size_t size) __THROW; + static void* Perftools_calloc(size_t nmemb, size_t size) __THROW; + static void* Perftools_new(size_t size); + static void* Perftools_newarray(size_t size); + static void Perftools_delete(void *ptr); + static void Perftools_deletearray(void *ptr); + static void* Perftools_new_nothrow(size_t size, + const std::nothrow_t&) __THROW; + static void* Perftools_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + static void Perftools_delete_nothrow(void *ptr, + const std::nothrow_t&) __THROW; + static void Perftools_deletearray_nothrow(void *ptr, + const std::nothrow_t&) __THROW; + static size_t Perftools__msize(void *ptr) __THROW; + static void* Perftools__expand(void *ptr, size_t size) __THROW; + // malloc.h also defines these functions: + // _aligned_malloc, _aligned_free, + // _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc + // _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea + // But they seem pretty obscure, and I'm fine not overriding them for now. + // It may be they all call into malloc/free anyway. +}; + +// This is a subset of MODULEENTRY32 that we need for patching. +struct ModuleEntryCopy { + LPVOID modBaseAddr; // the same as hmodule + DWORD modBaseSize; + // This is not part of MODULEENTRY32, but is needed to avoid making + // windows syscalls while we're holding patch_all_modules_lock (see + // lock-inversion comments at patch_all_modules_lock definition, below). + GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress]; + + ModuleEntryCopy() { + modBaseAddr = NULL; + modBaseSize = 0; + for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) + rgProcAddresses[i] = LibcInfo::static_fn(i); + } + ModuleEntryCopy(const MODULEINFO& mi) { + this->modBaseAddr = mi.lpBaseOfDll; + this->modBaseSize = mi.SizeOfImage; + LPVOID modEndAddr = (char*)mi.lpBaseOfDll + mi.SizeOfImage; + for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) { + FARPROC target = ::GetProcAddress( + reinterpret_cast<const HMODULE>(mi.lpBaseOfDll), + LibcInfo::function_name(i)); + // Sometimes a DLL forwards a function to a function in another + // DLL. We don't want to patch those forwarded functions -- + // they'll get patched when the other DLL is processed. + if (target >= modBaseAddr && target < modEndAddr) + rgProcAddresses[i] = (GenericFnPtr)target; + else + rgProcAddresses[i] = (GenericFnPtr)NULL; + } + } +}; + +// This class is easier because there's only one of them.
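Before moving on to WindowsInfo, the "template trickiness" described above is worth seeing in miniature. A self-contained sketch of the idiom, with hypothetical names and plain function pointers standing in for the patched routines:

#include <cstdio>

typedef void (*GenericFn)();

// Each instantiation of PatchSlot<N> gets its own static origstub
// slot, so Replacement() -- which must keep the original signature
// and so cannot receive the stub as an argument -- can still chain
// to the specific original function it displaced.
template <int N>
struct PatchSlot {
  static GenericFn origstub;
  static void Replacement() {
    std::printf("slot %d intercepted\n", N);
    if (origstub) origstub();   // call through to the displaced function
  }
};
template <int N> GenericFn PatchSlot<N>::origstub = 0;

static void OriginalA() { std::puts("original A"); }
static void OriginalB() { std::puts("original B"); }

int main() {
  PatchSlot<0>::origstub = &OriginalA;  // "patch" the first module
  PatchSlot<1>::origstub = &OriginalB;  // "patch" the second module
  PatchSlot<0>::Replacement();          // slot 0 intercepted, original A
  PatchSlot<1>::Replacement();          // slot 1 intercepted, original B
  return 0;
}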
+class WindowsInfo { + public: + void Patch(); + void Unpatch(); + + private: + // TODO(csilvers): should we be patching GlobalAlloc/LocalAlloc instead, + // for pre-XP systems? + enum { + kHeapAlloc, kHeapFree, kVirtualAllocEx, kVirtualFreeEx, + kMapViewOfFileEx, kUnmapViewOfFile, kLoadLibraryExW, kFreeLibrary, + kNumFunctions + }; + + struct FunctionInfo { + const char* const name; // name of fn in a module (eg "malloc") + GenericFnPtr windows_fn; // the fn whose name we call (&malloc) + GenericFnPtr origstub_fn; // original fn contents after we patch + const GenericFnPtr perftools_fn; // fn we want to patch in + }; + + static FunctionInfo function_info_[kNumFunctions]; + + // A Windows-API equivalent of malloc and free + static LPVOID WINAPI Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags, + DWORD_PTR dwBytes); + static BOOL WINAPI Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags, + LPVOID lpMem); + // A Windows-API equivalent of mmap and munmap, for "anonymous regions" + static LPVOID WINAPI Perftools_VirtualAllocEx(HANDLE process, LPVOID address, + SIZE_T size, DWORD type, + DWORD protect); + static BOOL WINAPI Perftools_VirtualFreeEx(HANDLE process, LPVOID address, + SIZE_T size, DWORD type); + // A Windows-API equivalent of mmap and munmap, for actual files + static LPVOID WINAPI Perftools_MapViewOfFileEx(HANDLE hFileMappingObject, + DWORD dwDesiredAccess, + DWORD dwFileOffsetHigh, + DWORD dwFileOffsetLow, + SIZE_T dwNumberOfBytesToMap, + LPVOID lpBaseAddress); + static BOOL WINAPI Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress); + // We don't need the other 3 variants because they all call this one. */ + static HMODULE WINAPI Perftools_LoadLibraryExW(LPCWSTR lpFileName, + HANDLE hFile, + DWORD dwFlags); + static BOOL WINAPI Perftools_FreeLibrary(HMODULE hLibModule); +}; + +// If you run out, just add a few more to the array. You'll also need +// to update the switch statement in PatchOneModule(), and the list in +// UnpatchWindowsFunctions(). +// main_executable and main_executable_windows are two windows into +// the same executable. One is responsible for patching the libc +// routines that live in the main executable (if any) to use tcmalloc; +// the other is responsible for patching the windows routines like +// HeapAlloc/etc to use tcmalloc. +static LibcInfoWithPatchFunctions<0> main_executable; +static LibcInfoWithPatchFunctions<1> libc1; +static LibcInfoWithPatchFunctions<2> libc2; +static LibcInfoWithPatchFunctions<3> libc3; +static LibcInfoWithPatchFunctions<4> libc4; +static LibcInfoWithPatchFunctions<5> libc5; +static LibcInfoWithPatchFunctions<6> libc6; +static LibcInfoWithPatchFunctions<7> libc7; +static LibcInfoWithPatchFunctions<8> libc8; +static LibcInfo* g_module_libcs[] = { + &libc1, &libc2, &libc3, &libc4, &libc5, &libc6, &libc7, &libc8 +}; +static WindowsInfo main_executable_windows; + +const char* const LibcInfo::function_name_[] = { + "malloc", "free", "realloc", "calloc", + kMangledNew, kMangledNewArray, kMangledDelete, kMangledDeleteArray, + // Ideally we should patch the nothrow versions of new/delete, but + // at least in msvcrt, nothrow-new machine-code is of a type we + // can't patch. Since these are relatively rare, I'm hoping it's ok + // not to patch them. (NULL name turns off patching.) 
+ NULL, // kMangledNewNothrow, + NULL, // kMangledNewArrayNothrow, + NULL, // kMangledDeleteNothrow, + NULL, // kMangledDeleteArrayNothrow, + "_msize", "_expand", "_calloc_crt", +}; + +// For mingw, I can't patch the new/delete here, because the +// instructions are too small to patch. Luckily, they're so small +// because all they do is call into malloc/free, so they still end up +// calling tcmalloc routines, and we don't actually lose anything +// (except maybe some stacktrace goodness) by not patching. +const GenericFnPtr LibcInfo::static_fn_[] = { + (GenericFnPtr)&::malloc, + (GenericFnPtr)&::free, + (GenericFnPtr)&::realloc, + (GenericFnPtr)&::calloc, +#ifdef __MINGW32__ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +#else + (GenericFnPtr)(void*(*)(size_t))&::operator new, + (GenericFnPtr)(void*(*)(size_t))&::operator new[], + (GenericFnPtr)(void(*)(void*))&::operator delete, + (GenericFnPtr)(void(*)(void*))&::operator delete[], + (GenericFnPtr) + (void*(*)(size_t, struct std::nothrow_t const &))&::operator new, + (GenericFnPtr) + (void*(*)(size_t, struct std::nothrow_t const &))&::operator new[], + (GenericFnPtr) + (void(*)(void*, struct std::nothrow_t const &))&::operator delete, + (GenericFnPtr) + (void(*)(void*, struct std::nothrow_t const &))&::operator delete[], +#endif + (GenericFnPtr)&::_msize, + (GenericFnPtr)&::_expand, + (GenericFnPtr)&::calloc, +}; + +template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = { + // This will get filled in at run-time, as patching is done. +}; + +template<int T> +const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { + (GenericFnPtr)&Perftools_malloc, + (GenericFnPtr)&Perftools_free, + (GenericFnPtr)&Perftools_realloc, + (GenericFnPtr)&Perftools_calloc, + (GenericFnPtr)&Perftools_new, + (GenericFnPtr)&Perftools_newarray, + (GenericFnPtr)&Perftools_delete, + (GenericFnPtr)&Perftools_deletearray, + (GenericFnPtr)&Perftools_new_nothrow, + (GenericFnPtr)&Perftools_newarray_nothrow, + (GenericFnPtr)&Perftools_delete_nothrow, + (GenericFnPtr)&Perftools_deletearray_nothrow, + (GenericFnPtr)&Perftools__msize, + (GenericFnPtr)&Perftools__expand, + (GenericFnPtr)&Perftools_calloc, +}; + +/*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = { + { "HeapAlloc", NULL, NULL, (GenericFnPtr)&Perftools_HeapAlloc }, + { "HeapFree", NULL, NULL, (GenericFnPtr)&Perftools_HeapFree }, + { "VirtualAllocEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualAllocEx }, + { "VirtualFreeEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualFreeEx }, + { "MapViewOfFileEx", NULL, NULL, (GenericFnPtr)&Perftools_MapViewOfFileEx }, + { "UnmapViewOfFile", NULL, NULL, (GenericFnPtr)&Perftools_UnmapViewOfFile }, + { "LoadLibraryExW", NULL, NULL, (GenericFnPtr)&Perftools_LoadLibraryExW }, + { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary }, +}; + +bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) { + // First, store the location of the function to patch before + // patching it. If none of these functions are found in the module, + // then this module has no libc in it, and we just return false. + for (int i = 0; i < kNumFunctions; i++) { + if (!function_name_[i]) // we can turn off patching by unsetting name + continue; + // The ::GetProcAddress calls were done in the ModuleEntryCopy + // constructor, so we don't have to make any windows calls here. 
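+ // (PreamblePatcher::ResolveTarget, used just below, follows any jmp-stub chain from the exported address to the real function body, so the patch lands on the actual target rather than on a thunk.)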
+ const GenericFnPtr fn = module_entry.rgProcAddresses[i]; + if (fn) { + windows_fn_[i] = PreamblePatcher::ResolveTarget(fn); + } + } + + // Some modules use the same function pointer for new and new[]. If + // we find that, set one of the pointers to NULL so we don't double- + // patch. Same may happen with new and nothrow-new, or even new[] + // and nothrow-new. It's easiest just to check each fn-ptr against + // every other. + for (int i = 0; i < kNumFunctions; i++) { + for (int j = i+1; j < kNumFunctions; j++) { + if (windows_fn_[i] == windows_fn_[j]) { + // We NULL the later one (j), so as to minimize the chances we + // NULL kFree and kRealloc. See comments below. This is fragile! + windows_fn_[j] = NULL; + } + } + } + + // There's always a chance that our module uses the same function + // as another module that we've already loaded. In that case, we + // need to set our windows_fn to NULL, to avoid double-patching. + for (int ifn = 0; ifn < kNumFunctions; ifn++) { + for (int imod = 0; + imod < sizeof(g_module_libcs)/sizeof(*g_module_libcs); imod++) { + if (g_module_libcs[imod]->is_valid() && + this->windows_fn(ifn) == g_module_libcs[imod]->windows_fn(ifn)) { + windows_fn_[ifn] = NULL; + } + } + } + + bool found_non_null = false; + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i]) + found_non_null = true; + } + if (!found_non_null) + return false; + + // It's important we didn't NULL out windows_fn_[kFree] or [kRealloc]. + // The reason is, if those are NULL-ed out, we'll never patch them + // and thus never get an origstub_fn_ value for them, and when we + // try to call origstub_fn_[kFree/kRealloc] in Perftools_free and + // Perftools_realloc, below, it will fail. We could work around + // that by adding a pointer from one patch-unit to the other, but we + // haven't needed to yet. + CHECK(windows_fn_[kFree]); + CHECK(windows_fn_[kRealloc]); + + // OK, we successfully populated. Let's store our member information. + module_base_address_ = module_entry.modBaseAddr; + module_base_size_ = module_entry.modBaseSize; + return true; +} + +template<int T> +bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) { + CopyFrom(me_info); // copies the module_entry and the windows_fn_ array + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) { + // if origstub_fn_ is not NULL, it's left around from a previous + // patch. We need to set it to NULL for the new Patch call. + // + // Note that origstub_fn_ was logically freed by + // PreamblePatcher::Unpatch, so we don't have to do anything + // about it. + origstub_fn_[i] = NULL; // Patch() will fill this in + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i], + &origstub_fn_[i])); + } + } + set_is_valid(true); + return true; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Unpatch() { + // We have to cast our GenericFnPtrs to void* for unpatch. This is + // contra the C++ spec; we use C-style casts to emphasize that. + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i]) + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Unpatch((void*)windows_fn_[i], + (void*)perftools_fn_[i], + (void*)origstub_fn_[i])); + } + set_is_valid(false); +} + +void WindowsInfo::Patch() { + HMODULE hkernel32 = ::GetModuleHandleA("kernel32"); + CHECK_NE(hkernel32, NULL); + + // Unlike for libc, we know these exist in our module, so we can get + // and patch at the same time.
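+ // (kernel32.dll is mapped into every Win32 process before any user code runs and is never unloaded, so the GetModuleHandleA call above cannot fail and these addresses stay valid for the life of the process.)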
+ for (int i = 0; i < kNumFunctions; i++) { + function_info_[i].windows_fn = (GenericFnPtr) + ::GetProcAddress(hkernel32, function_info_[i].name); + // If origstub_fn is not NULL, it's left around from a previous + // patch. We need to set it to NULL for the new Patch call. + // Since we've patched Unpatch() not to delete origstub_fn_ (it + // causes problems in some contexts, though obviously not this + // one), we should delete it now, before setting it to NULL. + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + delete[] (char*)(function_info_[i].origstub_fn); + function_info_[i].origstub_fn = NULL; // Patch() will fill this in + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Patch(function_info_[i].windows_fn, + function_info_[i].perftools_fn, + &function_info_[i].origstub_fn)); + } +} + +void WindowsInfo::Unpatch() { + // We have to cast our GenericFnPtrs to void* for unpatch. This is + // contra the C++ spec; we use C-style casts to emphasize that. + for (int i = 0; i < kNumFunctions; i++) { + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Unpatch((void*)function_info_[i].windows_fn, + (void*)function_info_[i].perftools_fn, + (void*)function_info_[i].origstub_fn)); + } +} + +// You should hold the patch_all_modules_lock when calling this. +void PatchOneModuleLocked(const LibcInfo& me_info) { + // If we don't already have info on this module, let's add it. This + // is where we're sad that each libcX has a different type, so we + // can't use an array; instead, we have to use a switch statement. + // Patch() returns false if there were no libc functions in the module. + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (!g_module_libcs[i]->is_valid()) { // found an empty spot to add! + switch (i) { + case 0: libc1.Patch(me_info); return; + case 1: libc2.Patch(me_info); return; + case 2: libc3.Patch(me_info); return; + case 3: libc4.Patch(me_info); return; + case 4: libc5.Patch(me_info); return; + case 5: libc6.Patch(me_info); return; + case 6: libc7.Patch(me_info); return; + case 7: libc8.Patch(me_info); return; + } + } + } + printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n"); +} + +void PatchMainExecutableLocked() { + if (main_executable.patched()) + return; // main executable has already been patched + ModuleEntryCopy fake_module_entry; // make a fake one to pass into Patch() + // No need to call PopulateModuleEntryProcAddresses on the main executable. + main_executable.PopulateWindowsFn(fake_module_entry); + main_executable.Patch(main_executable); +} + +// This lock is subject to a subtle and annoying lock inversion +// problem: it may interact badly with unknown internal windows locks. +// In particular, windows may be holding a lock when it calls +// LoadLibraryExW and FreeLibrary, which we've patched. We have those +// routines call PatchAllModules, which acquires this lock. If we +// make windows system calls while holding this lock, those system +// calls may need the internal windows locks that are being held in +// the call to LoadLibraryExW, resulting in deadlock. The solution is +// to be very careful not to call *any* windows routines while holding +// patch_all_modules_lock, inside PatchAllModules().
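PatchAllModules() below follows that rule by gathering everything that needs system calls while the lock is dropped, and doing only plain bookkeeping while it is held. A stripped-down sketch of that shape (hypothetical names; std::mutex standing in for the SpinLock declared right after this sketch):

#include <mutex>
#include <set>
#include <vector>

static std::mutex g_lock;     // stands in for patch_all_modules_lock
static std::set<int> g_seen;  // stands in for g_last_loaded

// Stand-in for the EnumProcessModules/GetModuleInformation round trip;
// in the real code this must never run while the lock is held.
static std::vector<int> QueryLoadedModules() {
  return std::vector<int>{1, 2, 3};
}

static void Refresh() {
  std::vector<int> found = QueryLoadedModules();  // syscalls: lock NOT held
  std::lock_guard<std::mutex> h(g_lock);          // bookkeeping only
  for (size_t i = 0; i < found.size(); ++i)
    g_seen.insert(found[i]);                      // no syscalls in here
}

int main() {
  Refresh();
  return 0;
}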
+static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); + +// last_loaded: The set of modules that were loaded the last time +// PatchAllModules was called. This is an optimization for only +// looking at modules that were added or removed from the last call. +static std::set<HMODULE> *g_last_loaded; + +// Iterates over all the modules currently loaded by the executable, +// according to windows, and makes sure they're all patched. Most +// modules will already be in loaded_modules, meaning we have already +// loaded and either patched them or determined they did not need to +// be patched. Others will not, which means we need to patch them +// (if necessary). Finally, we have to go through the existing +// g_module_libcs and see if any of those are *not* in the modules +// currently loaded by the executable. If so, we need to invalidate +// them. Returns true if we did any work (patching or invalidating), +// false if we were a noop. May update loaded_modules as well. +// NOTE: you must hold the patch_all_modules_lock to access loaded_modules. +bool PatchAllModules() { + std::vector<ModuleEntryCopy> modules; + bool made_changes = false; + + const HANDLE hCurrentProcess = GetCurrentProcess(); + DWORD num_modules = 0; + HMODULE hModules[kMaxModules]; // max # of modules we support in one process + if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), + &num_modules)) { + num_modules = 0; + } + // EnumProcessModules actually set the bytes written into hModules, + // so we need to divide to make num_modules actually be a module-count. + num_modules /= sizeof(*hModules); + if (num_modules >= kMaxModules) { + printf("PERFTOOLS ERROR: Too many modules in this executable to try" + " to patch them all (if you need to, raise kMaxModules in" + " patch_functions.cc).\n"); + num_modules = kMaxModules; + } + + // Now we handle the unpatching of modules we have in g_module_libcs + // but that were not found in EnumProcessModules. We need to + // invalidate them. To speed that up, we store the EnumProcessModules + // output in a set. + // At the same time, we prepare for the adding of new modules, by + // removing from hModules all the modules we know we've already + // patched (or decided don't need to be patched). At the end, + // hModules will hold only the modules that we need to consider patching. + std::set<HMODULE> currently_loaded_modules; + { + SpinLockHolder h(&patch_all_modules_lock); + if (!g_last_loaded) g_last_loaded = new std::set<HMODULE>; + // At the end of this loop, currently_loaded_modules contains the + // full list of EnumProcessModules, and hModules just the ones we + // haven't handled yet. + for (int i = 0; i < num_modules; ) { + currently_loaded_modules.insert(hModules[i]); + if (g_last_loaded->count(hModules[i]) > 0) { + hModules[i] = hModules[--num_modules]; // replace element i with tail + } else { + i++; // keep element i + } + } + // Now we do the unpatching/invalidation. + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (g_module_libcs[i]->patched() && + currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) { + // Means g_module_libcs[i] is no longer loaded (no me32 matched). + // We could call Unpatch() here, but why bother? The module + // has gone away, so nobody is going to call into it anyway. + g_module_libcs[i]->set_is_valid(false); + made_changes = true; + } + } + // Update the loaded module cache. 
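+ // (swap() hands the fresh set to g_last_loaded in O(1); the old contents end up in currently_loaded_modules and are destroyed after the lock is released.)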
+ g_last_loaded->swap(currently_loaded_modules); + } + + // Now that we know what modules are new, let's get the info we'll + // need to patch them. Note this *cannot* be done while holding the + // lock, since it needs to make windows calls (see the lock-inversion + // comments before the definition of patch_all_modules_lock). + MODULEINFO mi; + for (int i = 0; i < num_modules; i++) { + if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) + modules.push_back(ModuleEntryCopy(mi)); + } + + // Now we can do the patching of new modules. + { + SpinLockHolder h(&patch_all_modules_lock); + for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); + it != modules.end(); ++it) { + LibcInfo libc_info; + if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines + PatchOneModuleLocked(libc_info); + made_changes = true; + } + } + + // Now that we've dealt with the modules (dlls), update the main + // executable. We do this last because PatchMainExecutableLocked + // wants to look at how other modules were patched. + if (!main_executable.patched()) { + PatchMainExecutableLocked(); + made_changes = true; + } + } + // TODO(csilvers): for this to be reliable, we need to also take + // into account if we *would* have patched any modules had they not + // already been loaded. (That is, made_changes should ignore + // g_last_loaded.) + return made_changes; +} + + +} // end unnamed namespace + +// --------------------------------------------------------------------- +// Now that we've done all the patching machinery, let's actually +// define the functions we're patching in. Mostly these are +// simple wrappers around the do_* routines in tcmalloc.cc. +// +// In fact, we #include tcmalloc.cc to get at the tcmalloc internal +// do_* functions, the better to write our own hook functions. +// U-G-L-Y, I know. But the alternatives are, perhaps, worse. This +// also lets us define _msize(), _expand(), and other windows-specific +// functions here, using tcmalloc internals, without polluting +// tcmalloc.cc. +// ------------------------------------------------------------------- + +// TODO(csilvers): refactor tcmalloc.cc into two files, so I can link +// against the file with do_malloc, and ignore the one with malloc. +#include "tcmalloc.cc" + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_malloc(size_t size) __THROW { + void* result = do_malloc_or_cpp_alloc(size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_free(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + // This calls the windows free if do_free decides ptr was not + // allocated by tcmalloc. Note it calls the origstub_free from + // *this* templatized instance of LibcInfo. See "template + // trickiness" above. 
+ do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_realloc( + void* old_ptr, size_t new_size) __THROW { + if (old_ptr == NULL) { + void* result = do_malloc_or_cpp_alloc(new_size); + MallocHook::InvokeNewHook(result, new_size); + return result; + } + if (new_size == 0) { + MallocHook::InvokeDeleteHook(old_ptr); + do_free_with_callback(old_ptr, + (void (*)(void*))origstub_fn_[kFree], false, 0); + return NULL; + } + return do_realloc_with_callback( + old_ptr, new_size, + (void (*)(void*))origstub_fn_[kFree], + (size_t (*)(const void*))origstub_fn_[k_Msize]); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_calloc( + size_t n, size_t elem_size) __THROW { + void* result = do_calloc(n, elem_size); + MallocHook::InvokeNewHook(result, n * elem_size); + return result; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_new(size_t size) { + void* p = cpp_alloc(size, false); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_newarray(size_t size) { + void* p = cpp_alloc(size, false); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_delete(void *p) { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_deletearray(void *p) { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_new_nothrow( + size_t size, const std::nothrow_t&) __THROW { + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_newarray_nothrow( + size_t size, const std::nothrow_t&) __THROW { + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_delete_nothrow( + void *p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow( + void *p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + + +// _msize() lets you figure out how much space is reserved for a +// pointer, in Windows. Even if applications don't call it, any DLL +// with global constructors will call (transitively) something called +// __dllonexit_lk in order to make sure the destructors get called +// when the dll unloads. And that will call msize -- horrible things +// can ensue if this is not hooked. Other parts of libc may also call +// this internally. + +template<int T> +size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW { + return GetSizeWithCallback(ptr, (size_t (*)(const void*))origstub_fn_[k_Msize]); +} + +// We need to define this because internal windows functions like to +// call into it(?). _expand() is like realloc but doesn't move the +// pointer. We punt, which will cause callers to fall back on realloc. 
+template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools__expand(void *ptr, + size_t size) __THROW { + return NULL; +} + +LPVOID WINAPI WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags, + DWORD_PTR dwBytes) { + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD_PTR)) + function_info_[kHeapAlloc].origstub_fn)( + hHeap, dwFlags, dwBytes); + MallocHook::InvokeNewHook(result, dwBytes); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags, + LPVOID lpMem) { + MallocHook::InvokeDeleteHook(lpMem); + return ((BOOL (WINAPI *)(HANDLE, DWORD, LPVOID)) + function_info_[kHeapFree].origstub_fn)( + hHeap, dwFlags, lpMem); +} + +LPVOID WINAPI WindowsInfo::Perftools_VirtualAllocEx(HANDLE process, + LPVOID address, + SIZE_T size, DWORD type, + DWORD protect) { + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD)) + function_info_[kVirtualAllocEx].origstub_fn)( + process, address, size, type, protect); + // VirtualAllocEx() seems to be the Windows equivalent of mmap() + MallocHook::InvokeMmapHook(result, address, size, protect, type, -1, 0); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_VirtualFreeEx(HANDLE process, LPVOID address, + SIZE_T size, DWORD type) { + MallocHook::InvokeMunmapHook(address, size); + return ((BOOL (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD)) + function_info_[kVirtualFreeEx].origstub_fn)( + process, address, size, type); +} + +LPVOID WINAPI WindowsInfo::Perftools_MapViewOfFileEx( + HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, + DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap, LPVOID lpBaseAddress) { + // For this function pair, you always deallocate the full block of + // data that you allocate, so NewHook/DeleteHook is the right API. + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD, DWORD, + SIZE_T, LPVOID)) + function_info_[kMapViewOfFileEx].origstub_fn)( + hFileMappingObject, dwDesiredAccess, dwFileOffsetHigh, + dwFileOffsetLow, dwNumberOfBytesToMap, lpBaseAddress); + MallocHook::InvokeNewHook(result, dwNumberOfBytesToMap); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { + MallocHook::InvokeDeleteHook(lpBaseAddress); + return ((BOOL (WINAPI *)(LPCVOID)) + function_info_[kUnmapViewOfFile].origstub_fn)( + lpBaseAddress); +} + +// g_load_map holds a copy of windows' refcount for how many times +// each currently loaded module has been loaded and unloaded. We use +// it as an optimization when the same module is loaded more than +// once: as long as the refcount stays above 1, we don't need to worry +// about patching because it's already patched. Likewise, we don't +// need to unpatch until the refcount drops to 0. g_load_map is +// maintained in LoadLibraryExW and FreeLibrary, and only covers +// modules explicitly loaded/freed via those interfaces. +static std::map<HMODULE, int>* g_load_map = NULL; + +HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, + HANDLE hFile, + DWORD dwFlags) { + HMODULE rv; + // Check to see if the module is already loaded; flag 0 gets a + // reference if it was loaded. If it was loaded there is no need to + // call PatchAllModules: the GetModuleHandleExW call has already + // increased the reference count, just as LoadLibraryExW would have. + if (::GetModuleHandleExW(0, lpFileName, &rv)) { + return rv; + } else { + // Not already loaded, so load it.
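+ // (The call below goes through origstub_fn, the saved copy of LoadLibraryExW's original preamble followed by a jump back into the real function, so it reaches the unpatched implementation without recursing into this wrapper.)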
rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) + function_info_[kLoadLibraryExW].origstub_fn)( + lpFileName, hFile, dwFlags); + // This will patch any newly loaded libraries, if patching needs + // to be done. + PatchAllModules(); + + return rv; + } +} + +BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { + BOOL rv = ((BOOL (WINAPI *)(HMODULE)) + function_info_[kFreeLibrary].origstub_fn)(hLibModule); + + // Check to see if the module is still loaded by passing the base + // address and seeing if it comes back with the same address. If it + // is the same address it's still loaded, so the FreeLibrary() call + // was a noop, and there's no need to redo the patching. + HMODULE owner = NULL; + BOOL result = ::GetModuleHandleExW( + (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT), + (LPCWSTR)hLibModule, + &owner); + if (result && owner == hLibModule) + return rv; + + PatchAllModules(); // this will fix up the list of patched libraries + return rv; +} + + +// --------------------------------------------------------------------- +// PatchWindowsFunctions() +// This is the function that is exposed to the outside world. +// It should be called before the program becomes multi-threaded, +// since main_executable_windows.Patch() is not thread-safe. +// --------------------------------------------------------------------- + +void PatchWindowsFunctions() { + // This does the libc patching in every module, and the main executable. + PatchAllModules(); + main_executable_windows.Patch(); +} + +#if 0 +// It's possible to unpatch all the functions when we are exiting. + +// The idea is to handle properly windows-internal data that is +// allocated before PatchWindowsFunctions is called. If all +// destruction happened in reverse order from construction, then we +// could call UnpatchWindowsFunctions at just the right time, so that +// the early-allocated data would be freed using the windows +// allocation functions rather than tcmalloc. The problem is that +// windows allocates some structures lazily, so it would allocate them +// late (using tcmalloc) and then try to deallocate them late as well. +// So instead of unpatching, we just modify all the tcmalloc routines +// so they call through to the libc routines if the memory in +// question doesn't seem to have been allocated with tcmalloc. I keep +// this unpatch code around for reference. + +void UnpatchWindowsFunctions() { + // We need to go back to the system malloc/etc at global destruct time, + // so objects that were constructed before tcmalloc, using the system + // malloc, can destroy themselves using the system free. This depends + // on DLLs unloading in the reverse order in which they load! + // + // We also go back to the default HeapAlloc/etc, just for consistency. + // Who knows, it may help avoid weird bugs in some situations.
+ main_executable_windows.Unpatch(); + main_executable.Unpatch(); + if (libc1.is_valid()) libc1.Unpatch(); + if (libc2.is_valid()) libc2.Unpatch(); + if (libc3.is_valid()) libc3.Unpatch(); + if (libc4.is_valid()) libc4.Unpatch(); + if (libc5.is_valid()) libc5.Unpatch(); + if (libc6.is_valid()) libc6.Unpatch(); + if (libc7.is_valid()) libc7.Unpatch(); + if (libc8.is_valid()) libc8.Unpatch(); +} +#endif diff --git a/src/third_party/gperftools-2.5/src/windows/port.cc b/src/third_party/gperftools-2.5/src/windows/port.cc new file mode 100644 index 00000000000..76224a23431 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/port.cc @@ -0,0 +1,235 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + */ + +#ifndef _WIN32 +# error You should only be including windows/port.cc in a windows environment! +#endif + +#define NOMINMAX // so std::max, below, compiles correctly +#include <config.h> +#include <string.h> // for strlen(), memset(), memcmp() +#include <assert.h> +#include <stdarg.h> // for va_list, va_start, va_end +#include <algorithm> // for std:{min,max} +#include <windows.h> +#include "port.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "internal_logging.h" + +// ----------------------------------------------------------------------- +// Basic libraries + +PERFTOOLS_DLL_DECL +int getpagesize() { + static int pagesize = 0; + if (pagesize == 0) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + pagesize = std::max(system_info.dwPageSize, + system_info.dwAllocationGranularity); + } + return pagesize; +} + +extern "C" PERFTOOLS_DLL_DECL void* __sbrk(ptrdiff_t increment) { + LOG(FATAL, "Windows doesn't implement sbrk!\n"); + return NULL; +} + +// We need to write to 'stderr' without having windows allocate memory. +// The safest way is via a low-level call like WriteConsoleA(). 
But +// even then we need to be sure to print in small bursts so as to not +// require memory allocation. +extern "C" PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len) { + // Looks like windows allocates for writes of >80 bytes + for (int i = 0; i < len; i += 80) { + write(STDERR_FILENO, buf + i, std::min(80, len - i)); + } +} + + +// ----------------------------------------------------------------------- +// Threads code + +// Windows doesn't support pthread_key_create's destr_function, and in +// fact it's a bit tricky to get code to run when a thread exits. This +// is cargo-cult magic from http://www.codeproject.com/threads/tls.asp. +// This code is for VC++ 7.1 and later; VC++ 6.0 support is possible +// but more busy-work -- see the webpage for how to do it. If all +// this fails, we could use DllMain instead. The big problem with +// DllMain is it doesn't run if this code is statically linked into a +// binary (it also doesn't run if the thread is terminated via +// TerminateThread, which if we're lucky this routine does). + +// Force a reference to _tls_used to make the linker create the TLS directory +// if it's not already there (that is, even if __declspec(thread) is not used). +// Force a reference to p_thread_callback_tcmalloc and p_process_term_tcmalloc +// to prevent whole program optimization from discarding the variables. +#ifdef _MSC_VER +#if defined(_M_IX86) +#pragma comment(linker, "/INCLUDE:__tls_used") +#pragma comment(linker, "/INCLUDE:_p_thread_callback_tcmalloc") +#pragma comment(linker, "/INCLUDE:_p_process_term_tcmalloc") +#elif defined(_M_X64) +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:p_thread_callback_tcmalloc") +#pragma comment(linker, "/INCLUDE:p_process_term_tcmalloc") +#endif +#endif + +// When destr_fn eventually runs, it's supposed to take as its +// argument the tls-value associated with key that pthread_key_create +// creates. (Yeah, it sounds confusing but it's really not.) We +// store the destr_fn/key pair in this data structure. Because we +// store this in a single var, this implies we can only have one +// destr_fn in a program! That's enough in practice. If asserts +// trigger because we end up needing more, we'll have to turn this +// into an array. +struct DestrFnClosure { + void (*destr_fn)(void*); + pthread_key_t key_for_destr_fn_arg; +}; + +static DestrFnClosure destr_fn_info; // initted to all NULL/0. + +static int on_process_term(void) { + if (destr_fn_info.destr_fn) { + void *ptr = TlsGetValue(destr_fn_info.key_for_destr_fn_arg); + // This shouldn't be necessary, but in Release mode, Windows + // sometimes trashes the pointer in the TLS slot, so we need to + // remove the pointer from the TLS slot before the thread dies. + TlsSetValue(destr_fn_info.key_for_destr_fn_arg, NULL); + if (ptr) // pthread semantics say not to call if ptr is NULL + (*destr_fn_info.destr_fn)(ptr); + } + return 0; +} + +static void NTAPI on_tls_callback(HINSTANCE h, DWORD dwReason, PVOID pv) { + if (dwReason == DLL_THREAD_DETACH) { // thread is being destroyed! + on_process_term(); + } +} + +#ifdef _MSC_VER + +// extern "C" suppresses C++ name mangling so we know the symbol names +// for the linker /INCLUDE:symbol pragmas above. +extern "C" { +// This tells the linker to run these functions. 
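+// (The CRT collects function pointers placed in the .CRT$XLA..XLZ sections into the module's TLS-callback table, anchored by _tls_used, and pointers in .CRT$XTA..XTZ into its termination table; planting ours in .CRT$XLB and .CRT$XTU is what gets them invoked.)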
+#pragma data_seg(push, old_seg) +#pragma data_seg(".CRT$XLB") +void (NTAPI *p_thread_callback_tcmalloc)( + HINSTANCE h, DWORD dwReason, PVOID pv) = on_tls_callback; +#pragma data_seg(".CRT$XTU") +int (*p_process_term_tcmalloc)(void) = on_process_term; +#pragma data_seg(pop, old_seg) +} // extern "C" + +#else // #ifdef _MSC_VER [probably msys/mingw] + +// We have to try the DllMain solution here, because we can't use the +// msvc-specific pragmas. +BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) { + if (dwReason == DLL_THREAD_DETACH) + on_tls_callback(h, dwReason, pv); + else if (dwReason == DLL_PROCESS_DETACH) + on_process_term(); + return TRUE; +} + +#endif // #ifdef _MSC_VER + +extern "C" pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)) { + // Semantics are: we create a new key, and then promise to call + // destr_fn with TlsGetValue(key) when the thread is destroyed + // (as long as TlsGetValue(key) is not NULL). + pthread_key_t key = TlsAlloc(); + if (destr_fn) { // register it + // If this assert fails, we'll need to support an array of destr_fn_infos + assert(destr_fn_info.destr_fn == NULL); + destr_fn_info.destr_fn = destr_fn; + destr_fn_info.key_for_destr_fn_arg = key; + } + return key; +} + +// NOTE: this is Win2K and later. For Win98 we could use a CRITICAL_SECTION... +extern "C" int perftools_pthread_once(pthread_once_t *once_control, + void (*init_routine)(void)) { + // Try for a fast path first. Note: this should be an acquire semantics read. + // It is on x86 and x64, where Windows runs. + if (*once_control != 1) { + while (true) { + switch (InterlockedCompareExchange(once_control, 2, 0)) { + case 0: + init_routine(); + InterlockedExchange(once_control, 1); + return 0; + case 1: + // The initializer has already been executed + return 0; + default: + // The initializer is being processed by another thread + SwitchToThread(); + } + } + } + return 0; +} + + +// ----------------------------------------------------------------------- +// These functions rework existing functions of the same name in the +// Google codebase. + +// A replacement for HeapProfiler::CleanupOldProfiles. +void DeleteMatchingFiles(const char* prefix, const char* full_glob) { + WIN32_FIND_DATAA found; // that final A is for Ansi (as opposed to Unicode) + HANDLE hFind = FindFirstFileA(full_glob, &found); // A is for Ansi + if (hFind != INVALID_HANDLE_VALUE) { + const int prefix_length = strlen(prefix); + do { + const char *fname = found.cFileName; + if ((strlen(fname) >= prefix_length) && + (memcmp(fname, prefix, prefix_length) == 0)) { + RAW_VLOG(0, "Removing old heap profile %s\n", fname); + // TODO(csilvers): we really need to unlink dirname + fname + _unlink(fname); + } + } while (FindNextFileA(hFind, &found) != FALSE); // A is for Ansi + FindClose(hFind); + } +} diff --git a/src/third_party/gperftools-2.5/src/windows/port.h b/src/third_party/gperftools-2.5/src/windows/port.h new file mode 100644 index 00000000000..87db9ddc848 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/port.h @@ -0,0 +1,497 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + * + * These are some portability typedefs and defines to make it a bit + * easier to compile this code under VC++. + * + * Several of these are taken from glib: + * http://developer.gnome.org/doc/API/glib/glib-windows-compatability-functions.html + */ + +#ifndef GOOGLE_BASE_WINDOWS_H_ +#define GOOGLE_BASE_WINDOWS_H_ + +/* You should never include this file directly, but always include it + from either config.h (MSVC) or mingw.h (MinGW/msys). */ +#if !defined(GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_) && \ + !defined(GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_) +# error "port.h should only be included from config.h or mingw.h" +#endif + +#ifdef _WIN32 + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif +#include <windows.h> +#include <io.h> /* because we so often use open/close/etc */ +#include <direct.h> /* for _getcwd */ +#include <process.h> /* for _getpid */ +#include <limits.h> /* for PATH_MAX */ +#include <stdarg.h> /* for va_list */ +#include <stdio.h> /* need this to override stdio's (v)snprintf */ +#include <sys/types.h> /* for _off_t */ +#include <assert.h> +#include <stdlib.h> /* for rand, srand, _strtoxxx */ + +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define _TIMESPEC_DEFINED +#include <time.h> +#endif + +/* + * 4018: signed/unsigned mismatch is common (and ok for signed_i < unsigned_i) + * 4244: otherwise we get problems when subtracting two size_t's to an int + * 4288: VC++7 gets confused when a var is defined in a loop and then after it + * 4267: too many false positives for "conversion gives possible data loss" + * 4290: it's ok windows ignores the "throw" directive + * 4996: Yes, we're ok using "unsafe" functions like vsnprintf and getenv() + * 4146: internal_logging.cc intentionally negates an unsigned value + */ +#ifdef _MSC_VER +#pragma warning(disable:4018 4244 4288 4267 4290 4996 4146) +#endif + +#ifndef __cplusplus +/* MSVC does not support C99 */ +# if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# define inline __inline +# else +# define inline static +# endif +# endif +#endif + +#ifdef __cplusplus +# define EXTERN_C extern "C" +#else +# define EXTERN_C extern +#endif + +/* ----------------------------------- BASIC TYPES */ + +#ifndef HAVE_STDINT_H +#ifndef 
HAVE___INT64 /* we need to have all the __intX names */ +# error Do not know how to set up type aliases. Edit port.h for your system. +#endif + +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#endif /* #ifndef HAVE_STDINT_H */ + +/* I guess MSVC's <types.h> doesn't include ssize_t by default? */ +#ifdef _MSC_VER +typedef intptr_t ssize_t; +#endif + +/* ----------------------------------- THREADS */ + +#ifndef HAVE_PTHREAD /* not true for MSVC, but may be true for MSYS */ +typedef DWORD pthread_t; +typedef DWORD pthread_key_t; +typedef LONG pthread_once_t; +enum { PTHREAD_ONCE_INIT = 0 }; /* important that this be 0! for SpinLock */ + +inline pthread_t pthread_self(void) { + return GetCurrentThreadId(); +} + +#ifdef __cplusplus +inline bool pthread_equal(pthread_t left, pthread_t right) { + return left == right; +} + +/* + * windows/port.h defines compatibility APIs for several .h files, which + * we therefore shouldn't be #including directly. This hack keeps us from + * doing so. TODO(csilvers): do something more principled. + */ +#define GOOGLE_MAYBE_THREADS_H_ 1 +/* This replaces maybe_threads.{h,cc} */ + +EXTERN_C pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)); /* port.cc */ + +inline int perftools_pthread_key_create(pthread_key_t *pkey, + void (*destructor)(void*)) { + pthread_key_t key = PthreadKeyCreate(destructor); + if (key != TLS_OUT_OF_INDEXES) { + *(pkey) = key; + return 0; + } else { + return GetLastError(); + } +} + +inline void* perftools_pthread_getspecific(DWORD key) { + DWORD err = GetLastError(); + void* rv = TlsGetValue(key); + if (err) SetLastError(err); + return rv; +} + +inline int perftools_pthread_setspecific(pthread_key_t key, const void *value) { + if (TlsSetValue(key, (LPVOID)value)) + return 0; + else + return GetLastError(); +} + +EXTERN_C int perftools_pthread_once(pthread_once_t *once_control, + void (*init_routine)(void)); + +#endif /* __cplusplus */ + +inline void sched_yield(void) { + Sleep(0); +} + +#endif /* HAVE_PTHREAD */ + +/* + * __declspec(thread) isn't usable in a dll opened via LoadLibrary(). + * But it doesn't work to LoadLibrary() us anyway, because of all the + * things we need to do before main()! So this kind of TLS is safe for us. + */ +#define __thread __declspec(thread) + +/* + * This code is obsolete, but I keep it around in case we are ever in + * an environment where we can't or don't want to use google spinlocks + * (from base/spinlock.{h,cc}). In that case, uncommenting this out, + * and removing spinlock.cc from the build, should be enough to revert + * back to using native spinlocks. + */ +#if 0 +// Windows uses a spinlock internally for its mutexes, making our life easy! +// However, the Windows spinlock must always be initialized, making life hard, +// since we want LINKER_INITIALIZED. We work around this by having the +// linker initialize a bool to 0, and check that before accessing the mutex. +// This replaces spinlock.{h,cc}, and all the stuff it depends on (atomicops) +#ifdef __cplusplus +class SpinLock { + public: + SpinLock() : initialize_token_(PTHREAD_ONCE_INIT) {} + // Used for global SpinLock vars (see base/spinlock.h for more details). 
+  enum StaticInitializer { LINKER_INITIALIZED };
+  explicit SpinLock(StaticInitializer) : initialize_token_(PTHREAD_ONCE_INIT) {
+    perftools_pthread_once(&initialize_token_, InitializeMutex);
+  }
+
+  // It's important SpinLock not have a destructor: otherwise we run
+  // into problems when the main thread has exited, but other threads
+  // are still running and try to access a main-thread spinlock.  This
+  // means we leak mutex_ (we should call DeleteCriticalSection()
+  // here).  However, I've verified that all SpinLocks used in
+  // perftools have program-long scope anyway, so the leak is
+  // perfectly fine.  But be aware of this for the future!
+
+  void Lock() {
+    // You'd think this would be unnecessary, since we call
+    // InitializeMutex() in our constructor.  But sometimes Lock() can
+    // be called before our constructor is!  This can only happen in
+    // global constructors, when this is a global.  If we live in
+    // bar.cc, and some global constructor in foo.cc calls a routine
+    // in bar.cc that calls this->Lock(), then Lock() may well run
+    // before our global constructor does.  To protect against that,
+    // we do this check.  For SpinLock objects created after main()
+    // has started, this pthread_once call will always be a noop.
+    perftools_pthread_once(&initialize_token_, InitializeMutex);
+    EnterCriticalSection(&mutex_);
+  }
+  void Unlock() {
+    LeaveCriticalSection(&mutex_);
+  }
+
+  // Used in assertion checks: assert(lock.IsHeld()) (see base/spinlock.h).
+  inline bool IsHeld() const {
+    // This works, but probes undocumented internals, so I've commented it out.
+    // c.f. http://msdn.microsoft.com/msdnmag/issues/03/12/CriticalSections/
+    //return mutex_.LockCount>=0 && mutex_.OwningThread==GetCurrentThreadId();
+    return true;
+  }
+ private:
+  void InitializeMutex() { InitializeCriticalSection(&mutex_); }
+
+  pthread_once_t initialize_token_;
+  CRITICAL_SECTION mutex_;
+};
+
+class SpinLockHolder {  // Acquires a spinlock for as long as the scope lasts
+ private:
+  SpinLock* lock_;
+ public:
+  inline explicit SpinLockHolder(SpinLock* l) : lock_(l) { l->Lock(); }
+  inline ~SpinLockHolder() { lock_->Unlock(); }
+};
+#endif  // #ifdef __cplusplus
+
+// This keeps us from using base/spinlock.h's implementation of SpinLock.
+#define BASE_SPINLOCK_H_ 1
+
+#endif  /* #if 0 */
+
+/* ----------------------------------- MMAP and other memory allocation */
+
+#ifndef HAVE_MMAP   /* not true for MSVC, but may be true for msys */
+#define MAP_FAILED  0
+#define MREMAP_FIXED  2  /* the value in linux, though it doesn't really matter */
+/* These, when combined with the mmap invariants below, yield the proper action */
+#define PROT_READ      PAGE_READWRITE
+#define PROT_WRITE     PAGE_READWRITE
+#define MAP_ANONYMOUS  MEM_RESERVE
+#define MAP_PRIVATE    MEM_COMMIT
+#define MAP_SHARED     MEM_RESERVE   /* value of this #define is 100% arbitrary */
+
+#if __STDC__ && !defined(__MINGW32__)
+typedef _off_t off_t;
+#endif
+
+/* VirtualAlloc only replaces for mmap when certain invariants are kept. */
+inline void *mmap(void *addr, size_t length, int prot, int flags,
+                  int fd, off_t offset) {
+  if (addr == NULL && fd == -1 && offset == 0 &&
+      prot == (PROT_READ|PROT_WRITE) && flags == (MAP_PRIVATE|MAP_ANONYMOUS)) {
+    return VirtualAlloc(0, length, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  } else {
+    return NULL;
+  }
+}
+
+inline int munmap(void *addr, size_t length) {
+  return VirtualFree(addr, 0, MEM_RELEASE) ? 0 : -1;
+}
+#endif  /* HAVE_MMAP */
+
+/* We could maybe use VirtualAlloc for sbrk as well, but no need */
+inline void *sbrk(intptr_t increment) {
+  // sbrk returns -1 on failure
+  return (void*)-1;
+}
+
+
+/* ----------------------------------- STRING ROUTINES */
+
+/*
+ * We can't just use _vsnprintf and _snprintf as drop-in-replacements,
+ * because they don't always NUL-terminate. :-(  We also can't use the
+ * name vsnprintf, since windows defines that (but not snprintf (!)).
+ */
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+/* We can use safe CRT functions, which have the required functionality */
+inline int perftools_vsnprintf(char *str, size_t size, const char *format,
+                               va_list ap) {
+  return vsnprintf_s(str, size, _TRUNCATE, format, ap);
+}
+#else
+inline int perftools_vsnprintf(char *str, size_t size, const char *format,
+                               va_list ap) {
+  if (size == 0)        /* not even room for a \0? */
+    return -1;          /* not what C99 says to do, but what windows does */
+  str[size-1] = '\0';
+  return _vsnprintf(str, size-1, format, ap);
+}
+#endif
+
+#ifndef HAVE_SNPRINTF
+inline int snprintf(char *str, size_t size, const char *format, ...) {
+  va_list ap;
+  int r;
+  va_start(ap, format);
+  r = perftools_vsnprintf(str, size, format, ap);
+  va_end(ap);
+  return r;
+}
+#endif
+
+#define PRIx64  "I64x"
+#define SCNx64  "I64x"
+#define PRId64  "I64d"
+#define SCNd64  "I64d"
+#define PRIu64  "I64u"
+#ifdef _WIN64
+# define PRIuPTR "llu"
+# define PRIxPTR "llx"
+#else
+# define PRIuPTR "lu"
+# define PRIxPTR "lx"
+#endif
+
+/* ----------------------------------- FILE IO */
+
+#ifndef PATH_MAX
+#define PATH_MAX 1024
+#endif
+#ifndef __MINGW32__
+enum { STDIN_FILENO = 0, STDOUT_FILENO = 1, STDERR_FILENO = 2 };
+#endif
+#ifndef O_RDONLY
+#define O_RDONLY  _O_RDONLY
+#endif
+
+#if __STDC__ && !defined(__MINGW32__)
+/* These functions are considered non-standard */
+inline int access(const char *pathname, int mode) {
+  return _access(pathname, mode);
+}
+inline int open(const char *pathname, int flags, int mode = 0) {
+  return _open(pathname, flags, mode);
+}
+inline int close(int fd) {
+  return _close(fd);
+}
+inline ssize_t read(int fd, void *buf, size_t count) {
+  return _read(fd, buf, count);
+}
+inline ssize_t write(int fd, const void *buf, size_t count) {
+  return _write(fd, buf, count);
+}
+inline off_t lseek(int fd, off_t offset, int whence) {
+  return _lseek(fd, offset, whence);
+}
+inline char *getcwd(char *buf, size_t size) {
+  return _getcwd(buf, size);
+}
+inline int mkdir(const char *pathname, int) {
+  return _mkdir(pathname);
+}
+
+inline FILE *popen(const char *command, const char *type) {
+  return _popen(command, type);
+}
+inline int pclose(FILE *stream) {
+  return _pclose(stream);
+}
+#endif
+
+EXTERN_C PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len);
+
+/* ----------------------------------- SYSTEM/PROCESS */
+
+#ifndef HAVE_PID_T
+typedef int pid_t;
+#endif
+
+#if __STDC__ && !defined(__MINGW32__)
+inline pid_t getpid(void) { return _getpid(); }
+#endif
+inline pid_t getppid(void) { return 0; }
+
+/* Handle case when poll is used to simulate sleep.
*/ +inline int poll(struct pollfd* fds, int nfds, int timeout) { + assert(fds == NULL); + assert(nfds == 0); + Sleep(timeout); + return 0; +} + +EXTERN_C PERFTOOLS_DLL_DECL int getpagesize(); /* in port.cc */ + +/* ----------------------------------- OTHER */ + +inline void srandom(unsigned int seed) { srand(seed); } +inline long random(void) { return rand(); } + +#ifndef HAVE_DECL_SLEEP +#define HAVE_DECL_SLEEP 0 +#endif + +#if !HAVE_DECL_SLEEP +inline unsigned int sleep(unsigned int seconds) { + Sleep(seconds * 1000); + return 0; +} +#endif + +// mingw64 seems to define timespec (though mingw.org mingw doesn't), +// protected by the _TIMESPEC_DEFINED macro. +#ifndef _TIMESPEC_DEFINED +struct timespec { + int tv_sec; + int tv_nsec; +}; +#endif + +#ifndef HAVE_DECL_NANOSLEEP +#define HAVE_DECL_NANOSLEEP 0 +#endif + +// latest mingw64 has nanosleep. Earlier mingw and MSVC do not +#if !HAVE_DECL_NANOSLEEP +inline int nanosleep(const struct timespec *req, struct timespec *rem) { + Sleep(req->tv_sec * 1000 + req->tv_nsec / 1000000); + return 0; +} +#endif + +#ifndef __MINGW32__ +#if defined(_MSC_VER) && _MSC_VER < 1800 +inline long long int strtoll(const char *nptr, char **endptr, int base) { + return _strtoi64(nptr, endptr, base); +} +inline unsigned long long int strtoull(const char *nptr, char **endptr, + int base) { + return _strtoui64(nptr, endptr, base); +} +inline long long int strtoq(const char *nptr, char **endptr, int base) { + return _strtoi64(nptr, endptr, base); +} +#endif +inline unsigned long long int strtouq(const char *nptr, char **endptr, + int base) { + return _strtoui64(nptr, endptr, base); +} +inline long long atoll(const char *nptr) { + return _atoi64(nptr); +} +#endif + +#define __THROW throw() + +/* ----------------------------------- TCMALLOC-SPECIFIC */ + +/* tcmalloc.cc calls this so we can patch VirtualAlloc() et al. */ +extern void PatchWindowsFunctions(); + +#endif /* _WIN32 */ + +#undef inline +#undef EXTERN_C + +#endif /* GOOGLE_BASE_WINDOWS_H_ */ diff --git a/src/third_party/gperftools-2.5/src/windows/preamble_patcher.cc b/src/third_party/gperftools-2.5/src/windows/preamble_patcher.cc new file mode 100644 index 00000000000..ec055373c11 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/preamble_patcher.cc @@ -0,0 +1,736 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ * Author: Scott Francis
+ *
+ * Implementation of PreamblePatcher
+ */
+
+#include "preamble_patcher.h"
+
+#include "mini_disassembler.h"
+
+// compatibility shims
+#include "base/logging.h"
+
+// Definitions of assembly statements we need
+#define ASM_JMP32REL 0xE9
+#define ASM_INT3 0xCC
+#define ASM_JMP32ABS_0 0xFF
+#define ASM_JMP32ABS_1 0x25
+#define ASM_JMP8REL 0xEB
+#define ASM_JCC32REL_0 0x0F
+#define ASM_JCC32REL_1_MASK 0x80
+#define ASM_NOP 0x90
+// X64 opcodes
+#define ASM_REXW 0x48
+#define ASM_MOVRAX_IMM 0xB8
+#define ASM_JMP 0xFF
+#define ASM_JMP_RAX 0xE0
+
+namespace sidestep {
+
+PreamblePatcher::PreamblePage* PreamblePatcher::preamble_pages_ = NULL;
+long PreamblePatcher::granularity_ = 0;
+long PreamblePatcher::pagesize_ = 0;
+bool PreamblePatcher::initialized_ = false;
+
+static const unsigned int kPreamblePageMagic = 0x4347414D; // "MAGC"
+
+// Handle a special case that we see with functions that point into an
+// IAT table (including functions linked statically into the
+// application): these functions already start with ASM_JMP32*.  For
+// instance, malloc() might be implemented as a JMP to __malloc().
+// This function follows the initial JMPs for us, until we get to the
+// place where the actual code is defined.  If we get to stop_before,
+// we return the address before stop_before.  The stop_before_trampoline
+// flag is used in 64-bit mode.  If true, we will return the address
+// before a trampoline is detected.  Trampolines are defined as:
+//
+//    nop
+//    mov rax, <replacement_function>
+//    jmp rax
+//
+// See PreamblePatcher::RawPatchWithStub for more information.
+void* PreamblePatcher::ResolveTargetImpl(unsigned char* target,
+                                         unsigned char* stop_before,
+                                         bool stop_before_trampoline) {
+  if (target == NULL)
+    return NULL;
+  while (1) {
+    unsigned char* new_target;
+    if (target[0] == ASM_JMP32REL) {
+      // target[1-4] holds the place the jmp goes to, but it's
+      // relative to the next instruction.
+      int relative_offset;   // Windows guarantees int is 4 bytes
+      SIDESTEP_ASSERT(sizeof(relative_offset) == 4);
+      memcpy(reinterpret_cast<void*>(&relative_offset),
+             reinterpret_cast<void*>(target + 1), 4);
+      new_target = target + 5 + relative_offset;
+    } else if (target[0] == ASM_JMP8REL) {
+      // Visual Studio 7.1 implements new[] as an 8 bit jump to new
+      signed char relative_offset;
+      memcpy(reinterpret_cast<void*>(&relative_offset),
+             reinterpret_cast<void*>(target + 1), 1);
+      new_target = target + 2 + relative_offset;
+    } else if (target[0] == ASM_JMP32ABS_0 &&
+               target[1] == ASM_JMP32ABS_1) {
+      jmp32rel:
+      // Visual Studio seems to sometimes do it this way instead of the
+      // previous way.  Not sure what the rules are, but it was happening
+      // with operator new in some binaries.
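+      // As a concrete illustration (hypothetical bytes): a 32-bit import
+      // thunk for malloc might read
+      //   FF 25 00 30 40 00   jmp *0x403000   ; jump via absolute address
+      // where 0x403000 is the IAT slot holding the real address of the
+      // function; in 64-bit mode the same FF 25 encoding is RIP-relative,
+      // which is why the two cases are decoded differently below.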
+      void** new_target_v;
+      if (kIs64BitBinary) {
+        // In 64-bit mode JMPs are RIP-relative, not absolute
+        int target_offset;
+        memcpy(reinterpret_cast<void*>(&target_offset),
+               reinterpret_cast<void*>(target + 2), 4);
+        new_target_v = reinterpret_cast<void**>(target + target_offset + 6);
+      } else {
+        SIDESTEP_ASSERT(sizeof(new_target) == 4);
+        memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
+      }
+      new_target = reinterpret_cast<unsigned char*>(*new_target_v);
+    } else if (kIs64BitBinary && target[0] == ASM_REXW
+               && target[1] == ASM_JMP32ABS_0
+               && target[2] == ASM_JMP32ABS_1) {
+      // In Visual Studio 2012 we're seeing jumps like this:
+      //   rex.W jmpq *0x11d019(%rip)
+      //
+      // According to the docs I have, the rex prefix is actually unneeded
+      // and can be ignored.  I.e. the docs say that for jumps like this the
+      // operand already defaults to 64-bit.  But clearly it breaks the
+      // absolute-jump detection above, so we just skip the rex.
+      target++;
+      goto jmp32rel;
+    } else {
+      break;
+    }
+    if (new_target == stop_before)
+      break;
+    if (stop_before_trampoline && *new_target == ASM_NOP
+        && new_target[1] == ASM_REXW && new_target[2] == ASM_MOVRAX_IMM)
+      break;
+    target = new_target;
+  }
+  return target;
+}
+
+// Special case scoped_ptr to avoid dependency on scoped_ptr below.
+class DeleteUnsignedCharArray {
+ public:
+  DeleteUnsignedCharArray(unsigned char* array) : array_(array) {
+  }
+
+  ~DeleteUnsignedCharArray() {
+    if (array_) {
+      PreamblePatcher::FreePreambleBlock(array_);
+    }
+  }
+
+  unsigned char* Release() {
+    unsigned char* temp = array_;
+    array_ = NULL;
+    return temp;
+  }
+
+ private:
+  unsigned char* array_;
+};
+
+SideStepError PreamblePatcher::RawPatchWithStubAndProtections(
+    void* target_function, void *replacement_function,
+    unsigned char* preamble_stub, unsigned long stub_size,
+    unsigned long* bytes_needed) {
+  // We need to be able to write to a process-local copy of the first
+  // MAX_PREAMBLE_STUB_SIZE bytes of target_function
+  DWORD old_target_function_protect = 0;
+  BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function),
+                                    MAX_PREAMBLE_STUB_SIZE,
+                                    PAGE_EXECUTE_READWRITE,
+                                    &old_target_function_protect);
+  if (!succeeded) {
+    SIDESTEP_ASSERT(false && "Failed to make page containing target function "
+                    "copy-on-write.");
+    return SIDESTEP_ACCESS_DENIED;
+  }
+
+  SideStepError error_code = RawPatchWithStub(target_function,
+                                              replacement_function,
+                                              preamble_stub,
+                                              stub_size,
+                                              bytes_needed);
+
+  // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of
+  // pTargetFunction to what they were before we started goofing around.
+  // We do this regardless of whether the patch succeeded or not.
+  succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function),
+                               MAX_PREAMBLE_STUB_SIZE,
+                               old_target_function_protect,
+                               &old_target_function_protect);
+  if (!succeeded) {
+    SIDESTEP_ASSERT(false &&
+                    "Failed to restore protection to target function.");
+    // We must not return an error here because the function has
+    // likely actually been patched, and returning an error might
+    // cause our client code not to unpatch it.  So we just keep
+    // going.
+  }
+
+  if (SIDESTEP_SUCCESS != error_code) {  // Testing RawPatchWithStub, above
+    SIDESTEP_ASSERT(false);
+    return error_code;
+  }
+
+  // Flush the instruction cache to make sure the processor doesn't execute the
+  // old version of the instructions (before our patch).
+  //
+  // FlushInstructionCache is actually a no-op at least on
+  // single-processor XP machines.  I'm not sure why this is so, but
+  // it is, yet I want to keep the call to the API here for
+  // correctness in case there is a difference in some variants of
+  // Windows/hardware.
+  succeeded = ::FlushInstructionCache(::GetCurrentProcess(),
+                                      target_function,
+                                      MAX_PREAMBLE_STUB_SIZE);
+  if (!succeeded) {
+    SIDESTEP_ASSERT(false && "Failed to flush instruction cache.");
+    // We must not return an error here because the function has actually
+    // been patched, and returning an error would likely cause our client
+    // code not to unpatch it.  So we just keep going.
+  }
+
+  return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::RawPatch(void* target_function,
+                                        void* replacement_function,
+                                        void** original_function_stub) {
+  if (!target_function || !replacement_function || !original_function_stub ||
+      (*original_function_stub) || target_function == replacement_function) {
+    SIDESTEP_ASSERT(false && "Preconditions not met");
+    return SIDESTEP_INVALID_PARAMETER;
+  }
+
+  BOOL succeeded = FALSE;
+
+  // First, deal with a special case that we see with functions that
+  // point into an IAT table (including functions linked statically
+  // into the application): these functions already start with
+  // ASM_JMP32REL.  For instance, malloc() might be implemented as a
+  // JMP to __malloc().  In that case, we replace the destination of
+  // the JMP (__malloc), rather than the JMP itself (malloc).  This
+  // way we get the correct behavior no matter how malloc gets called.
+  void* new_target = ResolveTarget(target_function);
+  if (new_target != target_function) {
+    target_function = new_target;
+  }
+
+  // In 64-bit mode, preamble_stub must be within 2GB of target function
+  // so that if target contains a jump, we can translate it.
+  unsigned char* preamble_stub = AllocPreambleBlockNear(target_function);
+  if (!preamble_stub) {
+    SIDESTEP_ASSERT(false && "Unable to allocate preamble-stub.");
+    return SIDESTEP_INSUFFICIENT_BUFFER;
+  }
+
+  // Frees the array at end of scope.
+  DeleteUnsignedCharArray guard_preamble_stub(preamble_stub);
+
+  SideStepError error_code = RawPatchWithStubAndProtections(
+      target_function, replacement_function, preamble_stub,
+      MAX_PREAMBLE_STUB_SIZE, NULL);
+
+  if (SIDESTEP_SUCCESS != error_code) {
+    SIDESTEP_ASSERT(false);
+    return error_code;
+  }
+
+  // Flush the instruction cache to make sure the processor doesn't execute the
+  // old version of the instructions (before our patch).
+  //
+  // FlushInstructionCache is actually a no-op at least on
+  // single-processor XP machines.  I'm not sure why this is so, but
+  // it is, yet I want to keep the call to the API here for
+  // correctness in case there is a difference in some variants of
+  // Windows/hardware.
+  succeeded = ::FlushInstructionCache(::GetCurrentProcess(),
+                                      target_function,
+                                      MAX_PREAMBLE_STUB_SIZE);
+  if (!succeeded) {
+    SIDESTEP_ASSERT(false && "Failed to flush instruction cache.");
+    // We must not return an error here because the function has actually
+    // been patched, and returning an error would likely cause our client
+    // code not to unpatch it.  So we just keep going.
+  }
+
+  SIDESTEP_LOG("PreamblePatcher::RawPatch successfully patched.");
+
+  // detach the scoped pointer so the memory is not freed
+  *original_function_stub =
+      reinterpret_cast<void*>(guard_preamble_stub.Release());
+  return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::Unpatch(void* target_function,
+                                       void* replacement_function,
+                                       void* original_function_stub) {
+  SIDESTEP_ASSERT(target_function && replacement_function &&
+                  original_function_stub);
+  if (!target_function || !replacement_function ||
+      !original_function_stub) {
+    return SIDESTEP_INVALID_PARAMETER;
+  }
+
+  // Before unpatching, target_function should be a JMP to
+  // replacement_function.  If it's not, then either it's an error, or
+  // we're falling into the case where the original instruction was a
+  // JMP, and we patched the jumped_to address rather than the JMP
+  // itself.  (For instance, if malloc() is just a JMP to __malloc(),
+  // we patched __malloc() and not malloc().)
+  unsigned char* target = reinterpret_cast<unsigned char*>(target_function);
+  target = reinterpret_cast<unsigned char*>(
+      ResolveTargetImpl(
+          target, reinterpret_cast<unsigned char*>(replacement_function),
+          true));
+  // We should end at the function we patched.  When we patch, we insert
+  // an ASM_JMP32REL instruction, so look for that as a sanity check.
+  if (target[0] != ASM_JMP32REL) {
+    SIDESTEP_ASSERT(false &&
+                    "target_function does not look like it was patched.");
+    return SIDESTEP_INVALID_PARAMETER;
+  }
+
+  const unsigned int kRequiredTargetPatchBytes = 5;
+
+  // We need to be able to write to a process-local copy of the first
+  // kRequiredTargetPatchBytes bytes of target_function
+  DWORD old_target_function_protect = 0;
+  BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target),
+                                    kRequiredTargetPatchBytes,
+                                    PAGE_EXECUTE_READWRITE,
+                                    &old_target_function_protect);
+  if (!succeeded) {
+    SIDESTEP_ASSERT(false && "Failed to make page containing target function "
+                    "copy-on-write.");
+    return SIDESTEP_ACCESS_DENIED;
+  }
+
+  unsigned char* preamble_stub = reinterpret_cast<unsigned char*>(
+      original_function_stub);
+
+  // Disassemble the preamble of the stub and copy the bytes back to target.
+  // If we've done any conditional jumps in the preamble we need to convert
+  // them back to the original REL8 jumps in the target.
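+  // For example (hypothetical encodings): if patching expanded a short
+  // conditional jump 74 xx (je rel8off) into the stub as its near form
+  // 0F 84 xx xx xx xx (je rel32off), the loop below re-encodes it as a
+  // REL8 jump when copying it back, so the restored preamble matches the
+  // original byte layout.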
+ MiniDisassembler disassembler; + unsigned int preamble_bytes = 0; + unsigned int target_bytes = 0; + while (target_bytes < kRequiredTargetPatchBytes) { + unsigned int cur_bytes = 0; + InstructionType instruction_type = + disassembler.Disassemble(preamble_stub + preamble_bytes, cur_bytes); + if (IT_JUMP == instruction_type) { + unsigned int jump_bytes = 0; + SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION; + if (IsNearConditionalJump(preamble_stub + preamble_bytes, cur_bytes) || + IsNearRelativeJump(preamble_stub + preamble_bytes, cur_bytes) || + IsNearAbsoluteCall(preamble_stub + preamble_bytes, cur_bytes) || + IsNearRelativeCall(preamble_stub + preamble_bytes, cur_bytes)) { + jump_ret = PatchNearJumpOrCall(preamble_stub + preamble_bytes, + cur_bytes, target + target_bytes, + &jump_bytes, MAX_PREAMBLE_STUB_SIZE); + } + if (jump_ret == SIDESTEP_JUMP_INSTRUCTION) { + SIDESTEP_ASSERT(false && + "Found unsupported jump instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + target_bytes += jump_bytes; + } else if (IT_GENERIC == instruction_type) { + if (IsMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes)) { + unsigned int mov_bytes = 0; + if (PatchMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes, + target + target_bytes, &mov_bytes, + MAX_PREAMBLE_STUB_SIZE) + != SIDESTEP_SUCCESS) { + SIDESTEP_ASSERT(false && + "Found unsupported generic instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + } else { + memcpy(reinterpret_cast<void*>(target + target_bytes), + reinterpret_cast<void*>(reinterpret_cast<unsigned char*>( + original_function_stub) + preamble_bytes), cur_bytes); + target_bytes += cur_bytes; + } + } else { + SIDESTEP_ASSERT(false && + "Found unsupported instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + preamble_bytes += cur_bytes; + } + + FreePreambleBlock(reinterpret_cast<unsigned char*>(original_function_stub)); + + // Restore the protection of the first kRequiredTargetPatchBytes bytes of + // target to what they were before we started goofing around. + succeeded = ::VirtualProtect(reinterpret_cast<void*>(target), + kRequiredTargetPatchBytes, + old_target_function_protect, + &old_target_function_protect); + + // Flush the instruction cache to make sure the processor doesn't execute the + // old version of the instructions (before our patch). + // + // See comment on FlushInstructionCache elsewhere in this file. 
+  succeeded = ::FlushInstructionCache(::GetCurrentProcess(),
+                                      target,
+                                      MAX_PREAMBLE_STUB_SIZE);
+  if (!succeeded) {
+    SIDESTEP_ASSERT(false && "Failed to flush instruction cache.");
+    return SIDESTEP_UNEXPECTED;
+  }
+
+  SIDESTEP_LOG("PreamblePatcher::Unpatch successfully unpatched.");
+  return SIDESTEP_SUCCESS;
+}
+
+void PreamblePatcher::Initialize() {
+  if (!initialized_) {
+    SYSTEM_INFO si = { 0 };
+    ::GetSystemInfo(&si);
+    granularity_ = si.dwAllocationGranularity;
+    pagesize_ = si.dwPageSize;
+    initialized_ = true;
+  }
+}
+
+unsigned char* PreamblePatcher::AllocPreambleBlockNear(void* target) {
+  PreamblePage* preamble_page = preamble_pages_;
+  while (preamble_page != NULL) {
+    if (preamble_page->free_ != NULL) {
+      __int64 val = reinterpret_cast<__int64>(preamble_page) -
+          reinterpret_cast<__int64>(target);
+      if ((val > 0 && val + pagesize_ <= INT_MAX) ||
+          (val < 0 && val >= INT_MIN)) {
+        break;
+      }
+    }
+    preamble_page = preamble_page->next_;
+  }
+
+  // The free_ member of the page is used to store the next available block
+  // of memory to use or NULL if there are no chunks available, in which case
+  // we'll allocate a new page.
+  if (preamble_page == NULL || preamble_page->free_ == NULL) {
+    // Create a new preamble page and initialize the free list
+    preamble_page = reinterpret_cast<PreamblePage*>(AllocPageNear(target));
+    SIDESTEP_ASSERT(preamble_page != NULL && "Could not allocate page!");
+    void** pp = &preamble_page->free_;
+    unsigned char* ptr = reinterpret_cast<unsigned char*>(preamble_page) +
+        MAX_PREAMBLE_STUB_SIZE;
+    unsigned char* limit = reinterpret_cast<unsigned char*>(preamble_page) +
+        pagesize_;
+    while (ptr < limit) {
+      *pp = ptr;
+      pp = reinterpret_cast<void**>(ptr);
+      ptr += MAX_PREAMBLE_STUB_SIZE;
+    }
+    *pp = NULL;
+    // Insert the new page into the list
+    preamble_page->magic_ = kPreamblePageMagic;
+    preamble_page->next_ = preamble_pages_;
+    preamble_pages_ = preamble_page;
+  }
+  unsigned char* ret = reinterpret_cast<unsigned char*>(preamble_page->free_);
+  preamble_page->free_ = *(reinterpret_cast<void**>(preamble_page->free_));
+  return ret;
+}
+
+void PreamblePatcher::FreePreambleBlock(unsigned char* block) {
+  SIDESTEP_ASSERT(block != NULL);
+  SIDESTEP_ASSERT(granularity_ != 0);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(block);
+  ptr -= ptr & (granularity_ - 1);
+  PreamblePage* preamble_page = reinterpret_cast<PreamblePage*>(ptr);
+  SIDESTEP_ASSERT(preamble_page->magic_ == kPreamblePageMagic);
+  *(reinterpret_cast<void**>(block)) = preamble_page->free_;
+  preamble_page->free_ = block;
+}
+
+void* PreamblePatcher::AllocPageNear(void* target) {
+  MEMORY_BASIC_INFORMATION mbi = { 0 };
+  if (!::VirtualQuery(target, &mbi, sizeof(mbi))) {
+    SIDESTEP_ASSERT(false && "VirtualQuery failed on target address");
+    return 0;
+  }
+  if (initialized_ == false) {
+    PreamblePatcher::Initialize();
+    SIDESTEP_ASSERT(initialized_);
+  }
+  void* pv = NULL;
+  unsigned char* allocation_base = reinterpret_cast<unsigned char*>(
+      mbi.AllocationBase);
+  __int64 i = 1;
+  bool high_target = reinterpret_cast<__int64>(target) > UINT_MAX;
+  while (pv == NULL) {
+    __int64 val = reinterpret_cast<__int64>(allocation_base) -
+        (i * granularity_);
+    if (high_target &&
+        reinterpret_cast<__int64>(target) - val > INT_MAX) {
+      // We're further than 2GB from the target
+      break;
+    } else if (val <= 0) {
+      // Candidate address is not positive
+      break;
+    }
+    pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base -
+                            (i++ * granularity_)),
+                        pagesize_, MEM_COMMIT | MEM_RESERVE,
+                        PAGE_EXECUTE_READWRITE);
+  }
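+  // A hypothetical walk-through of the loop above: with 64KB allocation
+  // granularity and an allocation base of 0x7FF700000000, the candidates
+  // are 0x7FF6FFFF0000, 0x7FF6FFFE0000, ..., one granularity step lower
+  // each time, until VirtualAlloc succeeds or the candidate drops more
+  // than 2GB below the target.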
+
+  // We couldn't allocate low, try to allocate high
+  if (pv == NULL) {
+    i = 1;
+    // Round up to the next multiple of page granularity
+    allocation_base = reinterpret_cast<unsigned char*>(
+        (reinterpret_cast<__int64>(target) &
+         (~(granularity_ - 1))) + granularity_);
+    while (pv == NULL) {
+      __int64 val = reinterpret_cast<__int64>(allocation_base) +
+          (i * granularity_) - reinterpret_cast<__int64>(target);
+      if (val > INT_MAX || val < 0) {
+        // We're too far or we overflowed
+        break;
+      }
+      pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base +
+                              (i++ * granularity_)),
+                          pagesize_, MEM_COMMIT | MEM_RESERVE,
+                          PAGE_EXECUTE_READWRITE);
+    }
+  }
+  return pv;
+}
+
+bool PreamblePatcher::IsShortConditionalJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return (*(target) & 0x70) == 0x70 && instruction_size == 2;
+}
+
+bool PreamblePatcher::IsShortJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return target[0] == 0xeb && instruction_size == 2;
+}
+
+bool PreamblePatcher::IsNearConditionalJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xf && (*(target + 1) & 0x80) == 0x80 &&
+      instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xe9 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsNearAbsoluteCall(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xff && (*(target + 1) & 0x10) == 0x10 &&
+      instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeCall(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xe8 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsMovWithDisplacement(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  // In this case, the ModRM byte's mod field will be 0 and r/m will be 101b (5)
+  return instruction_size == 7 && *target == 0x48 && *(target + 1) == 0x8b &&
+      (*(target + 2) >> 6) == 0 && (*(target + 2) & 0x7) == 5;
+}
+
+SideStepError PreamblePatcher::PatchShortConditionalJump(
+    unsigned char* source,
+    unsigned int instruction_size,
+    unsigned char* target,
+    unsigned int* target_bytes,
+    unsigned int target_size) {
+  // note: rel8 offset is signed.  Thus we need to ask for signed char
+  // to handle negative offsets right
+  unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
+  unsigned char* stub_jump_from = target + 6;
+  __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+  if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+    SIDESTEP_ASSERT(false &&
+                    "Unable to fix up short jump because target"
+                    " is too far away.");
+    return SIDESTEP_JUMP_INSTRUCTION;
+  }
+
+  *target_bytes = 6;
+  if (target_size > *target_bytes) {
+    // Convert the short jump to a near jump.
+    //
+    // 0f 8x xx xx xx xx = Jcc rel32off
+    unsigned short jmpcode = ((0x80 | (source[0] & 0xf)) << 8) | 0x0f;
+    memcpy(reinterpret_cast<void*>(target),
+           reinterpret_cast<void*>(&jmpcode), 2);
+    memcpy(reinterpret_cast<void*>(target + 2),
+           reinterpret_cast<void*>(&fixup_jump_offset), 4);
+  }
+
+  return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchShortJump(
+    unsigned char* source,
+    unsigned int instruction_size,
+    unsigned char* target,
+    unsigned int* target_bytes,
+    unsigned int target_size) {
+  // note: rel8 offset is _signed_.  Thus we need signed char here.
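+  // Worked example (hypothetical): for the two-byte instruction EB FE
+  // (jmp -2, a jump to itself), source[1] is 0xFE, i.e. -2 as a signed
+  // char, so original_jump_dest below is (source + 2) + (-2) == source.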
+ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]); + unsigned char* stub_jump_from = target + 5; + __int64 fixup_jump_offset = original_jump_dest - stub_jump_from; + if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up short jump because target" + " is too far away."); + return SIDESTEP_JUMP_INSTRUCTION; + } + + *target_bytes = 5; + if (target_size > *target_bytes) { + // Convert the short jump to a near jump. + // + // e9 xx xx xx xx = jmp rel32off + target[0] = 0xe9; + memcpy(reinterpret_cast<void*>(target + 1), + reinterpret_cast<void*>(&fixup_jump_offset), 4); + } + + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::PatchNearJumpOrCall( + unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size) { + SIDESTEP_ASSERT(instruction_size == 5 || instruction_size == 6); + unsigned int jmp_offset_in_instruction = instruction_size == 5 ? 1 : 2; + unsigned char* original_jump_dest = reinterpret_cast<unsigned char *>( + reinterpret_cast<__int64>(source + instruction_size) + + *(reinterpret_cast<int*>(source + jmp_offset_in_instruction))); + unsigned char* stub_jump_from = target + instruction_size; + __int64 fixup_jump_offset = original_jump_dest - stub_jump_from; + if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up near jump because target" + " is too far away."); + return SIDESTEP_JUMP_INSTRUCTION; + } + + if ((fixup_jump_offset < SCHAR_MAX && fixup_jump_offset > SCHAR_MIN)) { + *target_bytes = 2; + if (target_size > *target_bytes) { + // If the new offset is in range, use a short jump instead of a near jump. 
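+      // E.g. (hypothetical): a near conditional jump 0F 84 xx xx xx xx
+      // (je rel32off) whose fixed-up offset fits in a signed byte becomes
+      // 74 xx (je rel8off); an unconditional E9 near jump becomes the
+      // ASM_JMP8REL (EB) form in the else branch.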
+ if (source[0] == ASM_JCC32REL_0 && + (source[1] & ASM_JCC32REL_1_MASK) == ASM_JCC32REL_1_MASK) { + unsigned short jmpcode = (static_cast<unsigned char>( + fixup_jump_offset) << 8) | (0x70 | (source[1] & 0xf)); + memcpy(reinterpret_cast<void*>(target), + reinterpret_cast<void*>(&jmpcode), + 2); + } else { + target[0] = ASM_JMP8REL; + target[1] = static_cast<unsigned char>(fixup_jump_offset); + } + } + } else { + *target_bytes = instruction_size; + if (target_size > *target_bytes) { + memcpy(reinterpret_cast<void*>(target), + reinterpret_cast<void*>(source), + jmp_offset_in_instruction); + memcpy(reinterpret_cast<void*>(target + jmp_offset_in_instruction), + reinterpret_cast<void*>(&fixup_jump_offset), + 4); + } + } + + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::PatchMovWithDisplacement( + unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size) { + SIDESTEP_ASSERT(instruction_size == 7); + const int mov_offset_in_instruction = 3; // 0x48 0x8b 0x0d <offset> + unsigned char* original_mov_dest = reinterpret_cast<unsigned char*>( + reinterpret_cast<__int64>(source + instruction_size) + + *(reinterpret_cast<int*>(source + mov_offset_in_instruction))); + unsigned char* stub_mov_from = target + instruction_size; + __int64 fixup_mov_offset = original_mov_dest - stub_mov_from; + if (fixup_mov_offset > INT_MAX || fixup_mov_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up near MOV because target is too far away."); + return SIDESTEP_UNEXPECTED; + } + *target_bytes = instruction_size; + if (target_size > *target_bytes) { + memcpy(reinterpret_cast<void*>(target), + reinterpret_cast<void*>(source), + mov_offset_in_instruction); + memcpy(reinterpret_cast<void*>(target + mov_offset_in_instruction), + reinterpret_cast<void*>(&fixup_mov_offset), + 4); + } + return SIDESTEP_SUCCESS; +} + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.5/src/windows/preamble_patcher.h b/src/third_party/gperftools-2.5/src/windows/preamble_patcher.h new file mode 100644 index 00000000000..76f158a19a1 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/preamble_patcher.h @@ -0,0 +1,620 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * Author: Scott Francis + * + * Definition of PreamblePatcher + */ + +#ifndef GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ +#define GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ + +#include "config.h" +#include <windows.h> + +// compatibility shim +#include "base/logging.h" +#define SIDESTEP_ASSERT(cond) RAW_DCHECK(cond, #cond) +#define SIDESTEP_LOG(msg) RAW_VLOG(1, msg) + +// Maximum size of the preamble stub. We overwrite at least the first 5 +// bytes of the function. Considering the worst case scenario, we need 4 +// bytes + the max instruction size + 5 more bytes for our jump back to +// the original code. With that in mind, 32 is a good number :) +#ifdef _M_X64 +// In 64-bit mode we may need more room. In 64-bit mode all jumps must be +// within +/-2GB of RIP. Because of this limitation we may need to use a +// trampoline to jump to the replacement function if it is further than 2GB +// away from the target. The trampoline is 14 bytes. +// +// So 4 bytes + max instruction size (17 bytes) + 5 bytes to jump back to the +// original code + trampoline size. 64 bytes is a nice number :-) +#define MAX_PREAMBLE_STUB_SIZE (64) +#else +#define MAX_PREAMBLE_STUB_SIZE (32) +#endif + +// Determines if this is a 64-bit binary. +#ifdef _M_X64 +static const bool kIs64BitBinary = true; +#else +static const bool kIs64BitBinary = false; +#endif + +namespace sidestep { + +// Possible results of patching/unpatching +enum SideStepError { + SIDESTEP_SUCCESS = 0, + SIDESTEP_INVALID_PARAMETER, + SIDESTEP_INSUFFICIENT_BUFFER, + SIDESTEP_JUMP_INSTRUCTION, + SIDESTEP_FUNCTION_TOO_SMALL, + SIDESTEP_UNSUPPORTED_INSTRUCTION, + SIDESTEP_NO_SUCH_MODULE, + SIDESTEP_NO_SUCH_FUNCTION, + SIDESTEP_ACCESS_DENIED, + SIDESTEP_UNEXPECTED, +}; + +#define SIDESTEP_TO_HRESULT(error) \ + MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error) + +class DeleteUnsignedCharArray; + +// Implements a patching mechanism that overwrites the first few bytes of +// a function preamble with a jump to our hook function, which is then +// able to call the original function via a specially-made preamble-stub +// that imitates the action of the original preamble. +// +// NOTE: This patching mechanism should currently only be used for +// non-production code, e.g. unit tests, because it is not threadsafe. +// See the TODO in preamble_patcher_with_stub.cc for instructions on what +// we need to do before using it in production code; it's fairly simple +// but unnecessary for now since we only intend to use it in unit tests. +// +// To patch a function, use either of the typesafe Patch() methods. You +// can unpatch a function using Unpatch(). 
+//
+// Typical usage goes something like this:
+// @code
+// typedef int (*MyTypesafeFuncPtr)(int x);
+// MyTypesafeFuncPtr original_func_stub;
+// int MyTypesafeFunc(int x) { return x + 1; }
+// int HookMyTypesafeFunc(int x) { return 1 + original_func_stub(x); }
+//
+// void MyPatchInitializingFunction() {
+//   SideStepError error = PreamblePatcher::Patch(
+//       MyTypesafeFunc, HookMyTypesafeFunc, &original_func_stub);
+//   if (error != SIDESTEP_SUCCESS) {
+//     // ... error handling ...
+//   }
+//
+//   // ... continue - you have patched the function successfully ...
+// }
+// @endcode
+//
+// Note that there are a number of ways that this method of patching can
+// fail.  The most common are:
+//    - If there is a jump (jxx) instruction in the first 5 bytes of
+//    the function being patched, we cannot patch it because in the
+//    current implementation we do not know how to rewrite relative
+//    jumps after relocating them to the preamble-stub.  Note that
+//    if you really really need to patch a function like this, it
+//    would be possible to add this functionality (but at some cost).
+//    - If there is a return (ret) instruction in the first 5 bytes
+//    we cannot patch the function because it may not be long enough
+//    for the jmp instruction we use to inject our patch.
+//    - If there is another thread currently executing within the bytes
+//    that are copied to the preamble stub, it will crash in an undefined
+//    way.
+//
+// If you get any other error than the above, you're either pointing the
+// patcher at an invalid instruction (e.g. into the middle of a multi-
+// byte instruction, or not at memory containing executable instructions)
+// or there may be a bug in the disassembler we use to find
+// instruction boundaries.
+//
+// NOTE: In optimized builds, when you have very trivial functions that
+// the compiler can reason do not have side effects, the compiler may
+// reuse the result of calling the function with a given parameter, which
+// may mean if you patch the function in between your patch will never get
+// invoked.  See preamble_patcher_test.cc for an example.
+class PERFTOOLS_DLL_DECL PreamblePatcher {
+ public:
+
+  // This is a typesafe version of RawPatch(), identical in all ways
+  // except that it takes a template parameter indicating the type of the
+  // function being patched.
+  //
+  // @param T    The type of the function you are patching. Usually
+  // you will establish this type using a typedef, as in the following
+  // example:
+  // @code
+  // typedef BOOL (WINAPI *MessageBoxPtr)(HWND, LPCTSTR, LPCTSTR, UINT);
+  // MessageBoxPtr original = NULL;
+  // PreamblePatcher::Patch(MessageBox, Hook_MessageBox, &original);
+  // @endcode
+  template <class T>
+  static SideStepError Patch(T target_function,
+                             T replacement_function,
+                             T* original_function_stub) {
+    // NOTE: casting from a function to a pointer is contra the C++
+    // spec.  It's not safe on IA64, but is on i386.  We use
+    // a C-style cast here to emphasize this is not legal C++.
+    return RawPatch((void*)(target_function),
+                    (void*)(replacement_function),
+                    (void**)(original_function_stub));
+  }
+
+  // Patches a named function imported from the named module using
+  // preamble patching.  Uses RawPatch() to do the actual patching
+  // work.
+  //
+  // @param T    The type of the function you are patching.  Must
+  // exactly match the function you specify using module_name and
+  // function_name.
+  //
+  // @param module_name  The name of the module from which the function
+  // is being imported.  Note that the patch will fail if this module
+  // has not already been loaded into the current process.
+  //
+  // @param function_name  The name of the function you wish to patch.
+  //
+  // @param replacement_function  Your replacement function which
+  // will be called whenever code tries to call the original function.
+  //
+  // @param original_function_stub  Pointer to memory that should receive a
+  // pointer that can be used (e.g. in the replacement function) to call the
+  // original function, or NULL to indicate failure.
+  //
+  // @return One of the SideStepError error codes; only SIDESTEP_SUCCESS
+  // indicates success.
+  template <class T>
+  static SideStepError Patch(LPCTSTR module_name,
+                             LPCSTR function_name,
+                             T replacement_function,
+                             T* original_function_stub) {
+    SIDESTEP_ASSERT(module_name && function_name);
+    if (!module_name || !function_name) {
+      SIDESTEP_ASSERT(false &&
+                      "You must specify a module name and function name.");
+      return SIDESTEP_INVALID_PARAMETER;
+    }
+    HMODULE module = ::GetModuleHandle(module_name);
+    SIDESTEP_ASSERT(module != NULL);
+    if (!module) {
+      SIDESTEP_ASSERT(false && "Invalid module name.");
+      return SIDESTEP_NO_SUCH_MODULE;
+    }
+    FARPROC existing_function = ::GetProcAddress(module, function_name);
+    if (!existing_function) {
+      SIDESTEP_ASSERT(
+          false && "Did not find any function with that name in the module.");
+      return SIDESTEP_NO_SUCH_FUNCTION;
+    }
+    // NOTE: casting from a function to a pointer is contra the C++
+    // spec.  It's not safe on IA64, but is on i386.  We use
+    // a C-style cast here to emphasize this is not legal C++.
+    return RawPatch((void*)existing_function, (void*)replacement_function,
+                    (void**)(original_function_stub));
+  }
+
+  // Patches a function by overwriting its first few bytes with
+  // a jump to a different function.  This is the "worker" function
+  // for each of the typesafe Patch() functions.  In most cases,
+  // it is preferable to use the Patch() functions rather than
+  // this one as they do more checking at compile time.
+  //
+  // @param target_function  A pointer to the function that should be
+  // patched.
+  //
+  // @param replacement_function  A pointer to the function that should
+  // replace the target function.  The replacement function must have
+  // exactly the same calling convention and parameters as the original
+  // function.
+  //
+  // @param original_function_stub  Pointer to memory that should receive a
+  // pointer that can be used (e.g. in the replacement function) to call the
+  // original function, or NULL to indicate failure.
+  //
+  // @return One of the SideStepError error codes; only SIDESTEP_SUCCESS
+  // indicates success.
+  //
+  // @note The preamble-stub (the memory pointed to by
+  // *original_function_stub) is allocated on the heap, and (in
+  // production binaries) never destroyed, resulting in a memory leak.  This
+  // will be the case until we implement safe unpatching of a method.
+  // However, it is quite difficult to unpatch a method (because other
+  // threads in the process may be using it) so we are leaving it for now.
+  // See however UnsafeUnpatch, which can be used for binaries where you
+  // know only one thread is running, e.g. unit tests.
+  static SideStepError RawPatch(void* target_function,
+                                void* replacement_function,
+                                void** original_function_stub);
+
+  // Unpatches target_function and deletes the stub that previously could be
+  // used to call the original version of the function.
+  //
+  // DELETES the stub that is passed to the function.
+  //
+  // @param target_function  Pointer to the target function which was
+  // previously patched, i.e. a pointer which value should match the value
+  // of the symbol prior to patching it.
+  //
+  // @param replacement_function  Pointer to the function target_function
+  // was patched to.
+  //
+  // @param original_function_stub  Pointer to the stub returned when
+  // patching, that could be used to call the original version of the
+  // patched function.  This function will also delete the stub, which after
+  // unpatching is useless.
+  //
+  // If your original call was
+  //    Patch(VirtualAlloc, MyVirtualAlloc, &origptr)
+  // then to undo it you would call
+  //    Unpatch(VirtualAlloc, MyVirtualAlloc, origptr);
+  //
+  // @return One of the SideStepError error codes; only SIDESTEP_SUCCESS
+  // indicates success.
+  static SideStepError Unpatch(void* target_function,
+                               void* replacement_function,
+                               void* original_function_stub);
+
+  // A helper routine when patching, which follows jmp instructions at
+  // function addresses, to get to the "actual" function contents.
+  // This allows us to identify two functions that are at different
+  // addresses but actually resolve to the same code.
+  //
+  // @param target_function  Pointer to a function.
+  //
+  // @return  Either target_function (the input parameter), or if
+  // target_function's body consists entirely of a JMP instruction,
+  // the address it JMPs to (or more precisely, the address at the end
+  // of a chain of JMPs).
+  template <class T>
+  static T ResolveTarget(T target_function) {
+    return (T)ResolveTargetImpl((unsigned char*)target_function, NULL);
+  }
+
+  // Allocates a block of memory of size MAX_PREAMBLE_STUB_SIZE that is as
+  // close (within 2GB) as possible to target.  This is done to ensure that
+  // we can perform a relative jump from target to a trampoline if the
+  // replacement function is > +-2GB from target.  This means that we only need
+  // to patch 5 bytes in the target function.
+  //
+  // @param target  Pointer to target function.
+  //
+  // @return  Returns a block of memory of size MAX_PREAMBLE_STUB_SIZE that can
+  // be used to store a function preamble block.
+  static unsigned char* AllocPreambleBlockNear(void* target);
+
+  // Frees a block allocated by AllocPreambleBlockNear.
+  //
+  // @param block  Block that was returned by AllocPreambleBlockNear.
+  static void FreePreambleBlock(unsigned char* block);
+
+ private:
+  friend class DeleteUnsignedCharArray;
+
+  // Used to store data allocated for preamble stubs
+  struct PreamblePage {
+    unsigned int magic_;
+    PreamblePage* next_;
+    // This member points to a linked list of free blocks within the page
+    // or NULL if at the end
+    void* free_;
+  };
+
+  // In 64-bit mode, the replacement function must be within 2GB of the original
+  // target in order to only require 5 bytes for the function patch.  To meet
+  // this requirement we're creating an allocator within this class to
+  // allocate blocks that are within 2GB of a given target.  This member is the
+  // head of a linked list of pages used to allocate blocks that are within
+  // 2GB of the target.
+ static PreamblePage* preamble_pages_; + + // Page granularity + static long granularity_; + + // Page size + static long pagesize_; + + // Determines if the patcher has been initialized. + static bool initialized_; + + // Used to initialize static members. + static void Initialize(); + + // Patches a function by overwriting its first few bytes with + // a jump to a different function. This is similar to the RawPatch + // function except that it uses the stub allocated by the caller + // instead of allocating it. + // + // We call VirtualProtect to make the + // target function writable at least for the duration of the call. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param preamble_stub A pointer to a buffer where the preamble stub + // should be copied. The size of the buffer should be sufficient to + // hold the preamble bytes. + // + // @param stub_size Size in bytes of the buffer allocated for the + // preamble_stub + // + // @param bytes_needed Pointer to a variable that receives the minimum + // number of bytes required for the stub. Can be set to NULL if you're + // not interested. + // + // @return An error code indicating the result of patching. + static SideStepError RawPatchWithStubAndProtections( + void* target_function, + void* replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed); + + // A helper function used by RawPatchWithStubAndProtections -- it + // does everything but the VirtualProtect work. Defined in + // preamble_patcher_with_stub.cc. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param preamble_stub A pointer to a buffer where the preamble stub + // should be copied. The size of the buffer should be sufficient to + // hold the preamble bytes. + // + // @param stub_size Size in bytes of the buffer allocated for the + // preamble_stub + // + // @param bytes_needed Pointer to a variable that receives the minimum + // number of bytes required for the stub. Can be set to NULL if you're + // not interested. + // + // @return An error code indicating the result of patching. + static SideStepError RawPatchWithStub(void* target_function, + void* replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed); + + + // A helper routine when patching, which follows jmp instructions at + // function addresses, to get to the "actual" function contents. + // This allows us to identify two functions that are at different + // addresses but actually resolve to the same code. + // + // @param target_function Pointer to a function. + // + // @param stop_before If, when following JMP instructions from + // target_function, we get to the address stop, we return + // immediately, the address that jumps to stop_before. + // + // @param stop_before_trampoline When following JMP instructions from + // target_function, stop before a trampoline is detected. 
+  // See comment in PreamblePatcher::RawPatchWithStub for more information.
+  // This parameter has no effect in 32-bit mode.
+  //
+  // @return Either target_function (the input parameter), or if
+  // target_function's body consists entirely of a JMP instruction,
+  // the address it JMPs to (or more precisely, the address at the end
+  // of a chain of JMPs).
+  static void* ResolveTargetImpl(unsigned char* target_function,
+                                 unsigned char* stop_before,
+                                 bool stop_before_trampoline = false);
+
+  // Helper routine that attempts to allocate a page as close (within 2GB)
+  // as possible to target.
+  //
+  // @param target Pointer to target function.
+  //
+  // @return Returns an address that is within 2GB of target.
+  static void* AllocPageNear(void* target);
+
+  // Helper routine that determines if a target instruction is a short
+  // conditional jump.
+  //
+  // @param target Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction in bytes.
+  //
+  // @return Returns true if the instruction is a short conditional jump.
+  static bool IsShortConditionalJump(unsigned char* target,
+                                     unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a short
+  // unconditional jump (EB rel8off).
+  //
+  // @param target Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction in bytes.
+  //
+  // @return Returns true if the instruction is a short unconditional jump.
+  static bool IsShortJump(unsigned char *target, unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // conditional jump.
+  //
+  // @param target Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction in bytes.
+  //
+  // @return Returns true if the instruction is a near conditional jump.
+  static bool IsNearConditionalJump(unsigned char* target,
+                                    unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // relative jump.
+  //
+  // @param target Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction in bytes.
+  //
+  // @return Returns true if the instruction is a near relative jump.
+  static bool IsNearRelativeJump(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // absolute call.
+  //
+  // @param target Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction in bytes.
+  //
+  // @return Returns true if the instruction is a near absolute call.
+  static bool IsNearAbsoluteCall(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // relative call.
+  //
+  // @param target Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction in bytes.
+  //
+  // @return Returns true if the instruction is a near relative call.
+  static bool IsNearRelativeCall(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a 64-bit MOV
+  // that uses a RIP-relative displacement.
+  //
+  // @param target Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction in bytes.
+  //
+  // @return Returns true if the instruction is a MOV with displacement.
+  static bool IsMovWithDisplacement(unsigned char* target,
+                                    unsigned int instruction_size);
+
+  // Helper routine that converts a short conditional jump instruction
+  // to a near conditional jump in a target buffer.  Note that the target
+  // buffer must be within 2GB of the source for the near jump to work.
+  //
+  // A short conditional jump instruction is in the format:
+  // 7x xx = Jcc rel8off
+  //
+  // @param source Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction.
+  //
+  // @param target Target buffer to write the new instruction.
+  //
+  // @param target_bytes Pointer to a variable that receives the size, in
+  // bytes, of the instruction(s) written to the target buffer.
+  //
+  // @param target_size Size of the target buffer.
+  //
+  // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchShortConditionalJump(unsigned char* source,
+                                                 unsigned int instruction_size,
+                                                 unsigned char* target,
+                                                 unsigned int* target_bytes,
+                                                 unsigned int target_size);
+
+  // Same as PatchShortConditionalJump, but for the short unconditional
+  // jump (EB rel8off).
+  static SideStepError PatchShortJump(unsigned char* source,
+                                      unsigned int instruction_size,
+                                      unsigned char* target,
+                                      unsigned int* target_bytes,
+                                      unsigned int target_size);
+
+  // Helper routine that converts various jump-like instructions to
+  // corresponding instructions in the target buffer.  What this routine does
+  // is fix up the relative offsets contained in jump instructions to point
+  // back to the original target routine.  Like with
+  // PatchShortConditionalJump, the target buffer must be within 2GB of the
+  // source.
+  //
+  // We currently handle the following instructions:
+  //
+  // E9 xx xx xx xx     = JMP rel32off
+  // 0F 8x xx xx xx xx  = Jcc rel32off
+  // FF /2 xx xx xx xx  = CALL reg/mem32/mem64
+  // E8 xx xx xx xx     = CALL rel32off
+  //
+  // It should not be hard to update this function to support other
+  // instructions that jump to relative targets.
+  //
+  // @param source Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction.
+  //
+  // @param target Target buffer to write the new instruction.
+  //
+  // @param target_bytes Pointer to a variable that receives the size, in
+  // bytes, of the instruction(s) written to the target buffer.
+  //
+  // @param target_size Size of the target buffer.
+  //
+  // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchNearJumpOrCall(unsigned char* source,
+                                           unsigned int instruction_size,
+                                           unsigned char* target,
+                                           unsigned int* target_bytes,
+                                           unsigned int target_size);
+
+  // Helper routine that patches a 64-bit MOV instruction with a RIP-relative
+  // displacement.  The target buffer must be within 2GB of the source.
+  //
+  // 48 8B 0D XX XX XX XX = MOV rel32off
+  //
+  // @param source Pointer to instruction.
+  //
+  // @param instruction_size Size of the instruction.
+  //
+  // @param target Target buffer to write the new instruction.
+  //
+  // @param target_bytes Pointer to a variable that receives the size, in
+  // bytes, of the instruction(s) written to the target buffer.
+  //
+  // @param target_size Size of the target buffer.
+  //
+  // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchMovWithDisplacement(unsigned char* source,
+                                                unsigned int instruction_size,
+                                                unsigned char* target,
+                                                unsigned int* target_bytes,
+                                                unsigned int target_size);
+};
+
+};  // namespace sidestep
+
+#endif  // GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_
diff --git a/src/third_party/gperftools-2.5/src/windows/preamble_patcher_test.cc b/src/third_party/gperftools-2.5/src/windows/preamble_patcher_test.cc
new file mode 100644
index 00000000000..e4605c6fb86
--- /dev/null
+++ b/src/third_party/gperftools-2.5/src/windows/preamble_patcher_test.cc
@@ -0,0 +1,368 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2011, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ * Author: Scott Francis
+ *
+ * Unit tests for PreamblePatcher
+ */
+
+#include "config_for_unittests.h"
+#include "preamble_patcher.h"
+#include "mini_disassembler.h"
+#pragma warning(push)
+#pragma warning(disable:4553)
+#include "auto_testing_hook.h"
+#pragma warning(pop)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+
+// Turning off all optimizations for this file, since the official build's
+// "Whole program optimization" seems to cause the TestPatchUsingDynamicStub
+// test to crash with an access violation.  We debugged this and found
+// that the optimized code accesses a register that is changed by a call
+// to the hook function.
+#pragma optimize("", off)
+
+// A convenience macro to avoid a lot of casting in the tests.
+// I tried to make this a templated function, but windows complained:
+//     error C2782: 'sidestep::SideStepError `anonymous-namespace'::Unpatch(T,T,T *)' : template parameter 'T' is ambiguous
+//         could be 'int (int)'
+//         or       'int (__cdecl *)(int)'
+// My life isn't long enough to try to figure out how to fix this.
+#define UNPATCH(target_function, replacement_function, original_function_stub) \
+  sidestep::PreamblePatcher::Unpatch((void*)(target_function),   \
+                                     (void*)(replacement_function), \
+                                     (void*)(original_function_stub))
+
+namespace {
+
+// Function for testing - this is what we patch
+//
+// NOTE:  Because of the way the compiler optimizes this function in
+// release builds, we need to use a different input value every time we
+// call it within a function, otherwise the compiler will just reuse the
+// last calculated incremented value.
+int __declspec(noinline) IncrementNumber(int i) { +#ifdef _M_X64 + __int64 i2 = i + 1; + return (int) i2; +#else + return i + 1; +#endif +} + +extern "C" int TooShortFunction(int); + +extern "C" int JumpShortCondFunction(int); + +extern "C" int JumpNearCondFunction(int); + +extern "C" int JumpAbsoluteFunction(int); + +extern "C" int CallNearRelativeFunction(int); + +typedef int (*IncrementingFunc)(int); +IncrementingFunc original_function = NULL; + +int HookIncrementNumber(int i) { + SIDESTEP_ASSERT(original_function != NULL); + int incremented_once = original_function(i); + return incremented_once + 1; +} + +// For the AutoTestingHook test, we can't use original_function, because +// all that is encapsulated. +// This function "increments" by 10, just to set it apart from the other +// functions. +int __declspec(noinline) AutoHookIncrementNumber(int i) { + return i + 10; +} + +}; // namespace + +namespace sidestep { + +bool TestDisassembler() { + unsigned int instruction_size = 0; + sidestep::MiniDisassembler disassembler; + void * target = reinterpret_cast<unsigned char *>(IncrementNumber); + void * new_target = PreamblePatcher::ResolveTarget(target); + if (target != new_target) + target = new_target; + + while (1) { + sidestep::InstructionType instructionType = disassembler.Disassemble( + reinterpret_cast<unsigned char *>(target) + instruction_size, + instruction_size); + if (sidestep::IT_RETURN == instructionType) { + return true; + } + } +} + +bool TestPatchWithLongJump() { + original_function = NULL; + void *p = ::VirtualAlloc(reinterpret_cast<void *>(0x0000020000000000), 4096, + MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE); + SIDESTEP_EXPECT_TRUE(p != NULL); + memset(p, 0xcc, 4096); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(IncrementNumber, + (IncrementingFunc) p, + &original_function)); + SIDESTEP_ASSERT((*original_function)(1) == 2); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(IncrementNumber, + (IncrementingFunc)p, + original_function)); + ::VirtualFree(p, 0, MEM_RELEASE); + return true; +} + +bool TestPatchWithPreambleShortCondJump() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(JumpShortCondFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(JumpShortCondFunction, + (void*)HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchWithPreambleNearRelativeCondJump() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(JumpNearCondFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(0); + (*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(JumpNearCondFunction, + HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchWithPreambleAbsoluteJump() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(JumpAbsoluteFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(0); + (*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(JumpAbsoluteFunction, + HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchWithPreambleNearRelativeCall() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + 
sidestep::PreamblePatcher::Patch( + CallNearRelativeFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(0); + (*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(CallNearRelativeFunction, + HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchUsingDynamicStub() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(IncrementNumber, + HookIncrementNumber, + &original_function)); + SIDESTEP_EXPECT_TRUE(original_function); + SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 4); + SIDESTEP_EXPECT_TRUE(original_function(3) == 4); + + // Clearbox test to see that the function has been patched. + sidestep::MiniDisassembler disassembler; + unsigned int instruction_size = 0; + SIDESTEP_EXPECT_TRUE(sidestep::IT_JUMP == disassembler.Disassemble( + reinterpret_cast<unsigned char*>(IncrementNumber), + instruction_size)); + + // Since we patched IncrementNumber, its first statement is a + // jmp to the hook function. So verify that we now can not patch + // IncrementNumber because it starts with a jump. +#if 0 + IncrementingFunc dummy = NULL; + // TODO(joi@chromium.org): restore this test once flag is added to + // disable JMP following + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_JUMP_INSTRUCTION == + sidestep::PreamblePatcher::Patch(IncrementNumber, + HookIncrementNumber, + &dummy)); + + // This test disabled because code in preamble_patcher_with_stub.cc + // asserts before returning the error code -- so there is no way + // to get an error code here, in debug build. + dummy = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_FUNCTION_TOO_SMALL == + sidestep::PreamblePatcher::Patch(TooShortFunction, + HookIncrementNumber, + &dummy)); +#endif + + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(IncrementNumber, + HookIncrementNumber, + original_function)); + return true; +} + +bool PatchThenUnpatch() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(IncrementNumber, + HookIncrementNumber, + &original_function)); + SIDESTEP_EXPECT_TRUE(original_function); + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 3); + SIDESTEP_EXPECT_TRUE(original_function(2) == 3); + + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(IncrementNumber, + HookIncrementNumber, + original_function)); + original_function = NULL; + SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4); + + return true; +} + +bool AutoTestingHookTest() { + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2); + + // Inner scope, so we can test what happens when the AutoTestingHook + // goes out of scope + { + AutoTestingHook hook = MakeTestingHook(IncrementNumber, + AutoHookIncrementNumber); + (void) hook; + SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12); + } + SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4); + + return true; +} + +bool AutoTestingHookInContainerTest() { + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2); + + // Inner scope, so we can test what happens when the AutoTestingHook + // goes out of scope + { + AutoTestingHookHolder hook(MakeTestingHookHolder(IncrementNumber, + AutoHookIncrementNumber)); + (void) hook; + SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12); + } + SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4); + + return true; +} + +bool TestPreambleAllocation() { + __int64 diff = 0; + void* p1 = reinterpret_cast<void*>(0x110000000); + void* p2 = 
reinterpret_cast<void*>(0x810000000); + unsigned char* b1 = PreamblePatcher::AllocPreambleBlockNear(p1); + SIDESTEP_EXPECT_TRUE(b1 != NULL); + diff = reinterpret_cast<__int64>(p1) - reinterpret_cast<__int64>(b1); + // Ensure blocks are within 2GB + SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN); + unsigned char* b2 = PreamblePatcher::AllocPreambleBlockNear(p2); + SIDESTEP_EXPECT_TRUE(b2 != NULL); + diff = reinterpret_cast<__int64>(p2) - reinterpret_cast<__int64>(b2); + SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN); + + // Ensure we're reusing free blocks + unsigned char* b3 = b1; + unsigned char* b4 = b2; + PreamblePatcher::FreePreambleBlock(b1); + PreamblePatcher::FreePreambleBlock(b2); + b1 = PreamblePatcher::AllocPreambleBlockNear(p1); + SIDESTEP_EXPECT_TRUE(b1 == b3); + b2 = PreamblePatcher::AllocPreambleBlockNear(p2); + SIDESTEP_EXPECT_TRUE(b2 == b4); + PreamblePatcher::FreePreambleBlock(b1); + PreamblePatcher::FreePreambleBlock(b2); + + return true; +} + +bool UnitTests() { + return TestPatchWithPreambleNearRelativeCall() && + TestPatchWithPreambleAbsoluteJump() && + TestPatchWithPreambleNearRelativeCondJump() && + TestPatchWithPreambleShortCondJump() && + TestDisassembler() && TestPatchWithLongJump() && + TestPatchUsingDynamicStub() && PatchThenUnpatch() && + AutoTestingHookTest() && AutoTestingHookInContainerTest() && + TestPreambleAllocation(); +} + +}; // namespace sidestep + +int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) { + if (size == 0) // not even room for a \0? + return -1; // not what C99 says to do, but what windows does + str[size-1] = '\0'; + return _vsnprintf(str, size-1, format, ap); +} + +int _tmain(int argc, _TCHAR* argv[]) +{ + bool ret = sidestep::UnitTests(); + printf("%s\n", ret ? "PASS" : "FAIL"); + return ret ? 0 : -1; +} + +#pragma optimize("", on) diff --git a/src/third_party/gperftools-2.5/src/windows/preamble_patcher_with_stub.cc b/src/third_party/gperftools-2.5/src/windows/preamble_patcher_with_stub.cc new file mode 100644 index 00000000000..23f9d3a0823 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/preamble_patcher_with_stub.cc @@ -0,0 +1,302 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * Author: Scott Francis + * + * Implementation of PreamblePatcher + */ + +#include "preamble_patcher.h" + +#include "mini_disassembler.h" + +// Definitions of assembly statements we need +#define ASM_JMP32REL 0xE9 +#define ASM_INT3 0xCC +#define ASM_NOP 0x90 +// X64 opcodes +#define ASM_MOVRAX_IMM 0xB8 +#define ASM_REXW 0x48 +#define ASM_JMP 0xFF +#define ASM_JMP_RAX 0xE0 +#define ASM_PUSH 0x68 +#define ASM_RET 0xC3 + +namespace sidestep { + +SideStepError PreamblePatcher::RawPatchWithStub( + void* target_function, + void* replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed) { + if ((NULL == target_function) || + (NULL == replacement_function) || + (NULL == preamble_stub)) { + SIDESTEP_ASSERT(false && + "Invalid parameters - either pTargetFunction or " + "pReplacementFunction or pPreambleStub were NULL."); + return SIDESTEP_INVALID_PARAMETER; + } + + // TODO(V7:joi) Siggi and I just had a discussion and decided that both + // patching and unpatching are actually unsafe. We also discussed a + // method of making it safe, which is to freeze all other threads in the + // process, check their thread context to see if their eip is currently + // inside the block of instructions we need to copy to the stub, and if so + // wait a bit and try again, then unfreeze all threads once we've patched. + // Not implementing this for now since we're only using SideStep for unit + // testing, but if we ever use it for production code this is what we + // should do. + // + // NOTE: Stoyan suggests we can write 8 or even 10 bytes atomically using + // FPU instructions, and on newer processors we could use cmpxchg8b or + // cmpxchg16b. So it might be possible to do the patching/unpatching + // atomically and avoid having to freeze other threads. Note though, that + // doing it atomically does not help if one of the other threads happens + // to have its eip in the middle of the bytes you change while you change + // them. + unsigned char* target = reinterpret_cast<unsigned char*>(target_function); + unsigned int required_trampoline_bytes = 0; + const unsigned int kRequiredStubJumpBytes = 5; + const unsigned int kRequiredTargetPatchBytes = 5; + + // Initialize the stub with INT3's just in case. + if (stub_size) { + memset(preamble_stub, 0xcc, stub_size); + } + if (kIs64BitBinary) { + // In 64-bit mode JMP instructions are always relative to RIP. If the + // replacement - target offset is > 2GB, we can't JMP to the replacement + // function. In this case, we're going to use a trampoline - that is, + // we're going to do a relative jump to a small chunk of code in the stub + // that will then do the absolute jump to the replacement function. By + // doing this, we only need to patch 5 bytes in the target function, as + // opposed to patching 12 bytes if we were to do an absolute jump. + // + // Note that the first byte of the trampoline is a NOP instruction. 
This + // is used as a trampoline signature that will be detected when unpatching + // the function. + // + // jmp <trampoline> + // + // trampoline: + // nop + // mov rax, <replacement_function> + // jmp rax + // + __int64 replacement_target_offset = reinterpret_cast<__int64>( + replacement_function) - reinterpret_cast<__int64>(target) - 5; + if (replacement_target_offset > INT_MAX + || replacement_target_offset < INT_MIN) { + // The stub needs to be within 2GB of the target for the trampoline to + // work! + __int64 trampoline_offset = reinterpret_cast<__int64>(preamble_stub) + - reinterpret_cast<__int64>(target) - 5; + if (trampoline_offset > INT_MAX || trampoline_offset < INT_MIN) { + // We're screwed. + SIDESTEP_ASSERT(false + && "Preamble stub is too far from target to patch."); + return SIDESTEP_UNEXPECTED; + } + required_trampoline_bytes = 13; + } + } + + // Let's disassemble the preamble of the target function to see if we can + // patch, and to see how much of the preamble we need to take. We need 5 + // bytes for our jmp instruction, so let's find the minimum number of + // instructions to get 5 bytes. + MiniDisassembler disassembler; + unsigned int preamble_bytes = 0; + unsigned int stub_bytes = 0; + while (preamble_bytes < kRequiredTargetPatchBytes) { + unsigned int cur_bytes = 0; + InstructionType instruction_type = + disassembler.Disassemble(target + preamble_bytes, cur_bytes); + if (IT_JUMP == instruction_type) { + unsigned int jump_bytes = 0; + SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION; + if (IsShortConditionalJump(target + preamble_bytes, cur_bytes)) { + jump_ret = PatchShortConditionalJump(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, + &jump_bytes, + stub_size - stub_bytes); + } else if (IsShortJump(target + preamble_bytes, cur_bytes)) { + jump_ret = PatchShortJump(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, + &jump_bytes, + stub_size - stub_bytes); + } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) || + IsNearRelativeJump(target + preamble_bytes, cur_bytes) || + IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) || + IsNearRelativeCall(target + preamble_bytes, cur_bytes)) { + jump_ret = PatchNearJumpOrCall(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, &jump_bytes, + stub_size - stub_bytes); + } + if (jump_ret != SIDESTEP_SUCCESS) { + SIDESTEP_ASSERT(false && + "Unable to patch because there is an unhandled branch " + "instruction in the initial preamble bytes."); + return SIDESTEP_JUMP_INSTRUCTION; + } + stub_bytes += jump_bytes; + } else if (IT_RETURN == instruction_type) { + SIDESTEP_ASSERT(false && + "Unable to patch because function is too short"); + return SIDESTEP_FUNCTION_TOO_SMALL; + } else if (IT_GENERIC == instruction_type) { + if (IsMovWithDisplacement(target + preamble_bytes, cur_bytes)) { + unsigned int mov_bytes = 0; + if (PatchMovWithDisplacement(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, &mov_bytes, + stub_size - stub_bytes) + != SIDESTEP_SUCCESS) { + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + stub_bytes += mov_bytes; + } else { + memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes), + reinterpret_cast<void*>(target + preamble_bytes), cur_bytes); + stub_bytes += cur_bytes; + } + } else { + SIDESTEP_ASSERT(false && + "Disassembler encountered unsupported instruction " + "(either unused or unknown"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + preamble_bytes += cur_bytes; + } + + if (NULL != bytes_needed) + *bytes_needed = 
stub_bytes + kRequiredStubJumpBytes + + required_trampoline_bytes; + + // Inv: cbPreamble is the number of bytes (at least 5) that we need to take + // from the preamble to have whole instructions that are 5 bytes or more + // in size total. The size of the stub required is cbPreamble + + // kRequiredStubJumpBytes (5) + required_trampoline_bytes (0 or 13) + if (stub_bytes + kRequiredStubJumpBytes + required_trampoline_bytes + > stub_size) { + SIDESTEP_ASSERT(false); + return SIDESTEP_INSUFFICIENT_BUFFER; + } + + // Now, make a jmp instruction to the rest of the target function (minus the + // preamble bytes we moved into the stub) and copy it into our preamble-stub. + // find address to jump to, relative to next address after jmp instruction +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4244) +#endif + int relative_offset_to_target_rest + = ((reinterpret_cast<unsigned char*>(target) + preamble_bytes) - + (preamble_stub + stub_bytes + kRequiredStubJumpBytes)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + // jmp (Jump near, relative, displacement relative to next instruction) + preamble_stub[stub_bytes] = ASM_JMP32REL; + // copy the address + memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes + 1), + reinterpret_cast<void*>(&relative_offset_to_target_rest), 4); + + if (kIs64BitBinary && required_trampoline_bytes != 0) { + // Construct the trampoline + unsigned int trampoline_pos = stub_bytes + kRequiredStubJumpBytes; + preamble_stub[trampoline_pos] = ASM_NOP; + preamble_stub[trampoline_pos + 1] = ASM_REXW; + preamble_stub[trampoline_pos + 2] = ASM_MOVRAX_IMM; + memcpy(reinterpret_cast<void*>(preamble_stub + trampoline_pos + 3), + reinterpret_cast<void*>(&replacement_function), + sizeof(void *)); + preamble_stub[trampoline_pos + 11] = ASM_JMP; + preamble_stub[trampoline_pos + 12] = ASM_JMP_RAX; + + // Now update replacement_function to point to the trampoline + replacement_function = preamble_stub + trampoline_pos; + } + + // Inv: preamble_stub points to assembly code that will execute the + // original function by first executing the first cbPreamble bytes of the + // preamble, then jumping to the rest of the function. + + // Overwrite the first 5 bytes of the target function with a jump to our + // replacement function. + // (Jump near, relative, displacement relative to next instruction) + target[0] = ASM_JMP32REL; + + // Find offset from instruction after jmp, to the replacement function. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4244) +#endif + int offset_to_replacement_function = + reinterpret_cast<unsigned char*>(replacement_function) - + reinterpret_cast<unsigned char*>(target) - 5; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + // complete the jmp instruction + memcpy(reinterpret_cast<void*>(target + 1), + reinterpret_cast<void*>(&offset_to_replacement_function), 4); + + // Set any remaining bytes that were moved to the preamble-stub to INT3 so + // as not to cause confusion (otherwise you might see some strange + // instructions if you look at the disassembly, or even invalid + // instructions). Also, by doing this, we will break into the debugger if + // some code calls into this portion of the code. If this happens, it + // means that this function cannot be patched using this patcher without + // further thought. 
+ if (preamble_bytes > kRequiredTargetPatchBytes) { + memset(reinterpret_cast<void*>(target + kRequiredTargetPatchBytes), + ASM_INT3, preamble_bytes - kRequiredTargetPatchBytes); + } + + // Inv: The memory pointed to by target_function now points to a relative + // jump instruction that jumps over to the preamble_stub. The preamble + // stub contains the first stub_size bytes of the original target + // function's preamble code, followed by a relative jump back to the next + // instruction after the first cbPreamble bytes. + // + // In 64-bit mode the memory pointed to by target_function *may* point to a + // relative jump instruction that jumps to a trampoline which will then + // perform an absolute jump to the replacement function. The preamble stub + // still contains the original target function's preamble code, followed by a + // jump back to the instructions after the first preamble bytes. + // + return SIDESTEP_SUCCESS; +} + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.5/src/windows/shortproc.asm b/src/third_party/gperftools-2.5/src/windows/shortproc.asm new file mode 100644 index 00000000000..7e8e3d78395 --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/shortproc.asm @@ -0,0 +1,169 @@ +; Copyright (c) 2011, Google Inc. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above +; copyright notice, this list of conditions and the following disclaimer +; in the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Google Inc. nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +; +; --- +; Author: Scott Francis +; +; Unit tests for PreamblePatcher
+
+.MODEL small
+
+.CODE
+
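+; TooShortFunction is a single RET: too small to hold the patcher's 5-byte
+; JMP, so it exercises the SIDESTEP_FUNCTION_TOO_SMALL path (and doubles as
+; the CALL target for CallNearRelativeFunction below).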
+TooShortFunction PROC
+ ret
+TooShortFunction ENDP
+
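+; JumpShortCondFunction opens with TEST plus a short conditional jump (Jcc
+; rel8) that sits inside the 5 bytes the patcher must relocate; the INT 3
+; padding keeps the jump target forward but still within rel8 range.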
+JumpShortCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+jumpspot:
+ nop
+ nop
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpShortCondFunction ENDP
+
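+; JumpNearCondFunction's MOV padding pushes jumpspot more than 127 bytes
+; away, forcing the assembler to emit the near conditional form (0F 8x
+; rel32) in the patched region instead of the short form.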
+JumpNearCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpNearCondFunction ENDP
+
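+; Despite the name, "jmp jumpspot" at this distance assembles to a near
+; relative JMP (E9 rel32), so JumpAbsoluteFunction exercises the
+; unconditional near-jump rewrite.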
+JumpAbsoluteFunction PROC
+ test cl, 1
+ jmp jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpAbsoluteFunction ENDP
+
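+; CallNearRelativeFunction begins with a near relative CALL (E8 rel32) that
+; falls inside the first 5 bytes, so the patcher must fix up the call
+; displacement when copying the preamble into the stub.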
+CallNearRelativeFunction PROC
+ test cl, 1
+ call TooShortFunction
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ nop
+ nop
+ nop
+ ret
+CallNearRelativeFunction ENDP
+
+END
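The MASM fixtures above hand the patcher each branch shape its helpers must rewrite. As a rough standalone sketch of the conversion that PatchShortConditionalJump's contract describes (7x rel8 becoming 0F 8x rel32), under the same within-2GB assumption the header documents: WriteNearJccFromShortJcc is an illustrative helper invented for this note, not a gperftools API, and range checks and error handling are omitted.

#include <cstring>
#include <stdint.h>

// Rewrites the short conditional jump "7x rel8" at source as the near form
// "0F 8x rel32" at target, preserving the original destination.  Returns
// the number of bytes emitted (always 6).
unsigned int WriteNearJccFromShortJcc(const unsigned char* source,
                                      unsigned char* target) {
  // Destination of the short jump: end of the 2-byte instruction plus rel8.
  const unsigned char* dest = source + 2 + static_cast<signed char>(source[1]);
  target[0] = 0x0F;
  target[1] = 0x80 | (source[0] & 0x0F);  // 7x and 0F 8x share the cc nibble
  // rel32 is measured from the end of the 6-byte near instruction.
  int32_t rel32 = static_cast<int32_t>(
      reinterpret_cast<intptr_t>(dest) -
      reinterpret_cast<intptr_t>(target + 6));
  std::memcpy(target + 2, &rel32, sizeof(rel32));
  return 6;
}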
diff --git a/src/third_party/gperftools-2.5/src/windows/system-alloc.cc b/src/third_party/gperftools-2.5/src/windows/system-alloc.cc new file mode 100644 index 00000000000..9537745b86f --- /dev/null +++ b/src/third_party/gperftools-2.5/src/windows/system-alloc.cc @@ -0,0 +1,204 @@ +// Copyright (c) 2013, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Petr Hosek + +#ifndef _WIN32 +# error You should only be including windows/system-alloc.cc in a windows environment! +#endif + +#include <config.h> +#include <windows.h> +#include <algorithm> // std::min +#include <gperftools/malloc_extension.h> +#include "base/logging.h" +#include "base/spinlock.h" +#include "internal_logging.h" +#include "system-alloc.h" + +static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); + +// The current system allocator declaration +SysAllocator* sys_alloc = NULL; +// Number of bytes taken from system. +size_t TCMalloc_SystemTaken = 0; + +class VirtualSysAllocator : public SysAllocator { +public: + VirtualSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; +static char virtual_space[sizeof(VirtualSysAllocator)]; + +// This is mostly like MmapSysAllocator::Alloc, except it does these weird +// munmap's in the middle of the page, which is forbidden in windows. +void* VirtualSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + // Align on the pagesize boundary + const int pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size = ((size + alignment - 1) / alignment) * alignment; + + // Report the total number of bytes the OS actually delivered. This might be + // greater than |size| because of alignment concerns. The full size is + // necessary so that adjacent spans can be coalesced. + // TODO(antonm): proper processing of alignments + // in actual_size and decommitting. 
+ if (actual_size) { + *actual_size = size; + } + + // We currently do not support alignments larger than the pagesize or + // alignments that are not multiples of the pagesize after being floored. + // If this ability is needed it can be done by the caller (assuming it knows + // the page size). + assert(alignment <= pagesize); + + void* result = VirtualAlloc(0, size, + MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); + if (result == NULL) + return NULL; + + // If the result is not aligned memory fragmentation will result which can + // lead to pathological memory use. + assert((reinterpret_cast<uintptr_t>(result) & (alignment - 1)) == 0); + + return result; +} + +#ifdef _MSC_VER + +extern "C" SysAllocator* tc_get_sysalloc_override(SysAllocator *def); +extern "C" SysAllocator* tc_get_sysalloc_default(SysAllocator *def) +{ + return def; +} + +#if defined(_M_IX86) +#pragma comment(linker, "/alternatename:_tc_get_sysalloc_override=_tc_get_sysalloc_default") +#elif defined(_M_X64) +#pragma comment(linker, "/alternatename:tc_get_sysalloc_override=tc_get_sysalloc_default") +#endif + +#else // !_MSC_VER + +extern "C" ATTRIBUTE_NOINLINE +SysAllocator* tc_get_sysalloc_override(SysAllocator *def) +{ + return def; +} + +#endif + +static bool system_alloc_inited = false; +void InitSystemAllocators(void) { + VirtualSysAllocator *alloc = new (virtual_space) VirtualSysAllocator(); + sys_alloc = tc_get_sysalloc_override(alloc); +} + +extern PERFTOOLS_DLL_DECL +void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, + size_t alignment) { + SpinLockHolder lock_holder(&spinlock); + + if (!system_alloc_inited) { + InitSystemAllocators(); + system_alloc_inited = true; + } + + void* result = sys_alloc->Alloc(size, actual_size, alignment); + if (result != NULL) { + if (actual_size) { + TCMalloc_SystemTaken += *actual_size; + } else { + TCMalloc_SystemTaken += size; + } + } + return result; +} + +extern PERFTOOLS_DLL_DECL +bool TCMalloc_SystemRelease(void* start, size_t length) { + if (VirtualFree(start, length, MEM_DECOMMIT)) + return true; + + // The decommit may fail if the memory region consists of allocations + // from more than one call to VirtualAlloc. In this case, fall back to + // using VirtualQuery to retrieve the allocation boundaries and decommit + // them each individually. + + char* ptr = static_cast<char*>(start); + char* end = ptr + length; + MEMORY_BASIC_INFORMATION info; + while (ptr < end) { + size_t resultSize = VirtualQuery(ptr, &info, sizeof(info)); + assert(resultSize == sizeof(info)); + size_t decommitSize = std::min<size_t>(info.RegionSize, end - ptr); + BOOL success = VirtualFree(ptr, decommitSize, MEM_DECOMMIT); + assert(success == TRUE); + ptr += decommitSize; + } + + return true; +} + +extern PERFTOOLS_DLL_DECL +void TCMalloc_SystemCommit(void* start, size_t length) { + if (VirtualAlloc(start, length, MEM_COMMIT, PAGE_READWRITE) == start) + return; + + // The commit may fail if the memory region consists of allocations + // from more than one call to VirtualAlloc. In this case, fall back to + // using VirtualQuery to retrieve the allocation boundaries and commit them + // each individually. 
+
+  char* ptr = static_cast<char*>(start);
+  char* end = ptr + length;
+  MEMORY_BASIC_INFORMATION info;
+  while (ptr < end) {
+    size_t resultSize = VirtualQuery(ptr, &info, sizeof(info));
+    assert(resultSize == sizeof(info));
+
+    size_t commitSize = std::min<size_t>(info.RegionSize, end - ptr);
+    void* newAddress = VirtualAlloc(ptr, commitSize, MEM_COMMIT,
+                                    PAGE_READWRITE);
+    assert(newAddress == ptr);
+    ptr += commitSize;
+  }
+}
+
+bool RegisterSystemAllocator(SysAllocator *allocator, int priority) {
+  return false;   // we don't allow registration on windows, right now
+}
+
+void DumpSystemAllocatorStats(TCMalloc_Printer* printer) {
+  // We don't dump stats on windows, right now
+}
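The tc_get_sysalloc_override hook above is the extension point: the /alternatename pragma makes tc_get_sysalloc_default a weak fallback, so a program that links in a strong definition can substitute or wrap the allocator before first use. A minimal sketch, assuming the SysAllocator interface declared in gperftools/malloc_extension.h (a single virtual Alloc, as VirtualSysAllocator implements above); CountingSysAllocator is a hypothetical wrapper, not part of gperftools.

#include <stddef.h>
#include <new>
#include <gperftools/malloc_extension.h>  // declares SysAllocator (assumed)

// Wraps whatever allocator tcmalloc would otherwise use and keeps a private
// byte count, much as TCMalloc_SystemTaken does globally.
class CountingSysAllocator : public SysAllocator {
 public:
  explicit CountingSysAllocator(SysAllocator* wrapped)
      : wrapped_(wrapped), taken_(0) {}
  virtual void* Alloc(size_t size, size_t* actual_size, size_t alignment) {
    void* result = wrapped_->Alloc(size, actual_size, alignment);
    if (result != NULL)
      taken_ += actual_size ? *actual_size : size;
    return result;
  }
 private:
  SysAllocator* wrapped_;
  size_t taken_;
};

static char counting_space[sizeof(CountingSysAllocator)];

// Strong definition: it wins over the /alternatename default, so
// InitSystemAllocators hands us the VirtualSysAllocator and installs the
// wrapper in its place.  Placement new, as the file above uses for
// VirtualSysAllocator, avoids calling malloc while bootstrapping malloc.
extern "C" SysAllocator* tc_get_sysalloc_override(SysAllocator* def) {
  return new (counting_space) CountingSysAllocator(def);
}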